How to Extract Multiple Strings from Single Rows in SQL Server

How to extract multiple strings from single rows in SQL Server

You can use a cte recursively to strip out the strings.

declare @T table (id int, [text] nvarchar(max))

insert into @T values (1, 'Peter (Peter@peter.de) and Marta (marty@gmail.com) are doing fine.')
insert into @T values (2, 'Nothing special here')
insert into @T values (3, 'Another email address (me@my.com)')

;with cte([text], email)
as
(
select
right([text], len([text]) - charindex(')', [text], 0)),
substring([text], charindex('(', [text], 0) + 1, charindex(')', [text], 0) - charindex('(', [text], 0) - 1)
from @T
where charindex('(', [text], 0) > 0
union all
select
right([text], len([text]) - charindex(')', [text], 0)),
substring([text], charindex('(', [text], 0) + 1, charindex(')', [text], 0) - charindex('(', [text], 0) - 1)
from cte
where charindex('(', [text], 0) > 0
)
select email
from cte

Result

email
Peter@peter.de
me@my.com
marty@gmail.com

sql extract multiple words from one cell

You can try and use a mixture of XML and CrossApply, such as follows:

DECLARE @t TABLE(
ID int, MyString nvarchar(1000)
)

INSERT INTO @t VALUES
(1, N'{\rtf1\ansi\ansicpg1252\deff0\deflang1031{\fonttbl{\f0\fnil\fprq15\fcharset0 Arial;}} \viewkind4\uc1\pard\f0\fs16 HPE Gen8 300GB 6G SAS 10K \par SN:\tab 2C7713I0VG \par \tab 2C7713I0WX \par \tab 2C7712I0HP \par \tab 2C7713I0WS \par }')
,(2, N'{\rtf1\ansi\ansicpg1252\deff0\deflang1031{\fonttbl{\f0\fnil\fprq15\fcharset0 Arial;}} \viewkind4\uc1\pard\f0\fs16 HGST Ultrastar 7K4000 2TB HDD \par SN:\tab P5JKS38W \par }')
,(3, N'{\rtf1\ansi\ansicpg1252\deff0\deflang1031{\fonttbl{\f0\fnil\fprq15\fcharset0 Arial;}} \viewkind4\uc1\pard\f0\fs16 HGST Ultrastar 7K4000 2TB HDD}')

;WITH cte1 AS(
SELECT ID, SUBSTRING(MyString, CHARINDEX('SN:', MyString)+3, LEN(MyString)-CHARINDEX('SN:', MyString)-2) AS MyString
FROM @t
WHERE CHARINDEX('SN:', MyString) > 0
)
select
a.value('.', 'varchar(max)') AS SerialNr
from
(select cast('<M>' + rtrim(ltrim(REPLACE(REPLACE(REPLACE(REPLACE(MyString, '\tab', '</M><M>'),'\par',''),'{',''),'}',''))) + '</M>' AS XML) as col from cte1) as A
CROSS APPLY A.col.nodes ('/M') AS Split(a)
where NULLIF(a.value('.', 'varchar(max)'),'') IS NOT NULL

However, this will only work if SN is the last part of your string. If anything else follows in the string, you will have to consider this in your substring in cte1.

Extract multiple elements from a string

create table #text_table (text_field varchar(250))

insert into #text_table values
('Scheme ELS updated. Reserve Authority Level changed from: 11000000.00 to: 8000000.00.'),
('Scheme CNST updated. Payment Authority Level changed from: 8000000.00 to: 10000000.00. Reserve Authority Level changed from: 8000000.00 to: 10000000.00.'),
('Scheme ELS updated. Payment Authority Level changed from: 350000.00 to: 100000.00.')

drop table #text_table2
select *,
case when charindex('Reserve', text_field ,0) > 0 and charindex('Payment', text_field,0) = 0 then 'Reserve'
when charindex('Payment', text_field,0) > 0 and charindex('Reserve', text_field,0) = 0 then 'Payment'
when charindex('Payment', text_field,0) > 0 and charindex('Reserve', text_field,0) > 0 then 'Both'
else 'N/A' end as tag,
charindex('from: ',text_field,0) as From_Index,
charindex(' to: ',text_field, charindex('from: ',text_field,0)) as to_index,
charindex('.',text_field, charindex(' to: ',text_field, charindex('from: ',text_field,0))) as dot_index
into #text_table2
from #text_table

select *
into #simple
from #text_table2 where tag in ('Reserve','Payment')

select
substring(text_field, 0, charindex('.00. ',text_field,0)+3) as text_field
into #parse
from #text_table2 where tag = 'Both'
union
select
substring(text_field, charindex('.00. ',text_field,0)+5, charindex('.00.',text_field,charindex('.00.',text_field,0)+3))
from #text_table2 where tag = 'Both'

insert into #simple
select *,
case when charindex('Reserve', text_field ,0) > 0 and charindex('Payment', text_field,0) = 0 then 'Reserve'
when charindex('Payment', text_field,0) > 0 and charindex('Reserve', text_field,0) = 0 then 'Payment'
when charindex('Payment', text_field,0) > 0 and charindex('Reserve', text_field,0) > 0 then 'Both'
else 'N/A' end as tag,
charindex('from: ',text_field,0) as From_Index,
charindex(' to: ',text_field, charindex('from: ',text_field,0)) as to_index,
charindex('.',text_field, charindex(' to: ',text_field, charindex('from: ',text_field,0))) as dot_index
from #parse

select *,
substring(text_field, from_index+6, to_index - from_index - 6) as From_Value,
substring(text_field, to_index+4, dot_index - to_index - 1) as To_Value
from #simple

SQL Parse out multiple substrings

A lot of this credit (90%) should go to Alex K who provided an indepth answer regarding finding the nth occurrence of a character

SQL Server - find nth occurrence in a string

I took that answer, adjusted it for your question and then applied a PIVOT to break it into the desired rows/columns. This method should be able to create the desired output for as many unique question sets as you need provided they always have the same logic (each question/answer separated by a line break).

--Creates temporary table for testing, ID column and second set of data
--used to ensure query works for each unique set of questions
IF OBJECT_ID('tempdb..#Results') IS NOT NULL
DROP TABLE #Results

CREATE TABLE #Results
(ID INT IDENTITY(1,1) NOT NULL,
Notes NVARCHAR(4000) NOT NULL)
INSERT INTO #Results
(Notes)
VALUES
('Home Phone: 1234567890
Cell Phone: 1234567890
Date of Birth: 01/01/1971
School Name: James Jones High School
Address:123 Main Street
School City: Queens
School State: PA
School Zip: 32112
Years Teaching: 12
Grade Levels: Middle School
Total Students: 120
Subject: Music:
How did they hear: Other, provide more info: Former partner teacher in the Middle School
Type: Public/Charter '),
('Home Phone: test
Cell Phone: test
Date of Birth: test
School Name: test
Address:test
School City: test
School State: test
School Zip: test
Years Teaching: test
Grade Levels: test
Total Students: test
Subject: test
How did they hear: test
Type: test ');

--Recursive CTE to determine the position of each successive line break
--Used CHARINDEX to search CHAR(13) and CHAR(10) and find line breaks and carriage returns
WITH cte
AS

(SELECT ID, Notes, 1 AS Starts, CHARINDEX(CHAR(13)+CHAR(10),Notes) AS Pos
FROM #Results
UNION ALL
SELECT ID, Notes, Pos +1, CHARINDEX(CHAR(13)+CHAR(10),Notes,Pos+1) AS Pos
FROM cte
WHERE
pos >0),

--2nd CTE breaks each question set into it's own row
cte2
AS
(SELECT ID, Notes,Starts, Pos,
SUBSTRING(Notes, Starts,
CASE
WHEN pos > 0 THEN (pos - starts)
ELSE LEN(notes)
END) AS Token
FROM cte),

--3rd CTE cleans up the data, separating the Questions/Answers into separate columns
--REPLACE is used to remove Line Break (CHAR(10)), output was then showing a TAB so used
--double REPLACE and removed CHAR(9) (tab)
--LTRIM removes leading space
cte3
AS
(SELECT ID,
LTRIM(REPLACE(REPLACE(SUBSTRING(Token,CHARINDEX(CHAR(13)+CHAR(10),Token),CHARINDEX(':',Token)),CHAR(10),''),CHAR(9),'')) AS Question,
LTRIM(SUBSTRING(Token,CHARINDEX(':',Token)+1,4000)) AS Answer
FROM cte2)

--Pivot separates each Question/Answer row into it's own column
SELECT *
FROM
(SELECT ID, Question, Answer
FROM cte3) AS a
PIVOT
(MAX(Answer)
FOR [Question] IN([Address],[Cell Phone],[Date of Birth],[Grade Levels],[Home Phone],[How did they hear],
[School City],[School Name],[School State],[School Zip],[Subject],[Total Students],[Type],[Years Teaching])) AS pvt

I put comments on each section to hopefully explain my logic but let me know if you have any questions.

EDIT: Dynamic Pivot

It is possible to use dynamic SQL to create a PIVOT that will automatically pick up on all the "Question" columns and adjust accordingly. I do not believe it can be done in one step since I had to use the multiple CTEs. What I would do is take the above steps used to create CTE, CTE2, and CTE3 (basically everything before the PIVOT query) and CREATE a VIEW of those steps, then with that view do the following (for my example the view is called "Questionaire")

DECLARE @columns AS NVARCHAR(MAX)
DECLARE @query AS NVARCHAR(MAX)

SET @columns = STUFF((SELECT DISTINCT ',' + QUOTENAME(q.question)
FROM questionaire AS q
FOR XML PATH(''), TYPE
).value('.','NVARCHAR(MAX)')
,1,1,'')

SET @query = 'SELECT ID, '+ @columns +' FROM
(
SELECT ID, Answer, Question
FROM questionaire
) AS a
PIVOT
(
MAX(Answer)
FOR Question IN(' +@columns+')
) AS p'
EXECUTE(@query)

SQL Server: identify / extract multiple values (e.g. IDs) from a STRING to create a comma-separated list

You can use string_split() and string_agg() to accomplish this:

select *
from MSSQL_SAMPLE_DATA_TABLE cross apply
(select string_agg(s.value, ',') as eggs
from string_split(translate(text_example, ',', ' '), ' ') s
where s.value like 'EGG[0-9][0-9][0-9][0-9][0-9]'
) s;

Here is a db<>fiddle.

Extract multiple date strings from a single varchar(MAX) column SQL Server

select R.ReportID,
D.V as DateString
from #ReportSpecs as R
cross apply (select cast(R.ReportSpec as xml)) as X(R)
cross apply X.R.nodes('//@*, //*/text()') as T(X)
cross apply (select T.X.value('.', 'varchar(max)')) as D(V)
where charindex('/20', D.V) > 0

Result:

ReportID    DateString
----------- --------------------------
136 8/16/2002
136 8/23/2002
136 8/16/2002
136 8/23/2002
311 3/25/2002
311 4/4/2002
311 3/25/2002
311 4/4/2002
1131 " CREATEDATE="12/7/2009">

SQL Server, string to be substring in multiple rows

SELECT OrderID, CustomerID, value  
FROM Table1
CROSS APPLY STRING_SPLIT(SerialNo, '-');


Related Topics



Leave a reply



Submit