Parsing Openxml with Multiple Elements of the Same Name

Parsing OpenXML with multiple elements of the same name

SELECT *
FROM OPENXML (@index, 'rootnode/group/anothernode')
WITH
(
id int '../id',
anothernode varchar(30) '.'
)

Or you can use the XML datatype instead like this:

SELECT G.N.value('(id/text())[1]', 'int') AS id,
A.N.value('text()[1]', 'varchar(30)') AS anothernode
FROM @XMLDoc.nodes('rootnode/group') AS G(N)
CROSS APPLY G.N.nodes('anothernode') AS A(N)

Parsing OpenXML with multiple elements of the same name based on attribute

After some hacking I found that I can get correct result by doing following SELECT:

SELECT *
FROM OPENXML (@doc, 'rootnode/group')
WITH
(
id int 'id',
anothernode varchar(30) 'anothernode[@lang="en"]'
)

Result is:

1   second string
2 I

Main difference is [@lang="en"], which selects specific attribute value.

Parsing OpenXML with elements that are also siblings with the same name

Using XQuery you can shred the XML on group element, and Item element wherever it is located within group. Then you can select required values starting from those 2 elements :

select 
grp.value('@id', 'int') AS GroupId,
grp.value('@status', 'varchar(100)') AS GroupStatus,
grp.value('(order/@OrdId)[1]', 'varchar(100)') AS OrdId,
grp.value('(order/@Type)[1]', 'varchar(100)') AS OrdType,
item.value('@code', 'int') AS ICode,
item.value('@qty', 'int') AS IQty
from @xml.nodes('rootnode/group') A(grp)
outer apply grp.nodes('.//Item') AS B(item)

Assume @xml is XML variable declared as follow :

declare @xml AS XML = '<rootnode>
<group id="00001" status="online">
<order OrdId="42" Type="Sale">
<Item code="1234" qty="1" unitprice="38.00" rank="0" level="0">
<Item code="5678" qty="1" unitprice="11.00" rank="0" level="1">
<Item code="9876" qty="1" unitprice="8.00" rank="0" level="2">
<Tax percent="12" />
</Item>
<Tax percent="12" />
</Item>
<Tax percent="12" />
</Item>
<Item code="7654" qty="1" unitprice="98.00" rank="1" level="0">
<Item code="3211" qty="1" unitprice="8.00" rank="1" level="1">
<Tax percent="12" />
</Item>
<Tax percent="12" />
</Item>
</order>
</group>
</rootnode>'

output :

Sample Image

Selection of columns IPrice, IRank, ILevel and ITax are left as an exercise :)

Parsing XML by OpenXML with multiple Parent nodes with multiple child nodes

This is an up-to-date and state-of-the-art approach with XQuery/XPath methods. The result is the same, just faster and better to read:

DECLARE @XML XML=
'<Report>
<Accounts>
<Account>
<Currency>USD</Currency>
<AccountBalance>45555</AccountBalance>
<Payments>
<PaymentData>
<PaymentCode>502</PaymentCode>
<PaymentAmount currCode="GBP">7000.00000000</PaymentAmount>
</PaymentData>
<PaymentData>
<PaymentCode>501</PaymentCode>
<PaymentAmount currCode="USD">5000.00000000</PaymentAmount>
</PaymentData>
</Payments>
</Account>
<Account>
<Currency>USD</Currency>
<AccountBalance>50000</AccountBalance>
<Payments>
<PaymentData>
<PaymentCode>501</PaymentCode>
<PaymentAmount currCode="USD">5000.00000000</PaymentAmount>
</PaymentData>
</Payments>
</Account>
</Accounts>
</Report>';

SELECT Payment.value('(../../Currency)[1]','nchar(3)') AS currCode
,Payment.value('(../../AccountBalance)[1]','decimal(18,0)') AS AccountBalance
,Payment.value('PaymentCode[1]','nchar(10)') AS PaymentCode
,Payment.value('PaymentAmount[1]/@currCode','nchar(3)') AS PaymentCurrCode
,Payment.value('PaymentAmount[1]','decimal(18,0)') AS PaymentCurrCode
FROM @XML.nodes('Report/Accounts/Account/Payments/PaymentData') AS One(Payment)

Need to parse the XML using OPENXML with Attributes and Elements

Starting from SQL Server 2005 onwards, it is better to use XQuery language, based on the w3c standards, while dealing with the XML data type.

Microsoft proprietary OPENXML and its companions sp_xml_preparedocument and sp_xml_removedocument are kept just for backward compatibility with the obsolete SQL Server 2000.

SQL

DECLARE @xml XML =
'<?xml version="1.0" encoding="UTF-8"?>
<Root>
<dataModel>
<repo profile="SKU" name="SKU"/>
</dataModel>
<SKU repo="SKU">
<SKU_SKU_Number>ABCD/U</SKU_SKU_Number>
<SKU_Advanced_Feature_1 mLang="1">
<value lang="en">SKU_Advanced_Feature1</value>
<value lang="da-DK">SKU_Advanced_Feature2</value>
</SKU_Advanced_Feature_1>
</SKU>
</Root>';

SELECT c.value('(SKU_SKU_Number/text())[1]','VARCHAR(20)') AS SKU_SKU_Number
, y.value('(./text())[1]','VARCHAR(30)') AS SKU_Advanced_Feature_1
, y.value('@lang','VARCHAR(30)') AS Lang
FROM @xml.nodes('/Root/SKU') AS t(c)
CROSS APPLY t.c.nodes('SKU_Advanced_Feature_1/value') AS x(y);

Output

+----------------+------------------------+-------+
| SKU_SKU_Number | SKU_Advanced_Feature_1 | Lang |
+----------------+------------------------+-------+
| ABCD/U | SKU_Advanced_Feature1 | en |
| ABCD/U | SKU_Advanced_Feature2 | da-DK |
+----------------+------------------------+-------+

SQL #2

SELECT c.value('(SKU_SKU_Number/text())[1]','VARCHAR(20)') AS SKU_SKU_Number
, y.value('(./text())[1]','VARCHAR(30)') AS SKU_Advanced_Feature_1
, y.value('@lang','VARCHAR(30)') AS Lang
, y.value('local-name(..)', 'VARCHAR(50)') AS PARENT
FROM @xml.nodes('/Root/SKU') AS t(c)
CROSS APPLY t.c.nodes('*[local-name(.)=("SKU_Advanced_Feature_1","SKU_Approvals_and_Standards")]/value') AS x(y);

Output for the latest XML

+----------------+------------------------+-------+-----------------------------+
| SKU_SKU_Number | SKU_Advanced_Feature_1 | Lang | PARENT |
+----------------+------------------------+-------+-----------------------------+
| ABCDEFG | SKU_Advanced_Feature1 | en | SKU_Advanced_Feature_1 |
| ABCDEFG | SKU_Advanced_Feature2 | da-DK | SKU_Advanced_Feature_1 |
| ABCDEFG | SKU_Advanced_Feature3 | mal | SKU_Advanced_Feature_1 |
| ABCDEFG | ETL listed, UL474 | en | SKU_Approvals_and_Standards |
| ABCDEFG | NULL | da-DK | SKU_Approvals_and_Standards |
| ABCDEFG | CSA 22.2 No. 92 | en | SKU_Approvals_and_Standards |
| ABCDEFG | NULL | da-DK | SKU_Approvals_and_Standards |
+----------------+------------------------+-------+-----------------------------+

SQL #3

SELECT c.value('(SKU_SKU_Number/text())[1]','VARCHAR(20)') AS SKU_SKU_Number
, y.value('(./text())[1]','VARCHAR(30)') AS SKU_Advanced_Feature_1
, y.value('@lang','VARCHAR(30)') AS Lang
, b.value('(attrName/text())[1]','VARCHAR(30)') AS attrName
, y.value('local-name(..)', 'VARCHAR(50)') AS PARENT
FROM @xml.nodes('/Root/SKU') AS t(c)
CROSS APPLY t.c.nodes('*[local-name(.)=("SKU_Advanced_Feature_1","SKU_Approvals_and_Standards")]/value') AS x(y)
CROSS APPLY t.c.nodes('*[local-name(.)=("dynamicAttr")]') AS a(b);

Select from openxml with concatenation multiple nodes value to one string

First of all: FROM OPENXML (together with the stored procedures to prepare and to remove a document) is outdated and should not be used anymore. Rather use the native XML methods provided by the XML data type.

Try it like this:

DECLARE @XML AS XML, @hDoc AS INT

SELECT @XML =
'<offers>
<offer>
<a>AAA1</a>
<param name="B">A1B</param>
<param name="C">A1C</param>
</offer>
<offer>
<a>AAA2</a>
<param name="B">A2B</param>
<param name="C">A2C1</param>
<param name="C">A2C2</param>
</offer>
</offers>';

--The query

WITH cte AS
(
SELECT A.o.value('(a/text())[1]','nvarchar(100)') AS Offer_a
,B.p.value('@name','nvarchar(100)') AS Param_Name
,A.o.query('.') TheOffer
FROM @XML.nodes('/offers/offer') A(o)
CROSS APPLY A.o.nodes('param') B(p)
)
SELECT Offer_a
,MAX(CASE WHEN Param_Name='B' THEN REPLACE(concatParams,' ',' / ') END) AS ParamB
,MAX(CASE WHEN Param_Name='C' THEN REPLACE(concatParams,' ',' / ') END) AS ParamC
FROM cte
CROSS APPLY(SELECT TheOffer.query('data(offer/param[@name=sql:column("Param_Name")]/text())').value('.','nvarchar(max)')) A(concatParams)
GROUP BY Offer_a;

The idea in short:

The cte will return a set of the text() of <a>, the value of param/@name and the XML-fragment <offer> for the corresponding <a>.

The magic happens in the CROSS APPLY(SELECT ...). This sub-select will fetch the params fitting to the current row's Param_Name in a row-wise call. The XQuery-function sql:column() allows to introduce a value of the current row into a XQuery-expression.

Very important is the XQuery-function data(). This will return all data in this path separated by a blank. Regrettfully this function does not allow for a user defined separator.

Therefore a big warning: If your param values include blanks, you will not know, where to separate them... If you need this, please come back with a comment.

The second piece of magic is the grouped aggregation. We group by Offer_a and use MAX() to allow the usage of non-grouping columns. This is an old-fashioned pivot approach...

The replace will put the slashes instead of the blanks (from data()).

UPDATE: If there are blanks in your param values...

Approach 1

We use XQuery/FLWOR to go through the nodes and do the concatenation within Xquery. We can use sql:variable() to introduce a declared variable into the XPath-expression:

DECLARE @delimiter VARCHAR(100)=' / ';

WITH cte AS
(
SELECT A.o.value('(a/text())[1]','nvarchar(100)') AS Offer_a
,B.p.value('@name','nvarchar(100)') AS Param_Name
,A.o.query('.') TheOffer
FROM @XML.nodes('/offers/offer') A(o)
CROSS APPLY A.o.nodes('param') B(p)
)
SELECT Offer_a
,MAX(CASE WHEN Param_Name='B' THEN STUFF(concatParams,1,LEN(@delimiter),'') END) AS ParamB
,MAX(CASE WHEN Param_Name='C' THEN STUFF(concatParams,1,LEN(@delimiter),'') END) AS ParamC
FROM cte
CROSS APPLY(SELECT TheOffer.query('for $p in offer/param[@name=sql:column("Param_Name")]/text()
return <x>{concat(sql:variable("@delimiter"),$p)}</x>
').value('.','nvarchar(max)')) A(concatParams)
GROUP BY Offer_a;

...or Approach 2

We extract all values to an intermediate set and use a correlated sub query together with the FOR XML approach to get the concatenated parameters.

WITH cte AS
(
SELECT A.o.value('(a/text())[1]','nvarchar(100)') AS Offer_a
,B.p.value('@name','nvarchar(100)') AS Param_Name
,A.o.query('.') TheOffer
FROM @XML.nodes('/offers/offer') A(o)
CROSS APPLY A.o.nodes('param') B(p)
)
,cte2 AS
(
SELECT cte.Offer_a
,cte.Param_Name
,A.relatedParams.value('text()[1]','nvarchar(100)') AS ParamValue
FROM cte
CROSS APPLY TheOffer.nodes('offer/param[@name=sql:column("Param_Name")]') A(relatedParams)
)
SELECT Offer_a
,MAX(CASE WHEN Param_Name='B' THEN concatParamValues END) AS paramB
,MAX(CASE WHEN Param_Name='C' THEN concatParamValues END) AS paramC
FROM cte2
CROSS APPLY(SELECT STUFF((SELECT DISTINCT CONCAT(@delimiter,ParamValue)
FROM cte2 csq
WHERE csq.Offer_a=cte2.Offer_a
AND csq.Param_Name=cte2.Param_Name
FOR XML PATH('')),1,LEN(@delimiter),'')) A(concatParamValues)
GROUP BY Offer_a;

... and if your XML might contain forbidden characters use this at the end

CROSS APPLY(SELECT STUFF((SELECT DISTINCT CONCAT(@delimiter,ParamValue) 
FROM cte2 csq
WHERE csq.Offer_a=cte2.Offer_a
AND csq.Param_Name=cte2.Param_Name
FOR XML PATH(''),TYPE).value('.','nvarchar(100)'),1,LEN(@delimiter),'')) A(concatParamValues)

UPDATE 2

You placed an additional question in your comment, but I must admit, I did not really get what you need. If I get this correctly, you are having entities within your values. You did get your result in principles, but these entities remained untranslated? Correct?

In my answer above there is everything you need already. Just to make it clear, a fully working example:

DECLARE @delimiter VARCHAR(100)=' / ';
DECLARE @XML AS XML, @hDoc AS INT

SELECT @XML =
'<offers>
<offer>
<a>AAA1</a>
<param name="B">A1B&</param> <!-- Some typical entities -->
<param name="C">A1C<</param>
</offer>
<offer>
<a>AAA2</a>
<param name="B">A2B after space</param> <!-- A case with some spaces -->
<param name="C">A2C1 A</param> <!-- The A is the capital letter A as entity-->
<param name="C">A2C2</param>
</offer>
</offers>';

WITH cte AS
(
SELECT A.o.value('(a/text())[1]','nvarchar(100)') AS Offer_a
,B.p.value('@name','nvarchar(100)') AS Param_Name
,A.o.query('.') TheOffer
FROM @XML.nodes('/offers/offer') A(o)
CROSS APPLY A.o.nodes('param') B(p)
)
,cte2 AS
(
SELECT cte.Offer_a
,cte.Param_Name
,A.relatedParams.value('text()[1]','nvarchar(100)') AS ParamValue
FROM cte
CROSS APPLY TheOffer.nodes('offer/param[@name=sql:column("Param_Name")]') A(relatedParams)
)
SELECT Offer_a
,MAX(CASE WHEN Param_Name='B' THEN concatParamValues END) AS paramB
,MAX(CASE WHEN Param_Name='C' THEN concatParamValues END) AS paramC
FROM cte2
CROSS APPLY(SELECT STUFF((SELECT DISTINCT CONCAT(@delimiter,ParamValue)
FROM cte2 csq
WHERE csq.Offer_a=cte2.Offer_a
AND csq.Param_Name=cte2.Param_Name
FOR XML PATH(''),TYPE).value('.','nvarchar(100)'),1,LEN(@delimiter),'')) A(concatParamValues)
GROUP BY Offer_a;

The result

Offer_a     paramB              paramC
AAA1 A1B& A1C<
AAA2 A2B after space A2C2 / A2C1 A

The "approach 1" from above would work to the same result:

WITH cte AS
(
SELECT A.o.value('(a/text())[1]','nvarchar(100)') AS Offer_a
,B.p.value('@name','nvarchar(100)') AS Param_Name
,A.o.query('.') TheOffer
FROM @XML.nodes('/offers/offer') A(o)
CROSS APPLY A.o.nodes('param') B(p)
)
SELECT Offer_a
,MAX(CASE WHEN Param_Name='B' THEN STUFF(concatParams,1,LEN(@delimiter),'') END) AS ParamB
,MAX(CASE WHEN Param_Name='C' THEN STUFF(concatParams,1,LEN(@delimiter),'') END) AS ParamC
FROM cte
CROSS APPLY(SELECT TheOffer.query('for $p in offer/param[@name=sql:column("Param_Name")]/text()
return <x>{concat(sql:variable("@delimiter"),$p)}</x>
').value('.','nvarchar(max)')) A(concatParams)
GROUP BY Offer_a;

Finally I hope, that this solves your issues...



Related Topics



Leave a reply



Submit