Sql-server – SQL parsing XML

sql serverxml

I'm receiving XML files as part of an EDI setup and I'm having an issues getting information from the child elements.

Here is the XML file:

<?xml version="1.0" encoding="UTF-8"?>
<cpi_event>
  <header>
    <source_tenant_uid>My_Coffee_Company_PM_Training</source_tenant_uid>
    <event_type>receipt_received</event_type>
    <event_uid>b631e1bb-d815-496c-bc50-6253f13b8d40</event_uid>
    <event_created_at>2016-07-13 22:26:13.437613</event_created_at>
    <destination_tenant_uid>My_Coffee_Company</destination_tenant_uid>
    <document_type>My_Coffee_Company_OB_Receipt_Received_V1</document_type>
    <document_uid>975_2239_b631e1bb-d815-496c-bc50-6253f13b8d40_0</document_uid>
  </header>
  <content>
    <receipt>
      <received_at>2016-07-13 15:26:13 -0700</received_at>
      <reference_1>ddd</reference_1>
      <reference_2>zzz</reference_2>
      <customer>
        <code>MYX001</code>
        <name>My Coffee Company</name>
        <uuid>5f17ba52-f408-43f7-9dab-f559e10c2b2f</uuid>
      </customer>
      <vendor>
        <code>MY1200</code>
        <name>My Coffee Company</name>
        <uuid>a3c41ccf-a346-4cb0-8717-33dce5c4a3a5</uuid>
      </vendor>
      <receipt_items>
        <receipt_item>
          <code>017-001691</code>
          <uuid>5e24c72b-da35-4b34-b8a6-5f00f5243a70</uuid>
          <unit_quantity>500.0</unit_quantity>
          <unit_of_measure>impressions</unit_of_measure>
          <lot_code>A3</lot_code>
          <expiry_date/>
          <receive_to>
            <code>Q-01-1-C</code>
            <name>Q-01-1-C</name>
          </receive_to>
        </receipt_item>
        <receipt_item>
          <code>017-001692</code>
          <uuid>5e24c72b-da35-4b34-b8a6-5f00f5243a71</uuid>
          <unit_quantity>12500.5</unit_quantity>
          <unit_of_measure>impressions</unit_of_measure>
          <lot_code>A3</lot_code>
          <expiry_date/>
          <receive_to>
            <code>CR15</code>
            <name>Cups Racking 15</name>
          </receive_to>
        </receipt_item>
        <receipt_item>
          <code>017-001693</code>
          <uuid>5e24c72b-da35-4b34-b8a6-5f00f5243a72</uuid>
          <unit_quantity>25002.0</unit_quantity>
          <unit_of_measure>impressions</unit_of_measure>
          <lot_code>A2</lot_code>
          <expiry_date/>
          <receive_to>
            <code>CR15FG</code>
            <name>Cups Racking 15FG</name>
          </receive_to>
        </receipt_item>
        <receipt_item>
          <code>017-0016914</code>
          <uuid>5e24c72b-da35-4b34-b8a6-5f00f5243a73</uuid>
          <unit_quantity>25003.0</unit_quantity>
          <unit_of_measure>impressions</unit_of_measure>
          <lot_code>A1</lot_code>
          <expiry_date/>
          <receive_to>
            <code>CR15A</code>
            <name>Cups Racking 15A</name>
          </receive_to>
        </receipt_item>
      </receipt_items>
    </receipt>
  </content>
</cpi_event>

I'm only concerned with the first step of my process which is extracting the <header> information and the <receipt_item> <uuid> for each item and then inserting that information into a header table in SQL.

Using the sample XML above I would end up with four rows being inserted into the SQL header table. Each row would contain the <header> information and the <receipt_item> <uuid> for one item.

What happens with my current code is it will grab the <header> info and only the last <receipt_item> <uuid>. I've changed my code around and have been able to get it to grab the <header> info and first <receipt_item> <uuid>, but not all four <receipt_item> <uuid>'s.

Here is my current SQL stored proceedure (called from SSIS with filename as input):

USE [PackManger_Sandbox]
GO
/****** Object:  StoredProcedure [dbo].[EDI_XML_HEADER_Import]    Script Date: 7/15/2016 3:45:10 PM ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO

ALTER PROCEDURE [dbo].[EDI_XML_HEADER_Import]
(
    @XML_FILE NVARCHAR(MAX)
)
AS

-- SET NOCOUNT ON added to prevent extra result sets from
    -- interfering with SELECT statements.
    SET NOCOUNT ON;

DECLARE @Event_Type as varchar(50)

-- Message log for logging
DECLARE @msg NVARCHAR(255)

-- Setup XML variable to be used to hold contents of XML file.
DECLARE @xml XML 
-- The command line
DECLARE @COMMAND NVARCHAR(MAX) 
-- The definition of the parameters used within the command line
DECLARE @PARAM_DEF NVARCHAR(500)
-- The parameter used to pass the file name into the command
DECLARE @FILEVAR NVARCHAR(MAX)
-- The output variable that holds the results of the OPENROWSET()
DECLARE @XML_OUT XML 

SET @FILEVAR = @XML_FILE
SET @PARAM_DEF = N'@XML_FILE NVARCHAR(MAX), @XML_OUT XML OUTPUT'

SET @COMMAND = N'SELECT @XML_OUT = BulkColumn FROM OPENROWSET(BULK ''' +  @XML_FILE + ''', SINGLE_BLOB) ROW_SET';

EXEC sp_executesql @COMMAND, @PARAM_DEF, @XML_FILE = @FILEVAR,@XML_OUT = @xml OUTPUT;

DECLARE @hdoc int
DECLARE @WatcherOutputEDIEventType int
DECLARE @sku_uuid nvarchar(100)

EXEC sp_xml_preparedocument @hdoc OUTPUT, @xml

BEGIN TRY
-- Save event_type. Used to set return code which gets returned to SSIS to decide how to process XML
SELECT 
    @Event_Type = XMLhdr.event_type
FROM OPENXML (@hdoc, '/cpi_event/header', 2)
    WITH    (
            event_type nvarchar(50)
            ) AS XMLhdr

IF @Event_Type = 'bom_updated' SET @WatcherOutputEDIEventType = 100;
IF @Event_Type = 'sku_created' SET @WatcherOutputEDIEventType = 200;
IF @Event_Type = 'sku_updated' SET @WatcherOutputEDIEventType = 250;
IF @Event_Type = 'receipt_received' SET @WatcherOutputEDIEventType = 300;
IF @Event_Type = 'job_reconciled' SET @WatcherOutputEDIEventType = 400;
IF @Event_Type = 'shipment_shipped' SET @WatcherOutputEDIEventType = 500;
IF @Event_Type = 'spare1' SET @WatcherOutputEDIEventType = 600;
IF @Event_Type = 'spare2' SET @WatcherOutputEDIEventType = 700;

IF @WatcherOutputEDIEventType > 0 AND @WatcherOutputEDIEventType < 251
    -- Save SKU uuid
    SELECT 
        @sku_uuid = XMLhdr.uuid
    FROM OPENXML (@hdoc, '/cpi_event/content/sku', 2)
        WITH    (
                uuid nvarchar(100)
                ) AS XMLhdr;

ELSE IF @WatcherOutputEDIEventType = 300

    -- Save SKU uuid
    SELECT 
        @sku_uuid = XMLhdr.uuid
    FROM OPENXML (@hdoc, '/cpi_event/content/receipt/receipt_items/receipt_item', 2)
        WITH    (
                uuid nvarchar(100)
                ) AS XMLhdr;

-- Update header table
INSERT INTO
    PackManger_Sandbox.dbo.header
        (
        source_tenant_uid,
        event_type,
        event_uid,
        event_created_at,
        destination_tenant_uid,
        document_type,
        document_uid,
        sku_uuid)
SELECT 
        source_tenant_uid = source_tenant_uid,
        event_type = event_type,
        event_uid = event_uid,
        event_created_at = event_created_at,
        destination_tenant_uid = destination_tenant_uid,
        document_type = document_type,
        document_uid = document_uid,
        uuid = @sku_uuid
    FROM OPENXML (@hdoc, '/cpi_event/header', 2)
    WITH (
        source_tenant_uid nvarchar(255),
        event_type nvarchar(255),
        event_uid nvarchar(255),
        event_created_at datetime,
        destination_tenant_uid nvarchar(255),
        document_type nvarchar(255),
        document_uid nvarchar(255));

        -- Log success
        SET @msg = 'Success updating header table.' + CONVERT(varchar(50), @Event_Type);

    EXEC utility.Log_ProcedureCall_Sandbox 
        @ObjectID = @@PROCID,
        @AdditionalInfo = @msg;
END TRY
BEGIN CATCH
-- Log error
    SET @msg = 'Error updating header table.';

    EXEC utility.Log_ProcedureCall_Sandbox 
        @ObjectID = @@PROCID,
        @AdditionalInfo = @msg;
END CATCH


-- Remove the XML file from memory
EXEC sp_xml_removedocument @hdoc

RETURN @WatcherOutputEDIEventType

I'm trying to avoid looping and doing multiple SQL inserts.

Thanks!

Best Answer

You have a couple of options: 1) try the XML datatype and its methods (eg .nodes, .query, .value) or use OPENXML to drill down to the lowest level then then drill back up using the parent axis operator ('..'). There are pros and cons to each method (which you could research) but try both options with your data and see which works best for you. OPENXML performance is sometimes better with large xml documents and it doesn't have the same problem with the parent axis operator that the xml datatype does. It may however take 1/8th of your buffer pool, up to a maximum of 500MB which is a known 'feature' and why you should always call sp_xml_removedocument.

Here's a simple demo:

DECLARE @xml XML = '<?xml version="1.0" encoding="UTF-8"?>
<cpi_event>
  <header>
    <source_tenant_uid>My_Coffee_Company_PM_Training</source_tenant_uid>
    <event_type>receipt_received</event_type>
    <event_uid>b631e1bb-d815-496c-bc50-6253f13b8d40</event_uid>
    <event_created_at>2016-07-13 22:26:13.437613</event_created_at>
    <destination_tenant_uid>My_Coffee_Company</destination_tenant_uid>
    <document_type>My_Coffee_Company_OB_Receipt_Received_V1</document_type>
    <document_uid>975_2239_b631e1bb-d815-496c-bc50-6253f13b8d40_0</document_uid>
  </header>
  <content>
    <receipt>
      <received_at>2016-07-13 15:26:13 -0700</received_at>
      <reference_1>ddd</reference_1>
      <reference_2>zzz</reference_2>
      <customer>
        <code>MYX001</code>
        <name>My Coffee Company</name>
        <uuid>5f17ba52-f408-43f7-9dab-f559e10c2b2f</uuid>
      </customer>
      <vendor>
        <code>MY1200</code>
        <name>My Coffee Company</name>
        <uuid>a3c41ccf-a346-4cb0-8717-33dce5c4a3a5</uuid>
      </vendor>
      <receipt_items>
        <receipt_item>
          <code>017-001691</code>
          <uuid>5e24c72b-da35-4b34-b8a6-5f00f5243a70</uuid>
          <unit_quantity>500.0</unit_quantity>
          <unit_of_measure>impressions</unit_of_measure>
          <lot_code>A3</lot_code>
          <expiry_date/>
          <receive_to>
            <code>Q-01-1-C</code>
            <name>Q-01-1-C</name>
          </receive_to>
        </receipt_item>
        <receipt_item>
          <code>017-001692</code>
          <uuid>5e24c72b-da35-4b34-b8a6-5f00f5243a71</uuid>
          <unit_quantity>12500.5</unit_quantity>
          <unit_of_measure>impressions</unit_of_measure>
          <lot_code>A3</lot_code>
          <expiry_date/>
          <receive_to>
            <code>CR15</code>
            <name>Cups Racking 15</name>
          </receive_to>
        </receipt_item>
        <receipt_item>
          <code>017-001693</code>
          <uuid>5e24c72b-da35-4b34-b8a6-5f00f5243a72</uuid>
          <unit_quantity>25002.0</unit_quantity>
          <unit_of_measure>impressions</unit_of_measure>
          <lot_code>A2</lot_code>
          <expiry_date/>
          <receive_to>
            <code>CR15FG</code>
            <name>Cups Racking 15FG</name>
          </receive_to>
        </receipt_item>
        <receipt_item>
          <code>017-0016914</code>
          <uuid>5e24c72b-da35-4b34-b8a6-5f00f5243a73</uuid>
          <unit_quantity>25003.0</unit_quantity>
          <unit_of_measure>impressions</unit_of_measure>
          <lot_code>A1</lot_code>
          <expiry_date/>
          <receive_to>
            <code>CR15A</code>
            <name>Cups Racking 15A</name>
          </receive_to>
        </receipt_item>
      </receipt_items>
    </receipt>
  </content>
</cpi_event>'


DECLARE @hdoc int

EXEC sp_xml_preparedocument @hdoc OUTPUT, @xml

SELECT

    h.c.value('(source_tenant_uid/text())[1]', 'nvarchar(255)'),
    h.c.value('(event_type/text())[1]', 'nvarchar(255)'),
    h.c.value('(event_uid/text())[1]', 'nvarchar(255)'),
    h.c.value('(event_created_at/text())[1]', 'nvarchar(255)'),
    h.c.value('(destination_tenant_uid/text())[1]', 'nvarchar(255)'),
    h.c.value('(document_type/text())[1]', 'nvarchar(255)'),
    h.c.value('(document_uid/text())[1]', 'nvarchar(255)'),

    ri.c.value('(uuid/text())[1]', 'nvarchar(255)')

FROM @xml.nodes('/cpi_event') ce(c)
    CROSS APPLY ce.c.nodes('header') h(c) 
    CROSS APPLY ce.c.nodes('content/receipt/receipt_items/receipt_item') ri(c) 


SELECT
        source_tenant_uid = source_tenant_uid,
        event_type = event_type,
        event_uid = event_uid,
        event_created_at = event_created_at,
        destination_tenant_uid = destination_tenant_uid,
        document_type = document_type,
        document_uid = document_uid,
        --uuid = @sku_uuid
        uuid

FROM OPENXML (@hdoc, '/cpi_event/content/receipt/receipt_items/receipt_item', 2)
    WITH (
        source_tenant_uid nvarchar(255) '../../../../header/source_tenant_uid',
        event_type nvarchar(255) '../../../../header/event_type',
        event_uid nvarchar(255) '../../../../header/event_uid',
        event_created_at datetime '../../../../header/event_created_at',
        destination_tenant_uid nvarchar(255) '../../../../header/destination_tenant_uid',
        document_type nvarchar(255) '../../../../header/document_type',
        document_uid nvarchar(255) '../../../../header/document_uid',
        uuid nvarchar(255) 'uuid'
);


-- Remove the XML file from memory
EXEC sp_xml_removedocument @hdoc
GO