I'm receiving XML files as part of an EDI setup and I'm having an issues getting information from the child elements.
Here is the XML file:
<?xml version="1.0" encoding="UTF-8"?>
<cpi_event>
<header>
<source_tenant_uid>My_Coffee_Company_PM_Training</source_tenant_uid>
<event_type>receipt_received</event_type>
<event_uid>b631e1bb-d815-496c-bc50-6253f13b8d40</event_uid>
<event_created_at>2016-07-13 22:26:13.437613</event_created_at>
<destination_tenant_uid>My_Coffee_Company</destination_tenant_uid>
<document_type>My_Coffee_Company_OB_Receipt_Received_V1</document_type>
<document_uid>975_2239_b631e1bb-d815-496c-bc50-6253f13b8d40_0</document_uid>
</header>
<content>
<receipt>
<received_at>2016-07-13 15:26:13 -0700</received_at>
<reference_1>ddd</reference_1>
<reference_2>zzz</reference_2>
<customer>
<code>MYX001</code>
<name>My Coffee Company</name>
<uuid>5f17ba52-f408-43f7-9dab-f559e10c2b2f</uuid>
</customer>
<vendor>
<code>MY1200</code>
<name>My Coffee Company</name>
<uuid>a3c41ccf-a346-4cb0-8717-33dce5c4a3a5</uuid>
</vendor>
<receipt_items>
<receipt_item>
<code>017-001691</code>
<uuid>5e24c72b-da35-4b34-b8a6-5f00f5243a70</uuid>
<unit_quantity>500.0</unit_quantity>
<unit_of_measure>impressions</unit_of_measure>
<lot_code>A3</lot_code>
<expiry_date/>
<receive_to>
<code>Q-01-1-C</code>
<name>Q-01-1-C</name>
</receive_to>
</receipt_item>
<receipt_item>
<code>017-001692</code>
<uuid>5e24c72b-da35-4b34-b8a6-5f00f5243a71</uuid>
<unit_quantity>12500.5</unit_quantity>
<unit_of_measure>impressions</unit_of_measure>
<lot_code>A3</lot_code>
<expiry_date/>
<receive_to>
<code>CR15</code>
<name>Cups Racking 15</name>
</receive_to>
</receipt_item>
<receipt_item>
<code>017-001693</code>
<uuid>5e24c72b-da35-4b34-b8a6-5f00f5243a72</uuid>
<unit_quantity>25002.0</unit_quantity>
<unit_of_measure>impressions</unit_of_measure>
<lot_code>A2</lot_code>
<expiry_date/>
<receive_to>
<code>CR15FG</code>
<name>Cups Racking 15FG</name>
</receive_to>
</receipt_item>
<receipt_item>
<code>017-0016914</code>
<uuid>5e24c72b-da35-4b34-b8a6-5f00f5243a73</uuid>
<unit_quantity>25003.0</unit_quantity>
<unit_of_measure>impressions</unit_of_measure>
<lot_code>A1</lot_code>
<expiry_date/>
<receive_to>
<code>CR15A</code>
<name>Cups Racking 15A</name>
</receive_to>
</receipt_item>
</receipt_items>
</receipt>
</content>
</cpi_event>
I'm only concerned with the first step of my process which is extracting the <header>
information and the <receipt_item>
<uuid>
for each item and then inserting that information into a header table in SQL.
Using the sample XML above I would end up with four rows being inserted into the SQL header table. Each row would contain the <header>
information and the <receipt_item>
<uuid>
for one item.
What happens with my current code is it will grab the <header>
info and only the last <receipt_item>
<uuid>
. I've changed my code around and have been able to get it to grab the <header>
info and first <receipt_item>
<uuid>
, but not all four <receipt_item>
<uuid>
's.
Here is my current SQL stored proceedure (called from SSIS with filename as input):
USE [PackManger_Sandbox]
GO
/****** Object: StoredProcedure [dbo].[EDI_XML_HEADER_Import] Script Date: 7/15/2016 3:45:10 PM ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
ALTER PROCEDURE [dbo].[EDI_XML_HEADER_Import]
(
@XML_FILE NVARCHAR(MAX)
)
AS
-- SET NOCOUNT ON added to prevent extra result sets from
-- interfering with SELECT statements.
SET NOCOUNT ON;
DECLARE @Event_Type as varchar(50)
-- Message log for logging
DECLARE @msg NVARCHAR(255)
-- Setup XML variable to be used to hold contents of XML file.
DECLARE @xml XML
-- The command line
DECLARE @COMMAND NVARCHAR(MAX)
-- The definition of the parameters used within the command line
DECLARE @PARAM_DEF NVARCHAR(500)
-- The parameter used to pass the file name into the command
DECLARE @FILEVAR NVARCHAR(MAX)
-- The output variable that holds the results of the OPENROWSET()
DECLARE @XML_OUT XML
SET @FILEVAR = @XML_FILE
SET @PARAM_DEF = N'@XML_FILE NVARCHAR(MAX), @XML_OUT XML OUTPUT'
SET @COMMAND = N'SELECT @XML_OUT = BulkColumn FROM OPENROWSET(BULK ''' + @XML_FILE + ''', SINGLE_BLOB) ROW_SET';
EXEC sp_executesql @COMMAND, @PARAM_DEF, @XML_FILE = @FILEVAR,@XML_OUT = @xml OUTPUT;
DECLARE @hdoc int
DECLARE @WatcherOutputEDIEventType int
DECLARE @sku_uuid nvarchar(100)
EXEC sp_xml_preparedocument @hdoc OUTPUT, @xml
BEGIN TRY
-- Save event_type. Used to set return code which gets returned to SSIS to decide how to process XML
SELECT
@Event_Type = XMLhdr.event_type
FROM OPENXML (@hdoc, '/cpi_event/header', 2)
WITH (
event_type nvarchar(50)
) AS XMLhdr
IF @Event_Type = 'bom_updated' SET @WatcherOutputEDIEventType = 100;
IF @Event_Type = 'sku_created' SET @WatcherOutputEDIEventType = 200;
IF @Event_Type = 'sku_updated' SET @WatcherOutputEDIEventType = 250;
IF @Event_Type = 'receipt_received' SET @WatcherOutputEDIEventType = 300;
IF @Event_Type = 'job_reconciled' SET @WatcherOutputEDIEventType = 400;
IF @Event_Type = 'shipment_shipped' SET @WatcherOutputEDIEventType = 500;
IF @Event_Type = 'spare1' SET @WatcherOutputEDIEventType = 600;
IF @Event_Type = 'spare2' SET @WatcherOutputEDIEventType = 700;
IF @WatcherOutputEDIEventType > 0 AND @WatcherOutputEDIEventType < 251
-- Save SKU uuid
SELECT
@sku_uuid = XMLhdr.uuid
FROM OPENXML (@hdoc, '/cpi_event/content/sku', 2)
WITH (
uuid nvarchar(100)
) AS XMLhdr;
ELSE IF @WatcherOutputEDIEventType = 300
-- Save SKU uuid
SELECT
@sku_uuid = XMLhdr.uuid
FROM OPENXML (@hdoc, '/cpi_event/content/receipt/receipt_items/receipt_item', 2)
WITH (
uuid nvarchar(100)
) AS XMLhdr;
-- Update header table
INSERT INTO
PackManger_Sandbox.dbo.header
(
source_tenant_uid,
event_type,
event_uid,
event_created_at,
destination_tenant_uid,
document_type,
document_uid,
sku_uuid)
SELECT
source_tenant_uid = source_tenant_uid,
event_type = event_type,
event_uid = event_uid,
event_created_at = event_created_at,
destination_tenant_uid = destination_tenant_uid,
document_type = document_type,
document_uid = document_uid,
uuid = @sku_uuid
FROM OPENXML (@hdoc, '/cpi_event/header', 2)
WITH (
source_tenant_uid nvarchar(255),
event_type nvarchar(255),
event_uid nvarchar(255),
event_created_at datetime,
destination_tenant_uid nvarchar(255),
document_type nvarchar(255),
document_uid nvarchar(255));
-- Log success
SET @msg = 'Success updating header table.' + CONVERT(varchar(50), @Event_Type);
EXEC utility.Log_ProcedureCall_Sandbox
@ObjectID = @@PROCID,
@AdditionalInfo = @msg;
END TRY
BEGIN CATCH
-- Log error
SET @msg = 'Error updating header table.';
EXEC utility.Log_ProcedureCall_Sandbox
@ObjectID = @@PROCID,
@AdditionalInfo = @msg;
END CATCH
-- Remove the XML file from memory
EXEC sp_xml_removedocument @hdoc
RETURN @WatcherOutputEDIEventType
I'm trying to avoid looping and doing multiple SQL inserts.
Thanks!
Best Answer
You have a couple of options: 1) try the XML datatype and its methods (eg .nodes, .query, .value) or use
OPENXML
to drill down to the lowest level then then drill back up using the parent axis operator ('..'). There are pros and cons to each method (which you could research) but try both options with your data and see which works best for you.OPENXML
performance is sometimes better with large xml documents and it doesn't have the same problem with the parent axis operator that the xml datatype does. It may however take 1/8th of your buffer pool, up to a maximum of 500MB which is a known 'feature' and why you should always callsp_xml_removedocument
.Here's a simple demo: