Sql-server – Help with FOR XML EXPLICIT query

sql serverxml

I have an audit table constructed (basically) as follows:

CREATE TABLE [dbo].[AUDIT](
[UNIQUE_ID] [int] IDENTITY(1,1) NOT NULL,
[TABLE_NAME] [nvarchar](50) NOT NULL,
[TABLE_UNIQUE_ID] [int] NOT NULL,
[TABLE_USER_ID] [int] NOT NULL,
[XML] [nvarchar](max) NULL,
[DESCRIPTION] [nvarchar](max) NULL,
[TIMESTAMP] [datetime] NOT NULL,
[EXPLANATION] [nvarchar](max) NULL,
[DATE_INDEX] [datetime] NOT NULL CONSTRAINT [DF_SYS_ADUIT_DATE_INDEX] DEFAULT ('01/01/1900')
PRIMARY KEY CLUSTERED 
(
    [UNIQUE_ID] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]

This table represents records and associated actions from other tables, where the TABLE_UNIQUE_ID and TABLE_USER_ID are a composite key from the table they originated, XML is the XML representation of the data, and the DATE_INDEX a date the user defines for record (which doesn't necessarily equal the timestamp). This table is large, ranging from 1-10 million records. The main table that records are inserted from is generally in the thousands to hundreds of thousands range.

In short, each set of TABLE_UNIQUE_ID and TABLE_USER_ID combined (and TABLE_NAME, but I'm not worried about that right now) represents one record in the main table and everything that ever occurred to this record (they are uniquely identified by the UNIQUE_ID)

For example, here's a set of records from this table that represents ONE record and its history in our 'main' table:


enter image description here

So here's what I'm trying to do:

I am trying to write a query that returns an XML formatted document that represents an 'Audit' of any particular date range. That means that the user will enter a date range and a report will be produced that includes all records from the MAIN table in that range and their history (description/explanation fields only) from the AUDIT table, as well as the current state of the record and the very original state of the record. Note that the 'current' record will always be found as the highest UNIQUE_ID in the audit table for a given combination of TABLE_UNIQUE_ID and TABLE_USER_ID. So basically, from the given picture, I should have XML similar to the following:

RECORD>
  <AUDITID>914588</AUDITID>
  <UID>316</UID>
  <UNIQUEID>541</UNIQUEID>
  <CURRENTDATA>
    <Record>
      <Data>Current record data and other xml goes here, not important right now</Data>
    </Record>
  </CURRENTDATA>
  <CURRENTAUDITID>914588</CURRENTAUDITID>
  <HISTORY>
    <TIMESTAMP>8/8/11 9:45</TIMESTAMP>
    <DESCRIPTION>Record was created.</DESCRIPTION>
    <EXPLANATION></EXPLANATION>
  </HISTORY>
  <HISTORY>
    <TIMESTAMP>8/8/11 10:01</TIMESTAMP>
    <DESCRIPTION>Record locked</DESCRIPTION>
    <EXPLANATION></EXPLANATION>
  </HISTORY>
  <HISTORY>
    <TIMESTAMP></TIMESTAMP>
    <DESCRIPTION>Record unlocked</DESCRIPTION>
    <EXPLANATION></EXPLANATION>
  </HISTORY>
  <HISTORY>
    <TIMESTAMP>8/15/11 13:11</TIMESTAMP>
    <DESCRIPTION>Record changes : </DESCRIPTION>
    <EXPLANATION></EXPLANATION>
  </HISTORY>
  <HISTORY>
    <TIMESTAMP>8/15/11 14:13</TIMESTAMP>
    <DESCRIPTION>Record was exported.</DESCRIPTION>
  </HISTORY>
  <ORIGINAL>
    <DATA>
      <Record>
          <Data>Orignal record data and other xml goes here, not important right now</Data>
      </Record>
    </DATA>
  </ORIGINAL>
</RECORD>

I was able to write a query that gets MOST record sets correct, which is something like this:

declare @from nvarchar(20) = '8/8/2011', @through nvarchar(20) = '8/8/2011', @currentRecord int

--rownum = 1 represents the latest record in a set
;with CTERow as (
        SELECT UNIQUE_ID, 
        TABLE_USER_ID, 
        TABLE_UNIQUE_ID,
        TIMESTAMP,
        DATE_INDEX,
        XML,
        DESCRIPTION,
        ROW_NUMBER() OVER(PARTITION BY TABLE_USER_ID, TABLE_UNIQUE_id ORDER BY UNIQUE_ID DESC) AS RowNum 
    FROM [AUDIT]
    where (DATE_INDEX >= @from AND DATE_INDEX <= @through))

--This query selects current records that have a date (AUDIT.DATE_INDEX) that falls between the from/through dates
select 1 AS [TAG], NULL AS PARENT,
    --LEVEL 1
    A.UNIQUE_ID AS [RECORD!1!AUDITID!ELEMENT], 
    A.TABLE_USER_ID AS [RECORD!1!UID!ELEMENT], 
    A.TABLE_UNIQUE_ID AS [RECORD!1!UNIQUEID!ELEMENT],
    (SELECT XML from CTERow where CTErow.Unique_ID = A.UNIQUE_ID AND ROWNUM = 1) AS [RECORD!1!CURRENTDATA!XML], 
    A.UNIQUE_ID AS [RECORD!1!CURRENTAUDITID!XML],
    --LEVEL 2
    NULL as [ORIGINAL!2!DATA!XML],
    NULL as [ORIGINAL!2!ORIGUID!HIDE],

    --LEVEL 3
    NULL AS [HISTORY!3!TIMESTAMP!ELEMENT], 
    NULL AS [HISTORY!3!DESCRIPTION!ELEMENT],
    NULL AS [HISTORY!3!EXPLANATION!ELEMENT]

from AUDIT A
WHERE 
    (SELECT XML from CTERow where CTErow.Unique_ID = A.UNIQUE_ID) IS NOT NULL -- make sure that we're pulling the latest record of a set
    AND (A.DATE_INDEX >= @from AND A.DATE_INDEX <= @through)


UNION ALL

-- This query pulls the original record
SELECT 2 AS [TAG], 1 AS [PARENT],
    --LEVEL 1
    C.UNIQUE_ID,
    C.TABLE_USER_ID, 
    C.TABLE_UNIQUE_ID,
    NULL,
    NULL,
    --LEVEL 2
    C.[XML],
    null,
    --LEVEL 3
    NULL,
    NULL, 
    NULL
from AUDIT C
where (C.DATE_INDEX >= @from AND C.DATE_INDEX <= @through) --or 
and C.DESCRIPTION like '%record was created%'


UNION ALL

--This query pulls the history of the record
select 3 AS [TAG], 1 AS [PARENT],
    --LEVEL 1
    UNIQUE_ID,
    TABLE_USER_ID, 
    TABLE_UNIQUE_ID,
    NULL,
    NULL,
    --LEVEL 3
    NULL, 
    NULL,
    --LEVEL 2
    B.[TIMESTAMP],  
    B.[DESCRIPTION], 
    B.[EXPLANATION]
from AUDIT B
where (B.DATE_INDEX >= @from AND B.DATE_INDEX <= @through) 

ORDER BY [RECORD!1!UID!ELEMENT], [RECORD!1!UNIQUEID!ELEMENT], [ORIGINAL!2!DATA!XML], [HISTORY!3!TIMESTAMP!ELEMENT]
FOR XML EXPLICIT

However, I am running into problems where I don't capture all the history OR original records (because the DATE_INDEX on the historical records don't necessarily fall in the date range defined by the user).

My question is – what might be the best way to produce output, for a given date range (which will be queried in the date_index column), that selects the following:

  • the xml of the current record in the main table (this is represented by the the last/highest unique_id of a record set that has XML attached) with a date_index in the date range

  • the xml of the above original record (will always have a description with 'record was created' in it)

  • the description/explanations of all historical records

Thanks in advance for the help! This has been wracking my brain. Note that I am re-writing the original query, which had HORRENDOUS performance due to lots of joins on the main table (which is also large). Oh yeah, there should be another couple of columns in the index, but I'm more concerned with getting the correct result set first and THEN I'll consider the performance next.

Best Answer

You could carve off the data you are interested in into a temp table and work with it there. I've used FOR XML PATH in this example as it's easier to work with:

USE tempdb
GO

IF OBJECT_ID('[dbo].[AUDIT]') IS NOT NULL DROP TABLE [dbo].[AUDIT]
GO

CREATE TABLE [dbo].[AUDIT](

    [UNIQUE_ID] [int], --IDENTITY(1,1) NOT NULL,
    [TABLE_NAME] [nvarchar](50) NOT NULL,
    [TABLE_UNIQUE_ID] [int] NOT NULL,
    [TABLE_USER_ID] [int] NOT NULL,
    [XML] [nvarchar](MAX) NULL,
    [DESCRIPTION] [nvarchar](MAX) NULL,
    [TIMESTAMP] [datetime] NOT NULL,
    [EXPLANATION] [nvarchar](MAX) NULL,
    [DATE_INDEX] [datetime] NOT NULL CONSTRAINT [DF_SYS_ADUIT_DATE_INDEX] DEFAULT ('01/01/1900')

PRIMARY KEY CLUSTERED 
(
    [UNIQUE_ID] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
GO



INSERT INTO [dbo].[AUDIT] ( UNIQUE_ID, TABLE_NAME, TABLE_UNIQUE_ID, TABLE_USER_ID, XML, DESCRIPTION, TIMESTAMP, EXPLANATION, DATE_INDEX )
VALUES
    ( 848098, 'MAIN', 500187, 317, '<Record><Data>Original record data and other xml goes here, not important right now</Data></Record>', 'Record was created', '8 Aug 2011 9:45', NULL, '1 Jan 1900' ),
    ( 849901, 'MAIN', 500187, 317, '<Record/>', 'Record locked', '8 Aug 2011 10:01', NULL, '1 Jan 1900' ),
    ( 854269, 'MAIN', 500187, 317, '<Record/>', 'Record unlocked', '8 Aug 2011 10:51', NULL, '3 Aug 2011' ),
    ( 911382, 'MAIN', 500187, 317, '<Record><Data>Change record data and other xml goes here, not important right now</Data></Record>', 'Record changes:', '8 Aug 2011 10:52', NULL, '8 Aug 2011' ),
    ( 927795, 'MAIN', 500187, 317, '<Record/>', 'Record locked', '15 Aug 2011 13:11', NULL, '1 Jan 1900' ),
    ( 931013, 'MAIN', 500187, 317, '<Record><Data>Current record data and other xml goes here, not important right now</Data></Record>', 'Record was exported', '15 Aug 2011 14:13', NULL, '1 Jan 1900' )
GO


IF OBJECT_ID('tempdb..#tmp') IS NOT NULL DROP TABLE #tmp

DECLARE @from DATETIME = '8 Aug 2011 00:00', @through DATETIME = '8 Aug 2011 23:59:59:999', @currentRecord int

-- Get all records for that id and date range into a temp table
SELECT
    UNIQUE_ID, 
    TABLE_USER_ID, 
    TABLE_UNIQUE_ID,
    [TIMESTAMP],
    DATE_INDEX,
    [XML],
    [DESCRIPTION]
INTO #tmp
FROM dbo.[AUDIT]
WHERE DATE_INDEX >= @from 
AND DATE_INDEX <= @through
--AND presumably some other criteria to limit the records ...
AND TABLE_UNIQUE_ID = 500187
AND TABLE_USER_ID = 317

-- Add unique index and RowNum column
ALTER TABLE #tmp ADD PRIMARY KEY ( UNIQUE_ID )
ALTER TABLE #tmp ADD RowNum INT

-- Get any associated records (date criteria not used), which don't already exist
INSERT INTO #tmp ( UNIQUE_ID, TABLE_USER_ID, TABLE_UNIQUE_ID, TIMESTAMP, DATE_INDEX, XML, DESCRIPTION )
SELECT
    UNIQUE_ID, 
    TABLE_USER_ID, 
    TABLE_UNIQUE_ID,
    [TIMESTAMP],
    DATE_INDEX,
    [XML],
    [DESCRIPTION]
FROM dbo.[AUDIT] a
WHERE EXISTS ( SELECT * FROM #tmp t WHERE a.TABLE_UNIQUE_ID = t.TABLE_UNIQUE_ID AND a.TABLE_USER_ID = t.TABLE_USER_ID )
  AND NOT EXISTS ( SELECT * FROM #tmp u WHERE a.UNIQUE_ID = u.UNIQUE_ID )


-- Add the row number
;WITH cte AS
(
SELECT ROW_NUMBER() OVER(PARTITION BY TABLE_USER_ID, TABLE_UNIQUE_ID ORDER BY UNIQUE_ID ) x, *  
FROM #tmp
)
UPDATE cte
SET RowNum = x


-- Have a look at the data before we create the xml
SELECT 't' s, * FROM #tmp
GO


SELECT
    (
    SELECT
        UNIQUE_ID AS AUDITID,
        TABLE_USER_ID AS [UID],
        TABLE_UNIQUE_ID AS UNIQUEID,
        CAST( [XML] AS XML ) AS "CURRENTDATA/*"
    FROM #tmp
    WHERE RowNum = ( SELECT MAX(RowNum) FROM #tmp )
    FOR XML PATH(''), TYPE
    ),
    (
    SELECT
        [TIMESTAMP] AS [TIMESTAMP],
        [DESCRIPTION] AS [DESCRIPTION],
        '' AS EXPLANATION
    FROM #tmp
    WHERE RowNum > 1
      --AND RowNum = ( SELECT MAX(RowNum) FROM #tmp )   -- ??
    FOR XML PATH('HISTORY'), TYPE
    ),
    (
    SELECT
        CAST( [XML] AS XML ) AS "DATA/*"
    FROM #tmp
    WHERE RowNum = 1
    FOR XML PATH('ORIGINAL'), TYPE
    )

FOR XML PATH('RECORD'), TYPE
GO