Basically, here is the design of the table (the part that is necessary):
id | dt
--------
1 | 2014-05-13
2 | 1900-01-01
3 | 1900-01-01
4 | 2014-05-12
5 | 1900-01-01
So what I am trying to do is to set the 1900-01-01
dates to the data from the above column that is not populated with 1900-01-01
. So the final data would look like this:
id | dt
--------
1 | 2014-05-13
2 | 2014-05-13
3 | 2014-05-13
4 | 2014-05-12
5 | 2014-05-12
There are about 500,000 records being done at a time and it's about 50/50 whether it is a real date or 1900-01-01
. Some of them may have 1 date that goes into 100 1900-01-01
rows.
Right now, this is what I am working with:
UPDATE ISWFN
SET dt = ISWFN2.dt
FROM imp.TBL_DATA ISWFN
JOIN (
SELECT ISWFN.id, MAX(ISWFN2.id) [id2]
FROM imp.TBL_DATA ISWFN
JOIN imp.TBL_DATA ISWFN2 ON
ISWFN.id > ISWFN2.id
WHERE ISWFN.dt = '1900-01-01'
AND ISWFN2.dt != '1900-01-01'
GROUP BY ISWFN.id ) SQ ON
ISWFN.id = SQ.id
JOIN imp.TBL_DATA ISWFN2 ON
SQ.id2 = ISWFN2.id
The problem here is that I end up with a nested loop join with 250,000 records on each side of the join, it makes a row estimate of around 62,500,000,000…
Right now, I have id
as the primary key. There is also a filtered index on dt
where it is 1900-01-01
and an opposite filtered index where it is not 1900-01-01
.
Any thoughts?
Edit: it is really the sub-query (SQ) that is the problem area. That is the part I am trying to tune.
Sorry I don't know how to upload a file… here is the xml for the query plan (for the subquery):
<?xml version="1.0" encoding="UTF-8"?>
<ShowPlanXML xmlns="http://schemas.microsoft.com/sqlserver/2004/07/showplan" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" Version="1.1" Build="10.50.4000.0">
<BatchSequence>
<Batch>
<Statements>
<StmtSimple StatementCompId="1" StatementEstRows="253776" StatementId="1" StatementOptmLevel="FULL" StatementSubTreeCost="177777" StatementText="Statement1" StatementType="SELECT" QueryHash="0x6854D7EBB9341CB2" QueryPlanHash="0xB8F343C168320FED">
<StatementSetOptions ANSI_NULLS="true" ANSI_PADDING="true" ANSI_WARNINGS="true" ARITHABORT="true" CONCAT_NULL_YIELDS_NULL="true" NUMERIC_ROUNDABORT="false" QUOTED_IDENTIFIER="true" />
<QueryPlan CachedPlanSize="32" CompileTime="34" CompileCPU="18" CompileMemory="408">
<RelOp AvgRowSize="15" EstimateCPU="0.362533" EstimateIO="0" EstimateRebinds="0" EstimateRewinds="0" EstimateRows="253776" LogicalOp="Gather Streams" NodeId="0" Parallel="true" PhysicalOp="Parallelism" EstimatedTotalSubtreeCost="177777">
<OutputList>
<ColumnReference Database="Database1" Schema="Schema1" Table="Table1" Alias="Alias1" Column="Column1" />
<ColumnReference Column="Expr1004" />
</OutputList>
<Parallelism>
<RelOp AvgRowSize="15" EstimateCPU="0.21571" EstimateIO="0" EstimateRebinds="0" EstimateRewinds="0" EstimateRows="253776" LogicalOp="Aggregate" NodeId="1" Parallel="true" PhysicalOp="Stream Aggregate" EstimatedTotalSubtreeCost="177776">
<OutputList>
<ColumnReference Database="Database1" Schema="Schema1" Table="Table1" Alias="Alias1" Column="Column1" />
<ColumnReference Column="Expr1004" />
</OutputList>
<StreamAggregate>
<DefinedValues>
<DefinedValue>
<ColumnReference Column="Expr1004" />
<ScalarOperator ScalarString="ScalarString1">
<Aggregate AggType="MAX" Distinct="false">
<ScalarOperator>
<Identifier>
<ColumnReference Column="partialagg1005" />
</Identifier>
</ScalarOperator>
</Aggregate>
</ScalarOperator>
</DefinedValue>
</DefinedValues>
<GroupBy>
<ColumnReference Database="Database1" Schema="Schema1" Table="Table1" Alias="Alias1" Column="Column1" />
</GroupBy>
<RelOp AvgRowSize="15" EstimateCPU="3.36699" EstimateIO="0" EstimateRebinds="0" EstimateRewinds="0" EstimateRows="507553" LogicalOp="Repartition Streams" NodeId="2" Parallel="true" PhysicalOp="Parallelism" EstimatedTotalSubtreeCost="177776">
<OutputList>
<ColumnReference Database="Database1" Schema="Schema1" Table="Table1" Alias="Alias1" Column="Column1" />
<ColumnReference Column="partialagg1005" />
</OutputList>
<Parallelism PartitioningType="Hash">
<PartitionColumns>
<ColumnReference Database="Database1" Schema="Schema1" Table="Table1" Alias="Alias1" Column="Column1" />
</PartitionColumns>
<OrderBy>
<OrderByColumn Ascending="true">
<ColumnReference Database="Database1" Schema="Schema1" Table="Table1" Alias="Alias1" Column="Column1" />
</OrderByColumn>
</OrderBy>
<RelOp AvgRowSize="15" EstimateCPU="9802.32" EstimateIO="0" EstimateRebinds="0" EstimateRewinds="0" EstimateRows="507553" LogicalOp="Aggregate" NodeId="3" Parallel="true" PhysicalOp="Stream Aggregate" EstimatedTotalSubtreeCost="177773">
<OutputList>
<ColumnReference Database="Database1" Schema="Schema1" Table="Table1" Alias="Alias1" Column="Column1" />
<ColumnReference Column="partialagg1005" />
</OutputList>
<StreamAggregate>
<DefinedValues>
<DefinedValue>
<ColumnReference Column="partialagg1005" />
<ScalarOperator ScalarString="ScalarString2">
<Aggregate AggType="MAX" Distinct="false">
<ScalarOperator>
<Identifier>
<ColumnReference Database="Database1" Schema="Schema1" Table="Table1" Alias="Alias2" Column="Column1" />
</Identifier>
</ScalarOperator>
</Aggregate>
</ScalarOperator>
</DefinedValue>
</DefinedValues>
<GroupBy>
<ColumnReference Database="Database1" Schema="Schema1" Table="Table1" Alias="Alias1" Column="Column1" />
</GroupBy>
<RelOp AvgRowSize="15" EstimateCPU="136577" EstimateIO="0" EstimateRebinds="0" EstimateRewinds="0" EstimateRows="32674000000" LogicalOp="Inner Join" NodeId="4" Parallel="true" PhysicalOp="Nested Loops" EstimatedTotalSubtreeCost="167970">
<OutputList>
<ColumnReference Database="Database1" Schema="Schema1" Table="Table1" Alias="Alias1" Column="Column1" />
<ColumnReference Database="Database1" Schema="Schema1" Table="Table1" Alias="Alias2" Column="Column1" />
</OutputList>
<NestedLoops Optimized="false">
<Predicate>
<ScalarOperator ScalarString="ScalarString3">
<Compare CompareOp="GT">
<ScalarOperator>
<Identifier>
<ColumnReference Database="Database1" Schema="Schema1" Table="Table1" Alias="Alias1" Column="Column1" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Identifier>
<ColumnReference Database="Database1" Schema="Schema1" Table="Table1" Alias="Alias2" Column="Column1" />
</Identifier>
</ScalarOperator>
</Compare>
</ScalarOperator>
</Predicate>
<RelOp AvgRowSize="11" EstimateCPU="0.139637" EstimateIO="0.178475" EstimateRebinds="0" EstimateRewinds="0" EstimateRows="253742" LogicalOp="Index Seek" NodeId="5" Parallel="true" PhysicalOp="Index Seek" EstimatedTotalSubtreeCost="0.318111" TableCardinality="511279">
<OutputList>
<ColumnReference Database="Database1" Schema="Schema1" Table="Table1" Alias="Alias1" Column="Column1" />
</OutputList>
<IndexScan Ordered="true" ScanDirection="FORWARD" ForcedIndex="false" ForceSeek="false" ForceScan="false" NoExpandHint="false">
<DefinedValues>
<DefinedValue>
<ColumnReference Database="Database1" Schema="Schema1" Table="Table1" Alias="Alias1" Column="Column1" />
</DefinedValue>
</DefinedValues>
<Object Database="Database1" Schema="Schema1" Table="Table1" Index="Index1" Filtered="true" Alias="Alias1" IndexKind="NonClustered" />
<SeekPredicates>
<SeekPredicateNew>
<SeekKeys>
<Prefix ScanType="EQ">
<RangeColumns>
<ColumnReference Database="Database1" Schema="Schema1" Table="Table1" Alias="Alias1" Column="Column2" />
</RangeColumns>
<RangeExpressions>
<ScalarOperator ScalarString="ScalarString4">
<Const ConstValue="Value1" />
</ScalarOperator>
</RangeExpressions>
</Prefix>
</SeekKeys>
</SeekPredicateNew>
</SeekPredicates>
</IndexScan>
</RelOp>
<RelOp AvgRowSize="11" EstimateCPU="0.0464568" EstimateIO="0.01" EstimateRebinds="0" EstimateRewinds="253741" EstimateRows="257537" LogicalOp="Lazy Spool" NodeId="6" Parallel="true" PhysicalOp="Table Spool" EstimatedTotalSubtreeCost="11788.5">
<OutputList>
<ColumnReference Database="Database1" Schema="Schema1" Table="Table1" Alias="Alias2" Column="Column1" />
</OutputList>
<Spool>
<RelOp AvgRowSize="11" EstimateCPU="0.283448" EstimateIO="0.183866" EstimateRebinds="0" EstimateRewinds="0" EstimateRows="257537" LogicalOp="Index Scan" NodeId="7" Parallel="true" PhysicalOp="Index Scan" EstimatedTotalSubtreeCost="0.467313" TableCardinality="511279">
<OutputList>
<ColumnReference Database="Database1" Schema="Schema1" Table="Table1" Alias="Alias2" Column="Column1" />
</OutputList>
<IndexScan Ordered="false" ForcedIndex="false" ForceSeek="false" ForceScan="false" NoExpandHint="false">
<DefinedValues>
<DefinedValue>
<ColumnReference Database="Database1" Schema="Schema1" Table="Table1" Alias="Alias2" Column="Column1" />
</DefinedValue>
</DefinedValues>
<Object Database="Database1" Schema="Schema1" Table="Table1" Index="Index2" Filtered="true" Alias="Alias2" IndexKind="NonClustered" />
</IndexScan>
</RelOp>
</Spool>
</RelOp>
</NestedLoops>
</RelOp>
</StreamAggregate>
</RelOp>
</Parallelism>
</RelOp>
</StreamAggregate>
</RelOp>
</Parallelism>
</RelOp>
</QueryPlan>
</StmtSimple>
</Statements>
</Batch>
</BatchSequence>
</ShowPlanXML>
Best Answer
A possible simplification of the
UPDATE
statement that doesn't join the table to itself so many times. The query plan (at SQL-Fiddle, with small number of rows) is a clustered index scan with a clustered index seek for the subquery:Seems to be good enough for the OP's situation. There is possibly some rewriting that produces an even better execution plan.