Sql-server – the fastest way to compare polygon geometry data

geometrysql serversql-server-2012

We have two tables with polygons stored in geometry type columns.

I want to fetch the polygons present in one table that are not present in another table.

As of now I am doing a left outer join and using STAsText(), however that is taking a very long time.

FYI, we have approximately 120 million polygons in both tables.

Is there a fast way of comparing geometry type data? Maybe I need to use the spatial indexing, however I am not aware of this.

SELECT newPolygon.* 
FROM table1 newPolygon 
    LEFT JOIN table2 oldPolygon 
        ON newPolygon.Shape.STAsText() = oldPolygon.Shape.STAsText() 
WHERE oldPolygon.Shape IS NULL

Best Answer

Building on the comments provided by @John M, I've contrived the following example of how to build a geometric index, and then query the index using the STEquals function:

USE tempdb;

DROP TABLE dbo.G1;
DROP TABLE dbo.G2;

CREATE TABLE dbo.G1
(
    G1ID INT NOT NULL CONSTRAINT PK_G1 PRIMARY KEY CLUSTERED IDENTITY(1,1)
    , Shape1 GEOMETRY
);

CREATE SPATIAL INDEX SIndx_G1_Shape1 
ON dbo.G1(Shape1)
WITH (BOUNDING_BOX = (0, 0, 9999, 9999));

CREATE TABLE dbo.G2
(
    G2ID INT NOT NULL CONSTRAINT PK_G2 PRIMARY KEY CLUSTERED IDENTITY(1,1)
    , Shape2 GEOMETRY
);

CREATE SPATIAL INDEX SIndx_G2_Shape2 
ON dbo.G2(Shape2)
WITH (BOUNDING_BOX = (0, 0, 9999, 9999));

DECLARE @c NVARCHAR(50);
SET @c = '';
DECLARE @LoopCount INT;
SET @LoopCount = 0;
WHILE @LoopCount < 5000
BEGIN

    SET @c = 'LINESTRING(2 ' + CONVERT(NVARCHAR(10), @LoopCount) + ', ' 
        + CONVERT(NVARCHAR(10), @LoopCount) + ' 2, 4 ' 
        + CONVERT(NVARCHAR(10), @LoopCount) + ', 2 4, 2 2)';
    IF @LoopCount < 10 PRINT (@c);
    INSERT INTO dbo.G1 (Shape1)
    SELECT geometry::Parse(@c);

    SET @c = 'LINESTRING(' + CONVERT(NVARCHAR(10), @LoopCount) + ' ' 
        + CONVERT(NVARCHAR(10), @LoopCount) + ', ' 
        + CONVERT(NVARCHAR(10), @LoopCount) + ' 2, ' 
        + CONVERT(NVARCHAR(10), @LoopCount + 2) + ' ' 
        + CONVERT(NVARCHAR(10), @LoopCount) + ', 2 4, 2 2)';
    IF @LoopCount < 10 PRINT (@c);
    INSERT INTO dbo.G2 (Shape2)
    SELECT geometry::Parse(@c);

    SET @LoopCount = @LoopCount + 1;
END

SELECT TOP(10) *
FROM dbo.G1;
SELECT TOP(10) *
FROM dbo.G2;

SELECT *
FROM dbo.G1
    INNER JOIN dbo.G2 ON G1.Shape1.STEquals(G2.Shape2) = 1;