Selective union possible

oracleunion

Is there any way to make a UNION consider only specific columns when checking for duplicity?

The query below returns all of the columns that I need but, due to slight differences in some of the columns, the UNION results in duplicates based on the FULLPWSID column value. The only column that I would like UNION to consider is FULLPWSID. The order of the tables being queried ensures that I get the most up to date information.

SELECT TRIM(TINWSYS.NUMBER0) AS FULLPWSID,
       TINWSYS.NAME AS SYSNAME,
       TINWSYS.D_PRIN_CITY_SVD_NM AS SYSCITY,
       TINWSYS.D_PRIN_CNTY_SVD_NM AS COUNTY,
       TINWSYS.ACTIVITY_STATUS_CD AS STATUS
       FROM TINWSYS
WHERE (TINWSYS.NAME                   LIKE '%SCENIC WOLF LAUREL #%'   OR
       TINWSYS.D_PRIN_CITY_SVD_NM LIKE '%SCENIC WOLF LAUREL #%'   OR
       TINWSYS.NUMBER0            LIKE '%SCENIC WOLF LAUREL #%')
UNION
SELECT CONCAT('NC',TO_CHAR(PWSID)) AS FULLPWSID,
PRNEW_PWS_INFO.PWS_NAME    AS SYSNAME,
PRNEW_PWS_INFO.CITY_SVD_NM AS SYSCITY,
PRNEW_PWS_INFO.CNTY_SVD_NM AS COUNTY,
TO_CHAR('P')
FROM PRNEW_PWS_INFO
WHERE (PRNEW_PWS_INFO.PWS_NAME  LIKE '%SCENIC WOLF LAUREL #%'   OR
       PRNEW_PWS_INFO.PWSID     LIKE '%SCENIC WOLF LAUREL #%')
ORDER BY FULLPWSID

The results of the query are:

FULLPWSID   SYSNAME                     SYSCITY     COUNTY  STATUS
NC1058009   SCENIC WOLF LAUREL #1                   MADISON A
NC1058010   SCENIC WOLF LAUREL #2                   MADISON A
NC1058011   SCENIC WOLF LAUREL #3       MARS HILL   MADISON A
NC1058012   SCENIC WOLF LAUREL #4                   MADISON A
NC1058015   SCENIC WOLF LAUREL #5       MARS HILL   MADISON A
NC1058015   SCENIC WOLF LAUREL #5A      MARS HILL   MADISON P
NC1058016   SCENIC WOLF LAUREL #6A      MARS HILL   MADISON P
NC1058017   SCENIC WOLF LAUREL #7                   MADISON A
NC1058017   SCENIC WOLF LAUREL #7A      MARS HILL   MADISON P
NC1058018   SCENIC WOLF LAUREL #8       MARS HILL   MADISON A
NC1058018   SCENIC WOLF LAUREL #8A      MARS HILL   MADISON P
NC1058021   SCENIC WOLF LAUREL #9       MARS HILL   MADISON A

As you can see there are 3 duplicates in the FULLPWSID column. I can take care of it in the underlying code but it would be helpful and cleaner to let the query take care of it.

DDL for the PRNEW_PWS_INFO table:

PWSID   NUMBER(7)
PWS_NAME    VARCHAR2(40)
PWS_ST_TYPE_CD  VARCHAR2(4)
CITY_SVD_NM VARCHAR2(40)
CNTY_SVD_NM VARCHAR2(12)
OWNER_NAME  VARCHAR2(40)
OWNER_ADD1  VARCHAR2(40)
OWNER_ADD2  VARCHAR2(40)
OWNER_CITY  VARCHAR2(40)
OWNER_STATE VARCHAR2(2)
OWNER_ZIP   VARCHAR2(10)
OWNER_BUS_PHN   VARCHAR2(12)
APPLICANT_NAME  VARCHAR2(40)
APPLICANT_ADD1  VARCHAR2(40)
APPLICANT_ADD2  VARCHAR2(40)
APPLICANT_CITY  VARCHAR2(40)
APPLICANT_STATE VARCHAR2(2)
APPLICANT_ZIP   VARCHAR2(10)
APPLICANT_BUS_PHN   VARCHAR2(12)
NCID    VARCHAR2(20)
DATE_ASSIGNED   TIMESTAMP(5)
TINWSYS_IS_NUMBER   NUMBER(7)

DDL for the TINWSYS table:

NUMBER0 CHAR(12)
TINWSYS_IS_NUMBER   NUMBER(7)
TINWSYS_ST_CODE CHAR(2)
ALTERNATE_ST_NUM    VARCHAR2(15)
NAME    VARCHAR2(40)
LOCAL_NAME  VARCHAR2(40)
ACTIVITY_STATUS_CD  CHAR(1)
ACTIVITY_DATE   DATE
OWNER_TYPE_CODE CHAR(1)
D_PRIN_CITY_SVD_NM  VARCHAR2(40)
D_PRIN_CNTY_SVD_NM  VARCHAR2(40)
SURF_WTR_RATIO  NUMBER(5,2)
SURF_WTR_PUR_RATIO  NUMBER(5,2)
GRND_WTR_RATIO  NUMBER(5,2)
GRND_WTR_PUR_RATIO  NUMBER(5,2)
GRND_WTR_UDI_RATIO  NUMBER(5,2)
GRND_WTR_UDI_PURCH  NUMBER(5,2)
OPER_CATEGORY_CD    VARCHAR2(5)
D_FED_PRIM_SRC_CD   CHAR(3)
D_ST_PRIM_SRC_CD    CHAR(3)
D_POPULATION_COUNT  NUMBER(10)
D_HISTORY_IND_CD    CHAR(1)
D_TTL_STOR_CAP_MSR  NUMBER(15,3)
PWS_ST_TYPE_CD  CHAR(4)
D_PWS_FED_TYPE_CD   CHAR(4)
D_LAST_INV_UPDT_TS  DATE
D_LAST_UPDT_TS  DATE
D_USERID_CODE   CHAR(8)
LAST_SNTY_SRV_DT    DATE
D_PRT_GW_SR_IND_CD  CHAR(1)
D_TCR_MCL_VIO_DT    DATE
D_INV_CHG_IND_CD    CHAR(1)
D_DSNFCT_GW_IND_CD  CHAR(1)
D_SWGUDI_FL_IND_CD  CHAR(1)
ACTIVITY_REASON_CD  CHAR(2)
DAYS_SERVING_COUNT  NUMBER(3)
D_NEW_WS_FLAG   CHAR(1)
D_INITIAL_TS    DATE
D_INITIAL_USERID    CHAR(8)
D_CDS_TRIGGER_TS    TIMESTAMP(6)
D_CDS_TRIGGER_USER  CHAR(8)
ACTIVITY_RSN_TXT    VARCHAR2(2000)
MEMO_TEXT   VARCHAR2(2000)
STAGE2_CDS_ID   VARCHAR2(22)

Best Answer

You can select from your UNION'd result set and GROUP BY the single column you would like to be unique. Because you can only have one record returned for the other columns, you must aggregate with MAX().

SELECT U.FULLPWSID, 
MAX(U.SYSNAME) AS SYSNAME, 
MAX(U.SYSCITY) AS SYSCITY, 
MAX(U.COUNTY) AS COUNTY, 
MAX(U.STATUS) AS STATUS
FROM (
    SELECT TRIM(TINWSYS.NUMBER0) AS FULLPWSID,
           TINWSYS.NAME AS SYSNAME,
           TINWSYS.D_PRIN_CITY_SVD_NM AS SYSCITY,
           TINWSYS.D_PRIN_CNTY_SVD_NM AS COUNTY,
           TINWSYS.ACTIVITY_STATUS_CD AS STATUS
           FROM TINWSYS
    WHERE (TINWSYS.NAME                   LIKE '%SCENIC WOLF LAUREL #%'   OR
           TINWSYS.D_PRIN_CITY_SVD_NM LIKE '%SCENIC WOLF LAUREL #%'   OR
           TINWSYS.NUMBER0            LIKE '%SCENIC WOLF LAUREL #%')
    UNION
    SELECT CONCAT('NC',TO_CHAR(PWSID)) AS FULLPWSID,
    PRNEW_PWS_INFO.PWS_NAME    AS SYSNAME,
    PRNEW_PWS_INFO.CITY_SVD_NM AS SYSCITY,
    PRNEW_PWS_INFO.CNTY_SVD_NM AS COUNTY,
    TO_CHAR('P')
    FROM PRNEW_PWS_INFO
    WHERE (PRNEW_PWS_INFO.PWS_NAME  LIKE '%SCENIC WOLF LAUREL #%'   OR
           PRNEW_PWS_INFO.PWSID     LIKE '%SCENIC WOLF LAUREL #%')
    ) U
GROUP BY U.FULLPWSID
ORDER BY U.FULLPWSID