All Forums Database
Sumit Wadhwani 2 posts Joined 06/13
18 Mar 2015
Need suggestions to optimize the complex query (below) involving several Joins and Aggregation..

Hi,
I have below query which is failing in dev environment due to spool issue:
 
INSERT INTO D0_IM_RSTG_TAS_MD4.STG_TEMP_STOCK_DETAILS(MATERIAL_ID,
VALUATION_AREA_ID, PLANT_ID, STORAGE_LOCATION_ID, CUSTOMER_ID,
VENDOR_ID, WBS_ELEMENT_ID, FISCAL_PERIOD_CODE, BEGIN_DATE, END_DATE,
UNRESTRICTED_STOCK, STOCK_IN_TRANSFER, QA_STOCK, BLOCKED_STOCK,
RESTRICTED_STOCK, RETURNS_STOCK, TAS_CREATE_DATE, TAS_CHANGE_DATE,
TAS_EXTRACTION_DATE, DWH_DELETION_FLAG)
SELECT
VW_STG_TEMP_STOCK_DETAILS_C.MATERIAL_ID, VW_STG_TEMP_STOCK_DETAILS_C.VALUATION_AREA_ID,
VW_STG_TEMP_STOCK_DETAILS_C.PLANT_ID, VW_STG_TEMP_STOCK_DETAILS_C.STORAGE_LOCATION_ID,
VW_STG_TEMP_STOCK_DETAILS_C.CUSTOMER_ID, VW_STG_TEMP_STOCK_DETAILS_C.VENDOR_ID,
VW_STG_TEMP_STOCK_DETAILS_C.WBS_ELEMENT_ID, VW_STG_TEMP_STOCK_DETAILS_C.FISCAL_PERIOD_CODE,
CAST(CAST(VW_STG_TEMP_STOCK_DETAILS_C.BEGIN_DATE AS TIMESTAMP(6)) AS DATE),
CAST(CAST(VW_STG_TEMP_STOCK_DETAILS_C.END_DATE AS TIMESTAMP(6)) AS DATE),
VW_STG_TEMP_STOCK_DETAILS_C.UNRESTRICTED_STOCK, VW_STG_TEMP_STOCK_DETAILS_C.STOCK_IN_TRANSFER,
VW_STG_TEMP_STOCK_DETAILS_C.QA_STOCK, VW_STG_TEMP_STOCK_DETAILS_C.BLOCKED_STOCK,
VW_STG_TEMP_STOCK_DETAILS_C.RESTRICTED_STOCK, VW_STG_TEMP_STOCK_DETAILS_C.RETURNS_STOCK,
CAST(VW_STG_TEMP_STOCK_DETAILS_C.TAS_CREATE_DATE AS TIMESTAMP(0)),
CAST(VW_STG_TEMP_STOCK_DETAILS_C.TAS_CHANGE_DATE AS TIMESTAMP(0)),
CAST(VW_STG_TEMP_STOCK_DETAILS_C.TAS_EXTRACTION_DATE AS TIMESTAMP(0)),
VW_STG_TEMP_STOCK_DETAILS_C.DWH_DELETION_FLAG
FROM
D0_IM_RSTG_TAS_MD4.VW_STG_TEMP_STOCK_DETAILS_C VW_STG_TEMP_STOCK_DETAILS_C
 
SHOW SEL * FROM D0_IM_RSTG_TAS_MD4.VW_STG_TEMP_STOCK_DETAILS_C;
SELECT
Q1.MATERIAL_ID,
T9.VALUATION_AREA_ID,
Q1.PLANT_ID,
Q1.STORAGE_LOCATION_ID,
Q1.CUSTOMER_ID,
Q1.VENDOR_ID,
Q1.WBS_ELEMENT_ID,
'FP' || T9.FISCAL_PERIOD AS FISCAL_PERIOD_CODE,
MAX(T9.BEGIN_DATE) AS BEGIN_DATE,
MAX(T9.END_DATE) AS END_DATE,
SUM(Q1.UNRESTRICTED_STOCK) AS UNRESTRICTED_STOCK,
SUM(Q1.STOCK_IN_TRANSFER) AS STOCK_IN_TRANSFER,
SUM(Q1.QA_STOCK) AS QA_STOCK,
SUM(Q1.BLOCKED_STOCK) AS BLOCKED_STOCK,
SUM(Q1.RESTRICTED_STOCK) AS RESTRICTED_STOCK,
SUM(Q1.RETURNS_STOCK) AS RETURNS_STOCK,
MIN(Q1.TAS_CREATE_DATE) AS TAS_CREATE_DATE,
MAX(Q1.TAS_CHANGE_DATE) AS TAS_CHANGE_DATE,
MAX(Q1.TAS_CHANGE_DATE) AS TAS_EXTRACTION_DATE,
MAX(Q1.DWH_DELETION_FLAG) AS DWH_DELETION_FLAG
FROM
 
(SELECT
T1
.TAS_SOURCE_ID,
T1.MATERIAL_ID,
T1.PLANT_ID,
CAST('*' AS CHAR(4)) AS STORAGE_LOCATION_ID,
CAST('*' AS VARCHAR(10)) AS CUSTOMER_ID,
COALESCE(T1.VENDOR_ID,CAST('*' AS VARCHAR(10))) AS VENDOR_ID,
CAST('*' AS VARCHAR(8)) AS WBS_ELEMENT_ID,
T1.PERIOD_END_DATE,
T1.FISCAL_PERIOD,
T1.UNRESTRICTED_STOCK,
CAST('0.000' AS DECIMAL(13,3)) AS STOCK_IN_TRANSFER,
T1.QA_STOCK,
CAST('0.000' AS DECIMAL(13,3)) AS BLOCKED_STOCK,
T1.RESTRICTED_STOCK,
CAST('0.000' AS DECIMAL(13,3)) AS RETURNS_STOCK,
T1.TAS_CREATE_DATE,
T1.TAS_CHANGE_DATE,
T1.TAS_EXTRACTION_DATE,
T1.DWH_DELETION_FLAG
FROM D0_IM_CORE_T.CORE_MATERIAL_STOCK_VENDOR T1
/* INNER JOIN D0_IM_RSTG_TAS_MD4.STG_EXECUTION_PARAMETERS T2
ON T2.PARAMETER_VALUE = T1.TAS_SOURCE_ID
WHERE T2.PARAMETER_NAME = 'SOURCE_ID'*/
UNION ALL
SELECT
T3
.TAS_SOURCE_ID,
T3.MATERIAL_ID,
T3.PLANT_ID,
T3.STORAGE_LOCATION_ID,
CAST('*' AS VARCHAR(10)) AS CUSTOMER_ID,
CAST('*' AS VARCHAR(10)) AS VENDOR_ID,
T3.WBS_ELEMENT_ID,
T3.PERIOD_END_DATE,
T3.FISCAL_PERIOD,
T3.UNRESTRICTED_STOCK,
CAST('0.000' AS DECIMAL(13,3)) AS STOCK_IN_TRANSFER,
T3.QA_STOCK,
T3.BLOCKED_STOCK,
T3.RESTRICTED_STOCK,
CAST('0.000' AS DECIMAL(13,3)) AS RETURNS_STOCK,
T3.TAS_CREATE_DATE,
T3.TAS_CHANGE_DATE,
T3.TAS_EXTRACTION_DATE,
T3.DWH_DELETION_FLAG
FROM D0_IM_CORE_T.CORE_MATERIAL_STOCK_PROJ T3
/* INNER JOIN D0_IM_RSTG_TAS_MD4.STG_EXECUTION_PARAMETERS T4
ON T4.PARAMETER_VALUE = T3.TAS_SOURCE_ID
WHERE T4.PARAMETER_NAME = 'SOURCE_ID'*/
UNION ALL
SELECT
T5
.TAS_SOURCE_ID,
T5.MATERIAL_ID,
T5.PLANT_ID,
CAST('*' AS CHAR(4)) AS STORAGE_LOCATION_ID,
T5.CUSTOMER_ID,
CAST('*' AS VARCHAR(10)) AS VENDOR_ID,
CAST('*' AS VARCHAR(8)) AS WBS_ELEMENT_ID,
T5.PERIOD_END_DATE,
T5.FISCAL_PERIOD,
T5.UNRESTRICTED_STOCK,
CAST('0.000' AS DECIMAL(13,3)) AS STOCK_IN_TRANSFER,
T5.QA_STOCK,
CAST('0.000' AS DECIMAL(13,3)) AS BLOCKED_STOCK,
T5.RESTRICTED_STOCK,
CAST('0.000' AS DECIMAL(13,3)) AS RETURNS_STOCK,
T5.TAS_CREATE_DATE,
T5.TAS_CHANGE_DATE,
T5.TAS_EXTRACTION_DATE,
T5.DWH_DELETION_FLAG
FROM D0_IM_CORE_T.CORE_MATERIAL_STOCK_CUSTOMER T5
/* INNER JOIN D0_IM_RSTG_TAS_MD4.STG_EXECUTION_PARAMETERS T6
ON T6.PARAMETER_VALUE = T5.TAS_SOURCE_ID
WHERE T6.PARAMETER_NAME = 'SOURCE_ID'*/
UNION ALL
SELECT
T7
.TAS_SOURCE_ID,
T7.MATERIAL_ID,
T7.PLANT_ID,
T7.STORAGE_LOCATION_ID,
CAST('*' AS VARCHAR(10)) AS CUSTOMER_ID,
CAST('*' AS VARCHAR(10)) AS VENDOR_ID,
CAST('*' AS VARCHAR(8)) AS WBS_ELEMENT_ID,
T7.PERIOD_END_DATE,
T7.FISCAL_PERIOD,
T7.UNRESTRICTED_STOCK,
T7.STOCK_IN_TRANSFER,
T7.STOCK_IN_QA,
T7.BLOCKED_STOCK,
T7.RESTRICTED_STOCK,
T7.RETURNS_STOCK,
T7.TAS_CREATE_DATE,
T7.TAS_CHANGE_DATE,
T7.TAS_EXTRACTION_DATE,
T7.DWH_DELETION_FLAG
FROM D0_IM_CORE_T.CORE_MATERIAL_STOCK_STLOC T7
/* INNER JOIN D0_IM_RSTG_TAS_MD4.STG_EXECUTION_PARAMETERS T8
ON T8.PARAMETER_VALUE = T7.TAS_SOURCE_ID
WHERE T8.PARAMETER_NAME = 'SOURCE_ID'*/
) Q1
INNER
 
JOIN D0_IM_RSTG_TAS_MD4.STG_EXECUTION_PARAMETERS T8
ON
 
T8.PARAMETER_VALUE = Q1.TAS_SOURCE_ID
-- This join sets all stock records on the timeline so there are no gaps
INNER
 
JOIN D0_IM_RSTG_TAS_MD4.STG_TEMP_STOCK_PERIOD_LIST T9
ON T9.MATERIAL_ID = Q1.MATERIAL_ID
AND T9.PLANT_ID = Q1.PLANT_ID
AND T9.END_DATE >= Q1.PERIOD_END_DATE
WHERE
 
T8.PARAMETER_NAME = 'SOURCE_ID'
GROUP
 
BY
Q1.MATERIAL_ID,
T9.VALUATION_AREA_ID,
Q1.PLANT_ID,
Q1.STORAGE_LOCATION_ID,
Q1.CUSTOMER_ID,
Q1.VENDOR_ID,
Q1.WBS_ELEMENT_ID,
T9.FISCAL_PERIOD
;
 
Explain Plan: 
1) FIRST, we LOCK a DISTINCT D0_IM_RSTG_TAS_MD4."pseudo table" FOR
WRITE ON a RowHash TO prevent GLOBAL deadlock FOR
D0_IM_RSTG_TAS_MD4.STG_TEMP_STOCK_DETAILS.
2) NEXT, we LOCK D0_IM_CORE_T.T1 IN VIEW
D0_IM_RSTG_TAS_MD4.VW_STG_TEMP_STOCK_DETAILS_C FOR ACCESS, we LOCK
D0_IM_CORE_T.T7 IN VIEW
D0_IM_RSTG_TAS_MD4.VW_STG_TEMP_STOCK_DETAILS_C FOR ACCESS, we LOCK
D0_IM_CORE_T.T3 IN VIEW
D0_IM_RSTG_TAS_MD4.VW_STG_TEMP_STOCK_DETAILS_C FOR ACCESS, we LOCK
D0_IM_CORE_T.T5 IN VIEW
D0_IM_RSTG_TAS_MD4.VW_STG_TEMP_STOCK_DETAILS_C FOR ACCESS, we LOCK
D0_IM_RSTG_TAS_MD4.T9 IN VIEW VW_STG_TEMP_STOCK_DETAILS_C FOR
ACCESS, we LOCK D0_IM_RSTG_TAS_MD4.STG_TEMP_STOCK_DETAILS FOR
WRITE, AND we LOCK D0_IM_RSTG_TAS_MD4.T8 IN VIEW
VW_STG_TEMP_STOCK_DETAILS_C FOR ACCESS.
3) We DO an ALL-AMPs RETRIEVE step FROM D0_IM_CORE_T.T1 IN VIEW
D0_IM_RSTG_TAS_MD4.VW_STG_TEMP_STOCK_DETAILS_C BY way OF an
ALL-ROWS scan WITH NO residual conditions INTO SPOOL 1 (all_amps),
which IS built locally ON the AMPs. The SIZE OF SPOOL 1 IS
estimated WITH HIGH confidence TO be 29,112 ROWS (4,308,576 BYTES).
The estimated TIME FOR this step IS 0.03 seconds.
4) We DO an ALL-AMPs RETRIEVE step FROM D0_IM_CORE_T.T3 IN VIEW
D0_IM_RSTG_TAS_MD4.VW_STG_TEMP_STOCK_DETAILS_C BY way OF an
ALL-ROWS scan WITH NO residual conditions INTO SPOOL 1 (all_amps),
which IS built locally ON the AMPs. The SIZE OF SPOOL 1 IS
estimated WITH HIGH confidence TO be 705,824 ROWS (104,461,952
BYTES). The estimated TIME FOR this step IS 0.38 seconds.
5) We DO an ALL-AMPs RETRIEVE step FROM D0_IM_CORE_T.T5 IN VIEW
D0_IM_RSTG_TAS_MD4.VW_STG_TEMP_STOCK_DETAILS_C BY way OF an
ALL-ROWS scan WITH NO residual conditions INTO SPOOL 1 (all_amps),
which IS built locally ON the AMPs. The SIZE OF SPOOL 1 IS
estimated WITH HIGH confidence TO be 3,813,063 ROWS (564,333,324
BYTES). The estimated TIME FOR this step IS 1.49 seconds.
6) We EXECUTE the FOLLOWING steps IN parallel.
1) We DO an ALL-AMPs RETRIEVE step FROM D0_IM_CORE_T.T7 IN VIEW
D0_IM_RSTG_TAS_MD4.VW_STG_TEMP_STOCK_DETAILS_C BY way OF an
ALL-ROWS scan WITH NO residual conditions INTO SPOOL 1
(all_amps), which IS built locally ON the AMPs. The SIZE OF
SPOOL 1 IS estimated WITH HIGH confidence TO be 90,202,368
ROWS (13,349,950,464 BYTES). The estimated TIME FOR this
step IS 49.75 seconds.
2) We DO an ALL-AMPs RETRIEVE step FROM D0_IM_RSTG_TAS_MD4.T8 IN
VIEW VW_STG_TEMP_STOCK_DETAILS_C BY way OF an ALL-ROWS scan
WITH a CONDITION OF ("D0_IM_RSTG_TAS_MD4.T8 in view
VW_STG_TEMP_STOCK_DETAILS_C.PARAMETER_NAME = 'SOURCE_ID'"
)
INTO SPOOL 5 (all_amps), which IS duplicated ON ALL AMPs.
The SIZE OF SPOOL 5 IS estimated WITH LOW confidence TO be 48
ROWS (8,880 BYTES). The estimated TIME FOR this step IS 0.03
seconds.
7) We DO an ALL-AMPs JOIN step FROM SPOOL 5 (LAST USE) BY way OF an
ALL-ROWS scan, which IS joined TO SPOOL 1 (LAST USE) BY way OF an
ALL-ROWS scan. SPOOL 5 AND SPOOL 1 are joined USING a product
JOIN, WITH a JOIN CONDITION OF ("PARAMETER_VALUE =
Q1.TAS_SOURCE_ID"
). The RESULT goes INTO SPOOL 6 (all_amps),
which IS redistributed BY the HASH code OF (Q1.MATERIAL_ID,
Q1.PLANT_ID) TO ALL AMPs. THEN we DO a SORT TO ORDER SPOOL 6 BY
ROW HASH. The SIZE OF SPOOL 6 IS estimated WITH LOW confidence TO
be 22,550,592 ROWS (3,157,082,880 BYTES). The estimated TIME FOR
this step IS 52.49 seconds.
8) We DO an ALL-AMPs JOIN step FROM SPOOL 6 (LAST USE) BY way OF a
RowHash match scan, which IS joined TO D0_IM_RSTG_TAS_MD4.T9 IN
VIEW VW_STG_TEMP_STOCK_DETAILS_C BY way OF a RowHash match scan
WITH a CONDITION OF ("NOT (D0_IM_RSTG_TAS_MD4.T9 in view
VW_STG_TEMP_STOCK_DETAILS_C.MATERIAL_ID IS NULL)"
). SPOOL 6 AND
D0_IM_RSTG_TAS_MD4.T9 are joined USING a MERGE JOIN, WITH a JOIN
CONDITION OF ("(D0_IM_RSTG_TAS_MD4.T9.MATERIAL_ID = MATERIAL_ID)
AND ((D0_IM_RSTG_TAS_MD4.T9.PLANT_ID = PLANT_ID) AND
(D0_IM_RSTG_TAS_MD4.T9.END_DATE >= PERIOD_END_DATE ))"
). The
INPUT TABLE D0_IM_RSTG_TAS_MD4.T9 will NOT be cached IN memory.
The RESULT goes INTO SPOOL 4 (all_amps), which IS built locally ON
the AMPs. The RESULT SPOOL file will NOT be cached IN memory.
The SIZE OF SPOOL 4 IS estimated WITH INDEX JOIN confidence TO be
3,277,116,351 ROWS (544,001,314,266 BYTES). The estimated TIME
FOR this step IS 28 minutes AND 2 seconds.
9) We DO an ALL-AMPs SUM step TO AGGREGATE FROM SPOOL 4 (LAST USE) BY
way OF an ALL-ROWS scan , GROUPING BY field1 ( Q1.MATERIAL_ID
,D0_IM_RSTG_TAS_MD4.T9.VALUATION_AREA_ID ,Q1.PLANT_ID
,Q1.STORAGE_LOCATION_ID ,Q1.CUSTOMER_ID ,Q1.VENDOR_ID
,Q1.WBS_ELEMENT_ID ,D0_IM_RSTG_TAS_MD4.T9.FISCAL_PERIOD).
AGGREGATE Intermediate Results are computed locally, THEN placed
IN SPOOL 7. The AGGREGATE SPOOL file will NOT be cached IN memory.
The SIZE OF SPOOL 7 IS estimated WITH LOW confidence TO be
2,457,837,264 ROWS (757,013,877,312 BYTES). The estimated TIME
FOR this step IS 1 HOUR AND 58 minutes.
10) We DO an ALL-AMPs RETRIEVE step FROM SPOOL 7 (LAST USE) BY way OF
an ALL-ROWS scan INTO SPOOL 2 (all_amps), which IS redistributed
BY the HASH code OF (Q1.MATERIAL_ID,
D0_IM_RSTG_TAS_MD4.T9.VALUATION_AREA_ID) TO ALL AMPs. THEN we DO
a SORT TO ORDER SPOOL 2 BY ROW HASH. The RESULT SPOOL file will
NOT be cached IN memory. The SIZE OF SPOOL 2 IS estimated WITH
LOW confidence TO be 2,457,837,264 ROWS (437,495,032,992 BYTES).
The estimated TIME FOR this step IS 1 HOUR AND 58 minutes.
11) We DO an ALL-AMPs MERGE INTO
D0_IM_RSTG_TAS_MD4.STG_TEMP_STOCK_DETAILS FROM SPOOL 2 (LAST USE).
The SIZE IS estimated WITH LOW confidence TO be 2,457,837,264 ROWS.
The estimated TIME FOR this step IS 7 hours AND 2 minutes.
12) We spoil the parser's dictionary cache for the table.
13) Finally, we send out an END TRANSACTION step to all AMPs involved
in processing the request.
-> No rows are returned to the user as the result of statement 1.
 
ROWCOUNT:
SELCOUNT(*) FROM D0_IM_RSTG_TAS_MD4.STG_EXECUTION_PARAMETERS
54
SEL COUNT(*) FROM D0_IM_CORE_T.CORE_MATERIAL_STOCK_STLOC
86,389,305
SELCOUNT(*) FROM D0_IM_CORE_T.CORE_MATERIAL_STOCK_CUSTOMER;
3,107,239
SELCOUNT(*) FROM D0_IM_CORE_T.CORE_MATERIAL_STOCK_PROJ;
676,712
SELCOUNT(*) FROM D0_IM_CORE_T.CORE_MATERIAL_STOCK_VENDOR;
29,112
SELCOUNT(*) FROM D0_IM_RSTG_TAS_MD4.STG_TEMP_STOCK_PERIOD_LIST
1,475,950,187
 
STATISTICS:
I have collected statistics on all the tables..
HELP STATISTICS D0_IM_RSTG_TAS_MD4.STG_TEMP_STOCK_PERIOD_LIST
Date Time Unique Values Column Names Column Dictionary Names Column SQL Names Column Names UEscape
1 15/03/18 07:36:37        1,475,950,187 * * "*" ?
2 15/03/18 07:34:50           10,156,353 MATERIAL_ID,PLANT_ID MATERIAL_ID,PLANT_ID MATERIAL_ID,PLANT_ID ?
3 15/03/18 07:35:15                  204 END_DATE END_DATE END_DATE ?
4 15/03/18 07:35:53                  272 FISCAL_PERIOD FISCAL_PERIOD FISCAL_PERIOD ?
5 15/03/18 07:36:37                  121 VALUATION_AREA_ID VALUATION_AREA_ID VALUATION_AREA_ID ?
 
T9 table has 1.4 billion records which is taking long time in joining with other tables... Also, I see aggregation of these more than 1.4 billion records as a challenge.. I tried several things like obviously statistics, changing PI of T9 table to improve JOIN of T9 with other tables.. But I cannot think of anything which can improve aggregation performance.
Please suggest if we can tune this query somehow..
 
 

ravimans 54 posts Joined 02/14
18 Mar 2015

Hi Sumit,
1) Can you try collecting stats individually for the below columns. I mean single column stats. Multi column stats if collected already let it be.
T9.MATERIAL_ID
T9.PLANT_ID
T9.END_DATE
2) Also make sure stats in collected on the below JOIN columns and all the base tables index & join columns.
T8.PARAMETER_VALUE = Q1.TAS_SOURCE_ID
 T8.PARAMETER_NAME = 'SOURCE_ID'
3) Can you try changing the join as below and rerun:
D0_IM_CORE_T.CORE_MATERIAL_STOCK_VENDOR T1 
UNION ALL
D0_IM_CORE_T.CORE_MATERIAL_STOCK_PROJ T3 
UNION ALL
D0_IM_CORE_T.CORE_MATERIAL_STOCK_CUSTOMER T5 
UNION ALL
D0_IM_CORE_T.CORE_MATERIAL_STOCK_STLOC T7 
INNER JOIN D0_IM_RSTG_TAS_MD4.STG_TEMP_STOCK_PERIOD_LIST T9 
ON T9.MATERIAL_ID = Q1.MATERIAL_ID
AND T9.PLANT_ID = Q1.PLANT_ID
 
AND T9.END_DATE >= Q1.PERIOD_END_DATE
INNER JOIN D0_IM_RSTG_TAS_MD4.STG_EXECUTION_PARAMETERS T8 -- 54
ON
T8.PARAMETER_VALUE = Q1.TAS_SOURCE_ID
-- This join sets all stock records on the timeline so there are no gaps
WHERE
 T8.PARAMETER_NAME = 'SOURCE_ID'
 
Let me know if it helps?

You must sign in to leave a comment.