06 Jan 2014
Teradata spool issue in recursive query

Hi, I am facing an issue during the execution of a recursive query: it keeps running out of spool.
The table size is less than 500 MB and the total record count is only 1,800, yet during execution the query used more than 1.5 TB of spool space. Can you please help me optimize this query?
Thanks a lot in advance.
Please find the query below:

INSERT INTO DB_NAME.TAG_xyz_DAY_RESL_NBR
( DAY_ID, SOURCE_ID, DAT_MAJ_TECH, RESILIATION_NBR, CONTENT_ID,
  LINE_ID, DEVICE_ID, TYPE_BOX, BOUQUET, RESIL_CAUSE )
WITH RECURSIVE myquery (RESIL_CAUSE, DAY_ID, SOURCE_ID, DAT_MAJ_TECH,
                        RESILIATION_NBR, CONTENT_ID, LINE_ID, DEVICE_ID,
                        TYPE_BOX, BOUQUET, level, path) AS
(
  select
    (case when T1.RESIL_CAUSE is null then '?' else T1.RESIL_CAUSE end) as RESIL_CAUSE,
    DAY_ID, SOURCE_ID, DAT_MAJ_TECH, RESILIATION_NBR, CONTENT_ID,
    LINE_ID, DEVICE_ID, TYPE_BOX, BOUQUET,
    0 as level,
    T1.RESIL_CAUSE as path
  from DB_NAME.TAG_xyz_DAY_RESL_NBR_bkp T1
  where T1.RESIL_CAUSE =
        (select min(T2.RESIL_CAUSE)
         from DB_NAME.TAG_xyz_DAY_RESL_NBR_bkp T2
         where T1.line_id = T2.line_id)

  UNION ALL

  SELECT
    (case when T1.RESIL_CAUSE is null then '?' else T1.RESIL_CAUSE end) as RESIL_CAUSE,
    t1.DAY_ID, t1.SOURCE_ID, t1.DAT_MAJ_TECH, t1.RESILIATION_NBR,
    t1.CONTENT_ID, t1.LINE_ID, t1.DEVICE_ID, t1.TYPE_BOX, t1.BOUQUET,
    m.level + 1 as level,
    (case
       when char_length(m.path) >= char_length(T1.RESIL_CAUSE)
        and position((case when T1.RESIL_CAUSE is null then '?' else T1.RESIL_CAUSE end) in m.path) <> 0
         then m.path
       when char_length(m.path) < char_length(T1.RESIL_CAUSE)
        and position(m.path in T1.RESIL_CAUSE) <> 0
         then T1.RESIL_CAUSE
       else m.path || ',' || (case when T1.RESIL_CAUSE is null then '?' else T1.RESIL_CAUSE end)
     end) as path
  from DB_NAME.TAG_xyz_DAY_RESL_NBR_bkp T1,
       myquery m
  where (case when T1.RESIL_CAUSE is null then '?' else T1.RESIL_CAUSE end) > m.RESIL_CAUSE
)
select DAY_ID, SOURCE_ID, DAT_MAJ_TECH, RESILIATION_NBR, CONTENT_ID,
       LINE_ID, DEVICE_ID, TYPE_BOX, BOUQUET, path as ConcatenatedRows
from (
  select DAY_ID, SOURCE_ID, DAT_MAJ_TECH, RESILIATION_NBR, CONTENT_ID,
         LINE_ID, DEVICE_ID, TYPE_BOX, BOUQUET, path,
         '001 to ' || ((level + 1) (format '999')) as RowNumber,
         level as lev,
         max(level) over (partition by line_id) max_level
  from myquery
) mv
where lev = max_level
;

 
 

06 Jan 2014

Hi,
Below is the explain plan for that query:
 

Explain WITH RECURSIVE myquery(RESIL_CAUSE, DAY_ID, SOURCE_ID, DAT_MAJ_TECH, RESILIATION_NBR, CONTENT_ID, LINE_ID, DEVICE_ID, TYPE_BOX, BOUQUET, level, path) AS
(
 select
  (case when T1.RESIL_CAUSE is null then '?' else T1.RESIL_CAUSE end) as RESIL_CAUSE,
  DAY_ID, SOURCE_ID, DAT_MAJ_TECH, RESILIATION_NBR, CONTENT_ID, LINE_ID, DEVICE_ID, TYPE_BOX, BOUQUET,
  0 as level, T1.RESIL_CAUSE as path
 from DB_NAME.TAG_Ixyz_DAY_RESL_NBR_bkp T1
 where T1.RESIL_CAUSE = (select min(T2.RESIL_CAUSE) from DB_NAME.TAG_Ixyz_DAY_RESL_NBR_bkp T2 where T1.line_id = T2.line_id)
 UNION ALL
 SELECT
  (case when T1.RESIL_CAUSE is null then '?' else T1.RESIL_CAUSE end) as RESIL_CAUSE,
  t1.DAY_ID, t1.SOURCE_ID, t1.DAT_MAJ_TECH, t1.RESILIATION_NBR, t1.CONTENT_ID, t1.LINE_ID, t1.DEVICE_ID, t1.TYPE_BOX, t1.BOUQUET, m.level+1 as level,
  (case when char_length(m.path) >= char_length(T1.RESIL_CAUSE) and position((case when T1.RESIL_CAUSE is null then '?' else T1.RESIL_CAUSE end) in m.path) <> 0 then m.path
        when char_length(m.path) < char_length(T1.RESIL_CAUSE) and position(m.path in T1.RESIL_CAUSE) <> 0 then T1.RESIL_CAUSE
        else m.path || ',' || (case when T1.RESIL_CAUSE is null then '?' else T1.RESIL_CAUSE end) end) as path
 from DB_NAME.TAG_Ixyz_DAY_RESL_NBR_bkp T1, myquery m
 where (case when T1.RESIL_CAUSE is null then '?' else T1.RESIL_CAUSE end) > m.RESIL_CAUSE
)
select DAY_ID, SOURCE_ID, DAT_MAJ_TECH, RESILIATION_NBR, CONTENT_ID, LINE_ID, DEVICE_ID, TYPE_BOX, BOUQUET, path as ConcatenatedRows
from (
 select DAY_ID, SOURCE_ID, DAT_MAJ_TECH, RESILIATION_NBR, CONTENT_ID, LINE_ID, DEVICE_ID, TYPE_BOX, BOUQUET, path,
        '001 to ' || ((level + 1) (format '999')) as RowNumber, level as lev, max(level) over (partition by line_id) max_level
 from myquery) mv
where lev = max_level
;

 

  1) First, we lock a distinct DB_NAME."pseudo table" for read on a
     RowHash to prevent global deadlock for DB_NAME.T2.
  2) Next, we lock DB_NAME.T2 for read.
  3) We execute the following steps in parallel.
       1) We do an all-AMPs RETRIEVE step from DB_NAME.T2 by way of
          an all-rows scan with no residual conditions into Spool 4
          (all_amps), which is duplicated on all AMPs.  The size of
          Spool 4 is estimated with low confidence to be 20,736 rows (
          580,608 bytes).  The estimated time for this step is 0.03
          seconds.
       2) We do an all-AMPs RETRIEVE step from DB_NAME.T1 by way of
          an all-rows scan with no residual conditions into Spool 5
          (all_amps), which is built locally on the AMPs.  Then we do a
          SORT to order Spool 5 by the hash code of (
          DB_NAME.T1.RESIL_CAUSE, DB_NAME.T1.LINE_ID).  The size
          of Spool 5 is estimated with low confidence to be 432 rows (
          40,608 bytes).  The estimated time for this step is 0.01
          seconds.
  4) We do an all-AMPs JOIN step from DB_NAME.T2 by way of an
     all-rows scan with a condition of ("NOT (DB_NAME.T2.LINE_ID IS
     NULL)"), which is joined to Spool 4 (Last Use) by way of an
     all-rows scan.  DB_NAME.T2 and Spool 4 are joined using a
     product join, with a join condition of ("(1=1)").  The result goes
     into Spool 6 (all_amps), which is redistributed by the hash code
     of (DB_NAME.T2.LINE_ID) to all AMPs.  The size of Spool 6 is
     estimated with low confidence to be 186,624 rows (18,849,024
     bytes).  The estimated time for this step is 0.11 seconds.
  5) We do an all-AMPs SUM step to aggregate from Spool 6 (Last Use) by
     way of an all-rows scan.  Aggregate Intermediate Results are
     computed locally, then placed in Spool 8.  The size of Spool 8 is
     estimated with no confidence to be 186,624 rows (19,035,648 bytes).
     The estimated time for this step is 0.19 seconds.
  6) We do an all-AMPs RETRIEVE step from Spool 8 (Last Use) by way of
     an all-rows scan into Spool 7 (all_amps), which is duplicated on
     all AMPs.  Then we do a SORT to order Spool 7 by the hash code of
     (DB_NAME.T2.LINE_ID, DB_NAME.T2.RESIL_CAUSE).  The size of
     Spool 7 is estimated with no confidence to be 8,957,952 rows (
     429,981,696 bytes).
  7) We do an all-AMPs JOIN step from Spool 5 (Last Use) by way of an
     all-rows scan, which is joined to Spool 7 (Last Use) by way of an
     all-rows scan.  Spool 5 and Spool 7 are joined using an inclusion
     merge join, with a join condition of ("((LINE_ID = LINE_ID) AND
     (NOT (LINE_ID IS NULL ))) AND (RESIL_CAUSE = Field_3)").  The
     result goes into Spool 11 (all_amps), which is built locally on
     the AMPs.  The size of Spool 11 is estimated with no confidence to
     be 432 rows (152,064 bytes).  The estimated time for this step is
     0.04 seconds.
  8) We do an all-AMPs RETRIEVE step from Spool 11 by way of an
     all-rows scan into Spool 2 (all_amps), which is built locally on
     the AMPs.  The size of Spool 2 is estimated with no confidence to
     be 432 rows (152,064 bytes).  The estimated time for this step is
     0.03 seconds.
  9) We do an all-AMPs RETRIEVE step from Spool 11 (Last Use) by way of
     an all-rows scan into Spool 12 (all_amps), which is duplicated on
     all AMPs.  The size of Spool 12 is estimated with no confidence to
     be 20,736 rows (912,384 bytes).  The estimated time for this step
     is 0.03 seconds.
 10) We do an all-AMPs JOIN step from DB_NAME.T1 by way of an
     all-rows scan with no residual conditions, which is joined to
     Spool 12 (Last Use) by way of an all-rows scan.  DB_NAME.T1 and
     Spool 12 are joined using a product join, with a join condition of
     ("(( CASE WHEN (DB_NAME.T1.RESIL_CAUSE IS NULL) THEN ('?') ELSE
     (DB_NAME.T1.RESIL_CAUSE) END ))> RESIL_CAUSE").  The result
     goes into Spool 13 (all_amps), which is built locally on the AMPs.
     The size of Spool 13 is estimated with no confidence to be 8,979
     rows (3,160,608 bytes).  The estimated time for this step is 0.04
     seconds.
 11) We do an all-AMPs RETRIEVE step from Spool 13 (Last Use) by way of
     an all-rows scan into Spool 11 (all_amps), which is built locally
     on the AMPs.  The size of Spool 11 is estimated with no confidence
     to be 449,382 rows (158,182,464 bytes).  The estimated time for
     this step is 0.03 seconds.  If one or more rows are inserted into
     spool 11, then go to step 8.
 12) We do an all-AMPs STAT FUNCTION step from Spool 2 (Last Use) by
     way of an all-rows scan into Spool 17 (Last Use), which is
     redistributed by hash code to all AMPs.  The result rows are put
     into Spool 15 (all_amps), which is built locally on the AMPs.  The
     size is estimated with no confidence to be 449,382 rows (
     71,002,356 bytes).
 13) We do an all-AMPs RETRIEVE step from Spool 15 (Last Use) by way of
     an all-rows scan into Spool 3 (used to materialize view, derived
     table or table function mv) (all_amps), which is built locally on
     the AMPs.  The size of Spool 3 is estimated with no confidence to
     be 449,382 rows (50,330,784 bytes).  The estimated time for this
     step is 0.19 seconds.
 14) We do an all-AMPs RETRIEVE step from Spool 3 (Last Use) by way of
     an all-rows scan with a condition of ("mv.LEV = mv.MAX_LEVEL")
     into Spool 20 (all_amps), which is built locally on the AMPs.  The
     size of Spool 20 is estimated with no confidence to be 449,382
     rows (49,881,402 bytes).  The estimated time for this step is 0.17
     seconds.
 15) Finally, we send out an END TRANSACTION step to all AMPs involved
     in processing the request.
  -> The contents of Spool 20 are sent back to the user as the result
     of statement 1.

 

Raja_KT 1246 posts Joined 07/09
06 Jan 2014

Wow! Even with a volatile table or a separate recursive view it may still be slow; you could then insert into the target table at the end. Have you thought of that?

You know the data best: your SELECT has a lot of comparisons, concatenations, and functions that force full-table scans.

Cheers,

Raja K Thaw
My wiki: http://en.wikipedia.org/wiki/User:Kt_raj1
Street Children suffer not by their fault. We can help them if we want.

dnoeth 4628 posts Joined 11/04
06 Jan 2014

Did you ever run the SELECT on its own?
 
IMHO there's no proper join condition in the recursive part, resulting in huge cross joins; you should check the result when you add a WHERE level <= 3.
Additionally, you need to cast T1.RESIL_CAUSE to a larger VARCHAR, as the first SELECT of a UNION determines the data type, so concatenating will otherwise result in truncation.
 
It seems you want to do a kind of GROUP_CONCAT (in a strange way). Is there any rule for processing in a specific order? Simply materialize the base data in a Volatile Table including a ROW_NUMBER and then use this to join on t1.rn+1 = t2.rn.

Dieter
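
A minimal sketch of this Volatile Table + ROW_NUMBER approach, with assumed names (vt_resil, rn), only a subset of the real columns, an assumed processing order of RESIL_CAUSE (since no ordering rule has been stated), and the same POSITION-based duplicate check as the original query:

CREATE VOLATILE TABLE vt_resil AS
( SELECT LINE_ID, DAY_ID, CONTENT_ID,
         COALESCE(RESIL_CAUSE, '?') AS RESIL_CAUSE,
         ROW_NUMBER() OVER (PARTITION BY LINE_ID ORDER BY RESIL_CAUSE) AS rn
  FROM DB_NAME.TAG_xyz_DAY_RESL_NBR_bkp
) WITH DATA
PRIMARY INDEX (LINE_ID)
ON COMMIT PRESERVE ROWS;

WITH RECURSIVE concat_path (LINE_ID, rn, path) AS
(
  SELECT LINE_ID, rn,
         CAST(RESIL_CAUSE AS VARCHAR(1000))   -- widen up front to avoid truncation
  FROM vt_resil
  WHERE rn = 1
  UNION ALL
  SELECT t.LINE_ID, t.rn,
         CASE WHEN POSITION(t.RESIL_CAUSE IN c.path) > 0
              THEN c.path                     -- same naive containment test as the original query
              ELSE c.path || ',' || t.RESIL_CAUSE
         END
  FROM vt_resil t
  JOIN concat_path c
    ON  t.LINE_ID = c.LINE_ID
   AND  t.rn      = c.rn + 1                  -- exactly one predecessor per row
)
SELECT LINE_ID, path
FROM concat_path
QUALIFY rn = MAX(rn) OVER (PARTITION BY LINE_ID);

Because the recursion advances on rn + 1 within one LINE_ID, each step joins a row to exactly one predecessor instead of to every row with a larger RESIL_CAUSE, which is what makes the original query explode in spool.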

06 Jan 2014

Hi Raja & Dieter, thanks a lot for the valuable input.
My requirement is as follows. Input table data:

DAY_ID      CONTENT_ID   LINE_ID      Resiliation
1/1/2014    12345        07-098787    1,2
1/1/2014    12345        07-098787    0, ?,1
1/1/2014    12345        07-098787    2,4,5
1/1/2014    12345        07-098787    1,2,3

And the expected output is:

DAY_ID      CONTENT_ID   LINE_ID      Resiliation
1/1/2014    12345        07-098787    1,2,0, ?,4,5,3

The query above works fine when the record count is very low, but with more than 500 records it runs into the spool space issue.

I have tried the same kind of query with a simple rank & concat function, but I was not able to remove the duplicates from the output record set. Below is the output of the rank & concat query:

DAY_ID      CONTENT_ID   LINE_ID      Resiliation
1/1/2014    12345        07-098787    1,2,0,?,1,2,4,5,1,2,3

As per the requirement, no duplicates are allowed in the Resiliation column.

Can you please help me with this scenario?
Thanks a lot in advance.
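
Not a final answer, just a minimal sketch of the first half of this in plain SQL: split each Resiliation list into one value per row and deduplicate it. The source table name src and the VARCHAR sizes are assumptions; it relies only on SUBSTR/POSITION and a recursive walk over the string:

WITH RECURSIVE split (DAY_ID, CONTENT_ID, LINE_ID, token, rest) AS
(
  SELECT DAY_ID, CONTENT_ID, LINE_ID,
         CAST('' AS VARCHAR(100)),                           -- seed row carries no token yet
         CAST(Resiliation || ',' AS VARCHAR(1000))           -- trailing comma simplifies the loop
  FROM src
  UNION ALL
  SELECT DAY_ID, CONTENT_ID, LINE_ID,
         TRIM(SUBSTR(rest, 1, POSITION(',' IN rest) - 1)),   -- next value
         SUBSTR(rest, POSITION(',' IN rest) + 1)             -- remainder of the list
  FROM split
  WHERE POSITION(',' IN rest) > 0
)
SELECT DISTINCT DAY_ID, CONTENT_ID, LINE_ID, token
FROM split
WHERE token <> '';

DISTINCT removes the duplicate values per line; putting them back into a single comma-separated string, and in which order, is the remaining piece discussed in the replies below.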
 

07 Jan 2014

Hi Experts,
Can you please help me with this scenario? :)

07 Jan 2014

Hello Experts,
 
Can you please help? :)

Raja_KT 1246 posts Joined 07/09
07 Jan 2014

Right now I can think of a stored procedure with nested loops: strip each value out of Resiliation, check it with an IF condition, and only then insert into a final table.
Cheers,
Raja

Raja K Thaw
My wiki: http://en.wikipedia.org/wiki/User:Kt_raj1
Street Children suffer not by their fault. We can help them if we want.
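
A rough, purely illustrative skeleton of that kind of stored procedure, not a tested implementation: the table names src and tgt, the column types, and the variable sizes are all assumptions. It walks the rows per LINE_ID, strips one value at a time out of Resiliation, and appends it only if it is not already in the accumulated string:

REPLACE PROCEDURE DB_NAME.dedup_resiliation()
BEGIN
  DECLARE v_line    VARCHAR(20)   DEFAULT '';
  DECLARE v_day     DATE;
  DECLARE v_content INTEGER;
  DECLARE v_out     VARCHAR(1000) DEFAULT '';
  DECLARE v_rest    VARCHAR(1000);
  DECLARE v_tok     VARCHAR(100);

  FOR rec AS c1 CURSOR FOR
      SELECT DAY_ID, CONTENT_ID, LINE_ID, Resiliation
      FROM src
      ORDER BY LINE_ID
  DO
    /* new LINE_ID: write out the accumulated string for the previous one */
    IF v_line <> '' AND rec.LINE_ID <> v_line THEN
      INSERT INTO tgt (DAY_ID, CONTENT_ID, LINE_ID, Resiliation)
      VALUES (v_day, v_content, v_line, v_out);
      SET v_out = '';
    END IF;

    SET v_line    = rec.LINE_ID;
    SET v_day     = rec.DAY_ID;
    SET v_content = rec.CONTENT_ID;
    SET v_rest    = rec.Resiliation || ',';

    /* strip one value at a time and append it only if not seen before */
    WHILE POSITION(',' IN v_rest) > 0 DO
      SET v_tok  = TRIM(SUBSTR(v_rest, 1, POSITION(',' IN v_rest) - 1));
      SET v_rest = SUBSTR(v_rest, POSITION(',' IN v_rest) + 1);
      IF POSITION(',' || v_tok || ',' IN ',' || v_out || ',') = 0 THEN
        SET v_out = CASE WHEN v_out = '' THEN v_tok
                         ELSE v_out || ',' || v_tok END;
      END IF;
    END WHILE;
  END FOR;

  /* flush the last LINE_ID */
  IF v_line <> '' THEN
    INSERT INTO tgt (DAY_ID, CONTENT_ID, LINE_ID, Resiliation)
    VALUES (v_day, v_content, v_line, v_out);
  END IF;
END;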

dnoeth 4628 posts Joined 11/04
08 Jan 2014

Hi abhishek,
You got denormalized input and want to create denormalized output? Ouch.

Dieter

Raja_KT 1246 posts Joined 07/09
08 Jan 2014

I feel that a sanity check should be in place at each and every stage of the data flow. Also, the modeler should check that the data matches the end requirements.

Raja K Thaw
My wiki: http://en.wikipedia.org/wiki/User:Kt_raj1
Street Children suffer not by their fault. We can help them if we want.

09 Jan 2014

Thanks a lot Dieter and Raja,
 
Actually this data is coming from a source system, which is a real-time system (like IPTV data), so we can't split it any further.
The only issue I am facing is the duplicate values after the concatenation.
Is there any possible approach to handle this case?
 
Meanwhile I am trying the same thing with Teradata utilities and Unix:
export all the records (with duplicates) to a flat file,
remove the duplicates with Unix awk,
and load the data into the table again.
 
 

Raja_KT 1246 posts Joined 07/09
09 Jan 2014

You have not told us about the tool that reads the real-time data. In SAP DS, for example, reading from a WSDL, you can reformat the data any way you want and load it to the target.
If you are using Ab Initio, you can read through nested DMLs and reformat any way you want.
Again, using Unix, sed, awk, or even a plain read-line loop will let you extract parts of each line and write them back to a file.
Good luck,
 

Raja K Thaw
My wiki: http://en.wikipedia.org/wiki/User:Kt_raj1
Street Children suffer not by their fault. We can help them if we want.

09 Jan 2014

Hi Experts,
 
Thanks for your comments.
The solution has finally been implemented with FastExport, Unix (duplicate removal), and then FastLoad.
 
But for my own knowledge I am still looking for the same kind of solution in pure Teradata SQL.
 

prashant05kumar 8 posts Joined 01/14
19 Jan 2014

Hi Abhishek,
Can you please share what kind of solution you implemented? I believe we have a very similar kind of business need, and it would be helpful for us if you described how you handled it.
 
Thanks,
Prashant

27 Jan 2014

Hi Prashant,
 
Can you please describe your exact requirement here?

Adeel Chaudhry 773 posts Joined 04/08
02 Feb 2014

What exactly do you mean by 'no duplicates are allowed in the Resiliation column'? Duplicate values as a whole chunk, or duplicate individual values?

-- If you are stuck at something .... consider it an opportunity to think anew.

vinaywani 10 posts Joined 11/11
12 Feb 2014

--This query works for a maximum of 3 values in Resiliation, e.g. 1,2,3.
--If it contains more than that, the query needs to be modified to accommodate it.
--I have separated the column values 1,2,3 into individual rows after removing the ','.
--We need to insert this result into a temp table and then run a recursive query to join
--the rows back into a column again.
--To deduplicate the values I have used UNION.
--If you have an INSTR UDF installed, the solution becomes much easier;
--I have used nested SUBSTR/INDEX calls to imitate INSTR.
--The following query gives you the unique values in the Resiliation column, which was the major hurdle.
 

SELECT * FROM
(
  SELECT Day_id, content_id, Line_id,
         SUBSTR(Resiliation, 1, INDEX(Resiliation, ',') - 1) AS COL1
  FROM Test

  UNION

  SELECT Day_id, content_id, Line_id,
         CASE WHEN INDEX(SUBSTR(Resiliation, INDEX(Resiliation, ',') + 1), ',') > 0 THEN
                SUBSTR(SUBSTR(Resiliation, INDEX(Resiliation, ',') + 1), 1,
                       INDEX(SUBSTR(Resiliation, INDEX(Resiliation, ',') + 1), ',') - 1)
              ELSE
                SUBSTR(SUBSTR(Resiliation, INDEX(Resiliation, ',') + 1), 1)
         END AS COL1
  FROM Test

  UNION

  SELECT Day_id, content_id, Line_id,
         CASE WHEN INDEX(SUBSTR(Resiliation, INDEX(Resiliation, ',') + 1), ',') > 0 THEN
                SUBSTR(SUBSTR(Resiliation, INDEX(Resiliation, ',') + 1),
                       INDEX(SUBSTR(Resiliation, INDEX(Resiliation, ',') + 1), ',') + 1)
              WHEN INDEX(SUBSTR(SUBSTR(Resiliation, INDEX(Resiliation, ',') + 1),
                                INDEX(SUBSTR(Resiliation, INDEX(Resiliation, ',') + 1), ',') + 1), ',') > 0 THEN
                SUBSTR(SUBSTR(Resiliation, INDEX(Resiliation, ',') + 1),
                       INDEX(SUBSTR(Resiliation, INDEX(Resiliation, ',') + 1), ',') + 1,
                       INDEX(SUBSTR(SUBSTR(Resiliation, INDEX(Resiliation, ',') + 1),
                                    INDEX(SUBSTR(Resiliation, INDEX(Resiliation, ',') + 1), ',') + 1), ',') - 1)
              ELSE
                'NULL'
         END AS COL1
  FROM Test
) T
WHERE col1 <> 'NULL'
ORDER BY col1
Let me know if anything is missing.
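
For the "join the rows back into a column" step mentioned in the comments above, one possible sketch, assuming the deduplicated one-value-per-row result has been stored in a hypothetical temp table tmp_resil(Day_id, content_id, Line_id, col1) and that the Teradata release has XMLAGG (14.10+); older releases would need a recursive rn + 1 join like the one sketched earlier in the thread:

SELECT Day_id, content_id, Line_id,
       TRIM(TRAILING ',' FROM
            (XMLAGG(col1 || ',' ORDER BY col1) (VARCHAR(1000)))) AS Resiliation
FROM tmp_resil
GROUP BY Day_id, content_id, Line_id;

The ORDER BY col1 is only a placeholder: once the list has been split apart, the original ordering of the values is gone, so any required order would have to be carried along as an extra column.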
 
 
