Partition outer join in Oracle (data densification)

Partition outer join is a method for “data-densification”. If you have sparse data it helps to easily duplicate data with new SQL syntax:

SELECT  
  select_expression
FROM    
  table_reference PARTITION BY (expr [,expr ]...)
RIGHT OUTER JOIN  
  table_reference

or

SELECT  
  select_expression
FROM    
  table_reference
LEFT OUTER JOIN  
  table_reference PARTITION BY (expr [,expr ]...)

Example

Let’s prepare some test data

CREATE TABLE fct_tbl
(
  cust_id VARCHAR2(10),
  mth     DATE,
  vol     NUMBER
);

INSERT INTO fct_tbl
SELECT 
  'CUST_'||mod(LEVEL,2) cust_id,
  add_months(trunc(SYSDATE, 'YY'), mod(LEVEL, 4)) mth,
  level vol
FROM dual
CONNECT BY LEVEL < 100;

COMMIT;

Following query doesn’t report all months in 2014 and each customer is not reported in each month.

SELECT 
  cust_id, 
  to_char(mth, 'DD.MON.YYYY') mth_name, 
  sum(vol) vol
FROM 
  fct_tbl f
GROUP BY mth, cust_id
ORDER BY mth, cust_id;

CUST_ID    MTH_NAME                    VOL
---------- -------------------- ----------
CUST_0     01.JAN.2014                1200 
CUST_1     01.FEB.2014                1225 
CUST_0     01.MAR.2014                1250 
CUST_1     01.APR.2014                1275

Following query shows how to generate missing months (6 months) and each customer in each month. It’s old method.

WITH time_dim AS
(
  SELECT add_months(trunc(SYSDATE, 'YY'), LEVEL-1) mth
    FROM dual
  CONNECT BY LEVEL < 7
),
cust_dim AS
(
  SELECT DISTINCT cust_id
    FROM fct_tbl
) 
SELECT 
  c.cust_id, 
  to_char(t.mth, 'DD.MON.YYYY') mth_name, 
  sum(nvl(f.vol,0)) vol
FROM 
  time_dim t, 
  cust_dim c, 
  fct_tbl f
WHERE t.mth = f.mth(+)
  AND c.cust_id = f.cust_id(+)  
GROUP BY t.mth, c.cust_id
ORDER BY t.mth, c.cust_id;

CUST_ID    MTH_NAME                    VOL
---------- -------------------- ----------
CUST_0     01.JAN.2014                1200 
CUST_1     01.JAN.2014                   0 
CUST_0     01.FEB.2014                   0 
CUST_1     01.FEB.2014                1225 
CUST_0     01.MAR.2014                1250 
CUST_1     01.MAR.2014                   0 
CUST_0     01.APR.2014                   0 
CUST_1     01.APR.2014                1275 
CUST_0     01.MAY.2014                   0 
CUST_1     01.MAY.2014                   0 
CUST_0     01.JUN.2014                   0 
CUST_1     01.JUN.2014                   0 

 12 rows selected

It can be done in much easier way using PARTITION OUTER JOIN

WITH time_dim AS
(SELECT add_months(trunc(SYSDATE, 'YY'), LEVEL-1) mth
  FROM dual
  CONNECT BY LEVEL < 7) 
SELECT 
  f.cust_id, 
  to_char(t.mth, 'DD.MON.YYYY') mth_name, 
  sum(nvl(vol,0)) vol
FROM 
  time_dim t LEFT OUTER JOIN 
  fct_tbl f  PARTITION BY  (f.cust_id) 
ON(t.mth = f.mth)
GROUP BY f.cust_id, t.mth
order by t.mth, f.cust_id;

CUST_ID    MTH_NAME                    VOL
---------- -------------------- ----------
CUST_0     01.JAN.2014                1200 
CUST_1     01.JAN.2014                   0 
CUST_0     01.FEB.2014                   0 
CUST_1     01.FEB.2014                1225 
CUST_0     01.MAR.2014                1250 
CUST_1     01.MAR.2014                   0 
CUST_0     01.APR.2014                   0 
CUST_1     01.APR.2014                1275 
CUST_0     01.MAY.2014                   0 
CUST_1     01.MAY.2014                   0 
CUST_0     01.JUN.2014                   0 
CUST_1     01.JUN.2014                   0 

 12 rows selected

or

WITH time_dim AS
(SELECT add_months(trunc(SYSDATE, 'YY'), LEVEL-1) mth
  FROM dual
  CONNECT BY LEVEL < 7) 
SELECT 
  f.cust_id, 
  to_char(t.mth, 'DD.MON.YYYY') mth_name, 
  sum(nvl(vol,0)) vol
FROM 
  fct_tbl f  PARTITION BY  (f.cust_id) 
  RIGHT OUTER JOIN
  time_dim t  
ON(t.mth = f.mth)
GROUP BY f.cust_id, t.mth
ORDER BY t.mth, f.cust_id;

CUST_ID    MTH_NAME                    VOL
---------- -------------------- ----------
CUST_0     01.JAN.2014                1200 
CUST_1     01.JAN.2014                   0 
CUST_0     01.FEB.2014                   0 
CUST_1     01.FEB.2014                1225 
CUST_0     01.MAR.2014                1250 
CUST_1     01.MAR.2014                   0 
CUST_0     01.APR.2014                   0 
CUST_1     01.APR.2014                1275 
CUST_0     01.MAY.2014                   0 
CUST_1     01.MAY.2014                   0 
CUST_0     01.JUN.2014                   0 
CUST_1     01.JUN.2014                   0 

 12 rows selected

Have a fun 🙂

Tomasz

Leave a Reply

Your email address will not be published. Required fields are marked *

This site uses Akismet to reduce spam. Learn how your comment data is processed.