Query 2: Use lag function to put sales of both year 2000 and 2001 in one row:
WITH v AS
(
SELECT SUBSTR(p.Prod_Name,1,6) Prod, t.Calendar_Year,
sum(s.amount_sold) sales
FROM Sales s, Times t, Products p
WHERE s.Time_id = t.Time_id AND
s.Prod_id = p.Prod_id AND p.Prod_name in ('Y Box') AND
t.Calendar_Year in (2000,2001)
GROUP BY p.Prod_Name, t.Calendar_Year
)
select * from (
select prod, calendar_year, sales, lag(sales,1) over(partition by v.prod order by v.calendar_year) last_year_sales
from v
)
where calendar_year=2001;
Statistics
----------------------------------------------------------
0 recursive calls
0 db block gets
146 consistent gets
0 physical reads
0 redo size
761 bytes sent via SQL*Net to client
524 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
1 sorts (memory)
0 sorts (disk)
1 rows processed
Query 2_1: Alternative, user could use decode to generate the same output. This is a well-known method to do row to column transformation.
select p.prod_name, sum(decode(t.calendar_year,2000, amount_sold,0)) "2000_sales",sum(decode(t.calendar_year,2001, amount_sold,0)) "2001_sales"
from sales s, times t, products p where s.time_id=t.time_id and s.prod_Id=p.prod_id
and t.calendar_year in (2000, 2001) and p.prod_name in ('Y Box') group by p.prod_name;
Statistics
----------------------------------------------------------
0 recursive calls
0 db block gets
146 consistent gets
0 physical reads
0 redo size
687 bytes sent via SQL*Net to client
524 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
1 rows processed
Autotrace estimates Query_2 and Query_2_1 have same performance.
What if the customer needs all years sales in one row? I could add another column using same method, either decode or lag. However, there's an elegant way to do this, using Pivot operation.
Query 2_3 :
WITH v AS
(
SELECT SUBSTR(p.Prod_Name,1,6) Prod, t.Calendar_Year,
sum(s.amount_sold) amount_sold
FROM Sales s, Times t, Products p
WHERE s.Time_id = t.Time_id AND
s.Prod_id = p.Prod_id AND p.Prod_name in ('Y Box')
GROUP BY p.Prod_Name, t.Calendar_Year
order by t.calendar_year
)
select * from
(select prod, calendar_year, amount_sold
from v) s pivot( sum(amount_sold)
for calendar_year in (1998 "1998_sales", 1999 "1999_sales",2000 "2000_sales", 2001 "2001_sales"));
Query 3: A data densified query, compares weekly cumulative sales of 2001 to that of 2000. 也就是常说的同比。在这里用PARTITION BY (v.Product_Name) RIGHT OUTER JOIN 语句与Times这个维度表做右外连接,从而得到一个在时间维度上数据都”填充“完整的结果集。这个SQL还用了上面提到的LAG函数实现行转列。从而最终得到2000和2001同比year_to_date销售额对比
WITH v AS
(
SELECT SUBSTR(p.Prod_Name,1,6) Prod, t.Calendar_Year Year,
t.Calendar_Week_Number Week, SUM(Amount_Sold) Sales
FROM Sales s, Times t, Products p
WHERE s.Time_id = t.Time_id AND
s.Prod_id = p.Prod_id AND
p.Prod_name in ('Y Box') AND
t.Calendar_Year in (2000,2001) AND
t.Calendar_Week_Number BETWEEN 30 AND 40
GROUP BY p.Prod_Name, t.Calendar_Year, t.Calendar_Week_Number
)
SELECT Prod , Year, Week, Sales, Weekly_ytd_sales, Weekly_ytd_sales_prior_year
FROM (
SELECT Prod, Year, Week, Sales, Weekly_ytd_sales,
LAG(Weekly_ytd_sales, 1) OVER (PARTITION BY Prod , Week ORDER BY Year) Weekly_ytd_sales_prior_year
FROM (
SELECT v.Prod Prod , t.Year Year, t.Week Week, NVL(v.Sales,0) Sales,
SUM(NVL(v.Sales,0)) OVER (PARTITION BY v.Prod , t.Year ORDER BY t.week) weekly_ytd_sales
FROM v
PARTITION BY (v.Prod )
RIGHT OUTER JOIN
(SELECT DISTINCT Calendar_Week_Number Week, Calendar_Year Year
FROM Times
WHERE Calendar_Year IN (2000, 2001)) t
ON (v.week = t.week AND v.Year = t.Year)
) dense_sales
) year_over_year_sales
WHERE Year = 2001 AND Week BETWEEN 30 AND 32
ORDER BY prod,year, week;
Partition out join 与普通外连接的不同之处在于实现了数据填充,它的结果集必然要比普通外连接要大。如下例子:
CREATE TABLE inventory (
time_id DATE,
product VARCHAR2(10),
quantity NUMBER);
INSERT INTO inventory VALUES (TO_DATE('01/04/01', 'DD/MM/YY'), 'bottle', 10);
INSERT INTO inventory VALUES (TO_DATE('06/04/01', 'DD/MM/YY'), 'bottle', 10);
INSERT INTO inventory VALUES (TO_DATE('01/04/01', 'DD/MM/YY'), 'can', 10);
INSERT INTO inventory VALUES (TO_DATE('04/04/01', 'DD/MM/YY'), 'can', 10);
commit;
可以看到产品销售在时间维度上是稀疏的,只有4月1日和4月6月有销量。Partition outer join能够实现数据在右表维度上的完整填充,在这个例子是时间维度。实际上就是去重后的product和time_id这两个维度上的笛卡尔积。
SELECT times.time_id, product, nvl(quantity,0) FROM inventory
PARTITION BY (product)
RIGHT OUTER JOIN times ON (times.time_id = inventory.time_id)
WHERE times.time_id BETWEEN TO_DATE('01/04/01', 'DD/MM/YY') AND TO_DATE('06/04/01', 'DD/MM/YY')
ORDER BY product,time_id;
TIME_ID PRODUCT NVL(QUANTITY,0)
--------- ---------- ---------------
01-APR-01 bottle 10
01-APR-01 bottle 10
02-APR-01 bottle 0
03-APR-01 bottle 0
04-APR-01 bottle 0
05-APR-01 bottle 0
06-APR-01 bottle 10
06-APR-01 bottle 10
01-APR-01 can 10
01-APR-01 can 10
02-APR-01 can 0
03-APR-01 can 0
04-APR-01 can 10
04-APR-01 can 10
05-APR-01 can 0
06-APR-01 can 0
我们可以看一下普通的外连接的结果:
SELECT times.time_id, product, nvl(quantity,0) FROM inventory
RIGHT OUTER JOIN times ON (times.time_id = inventory.time_id)
WHERE times.time_id BETWEEN TO_DATE('01/04/01', 'DD/MM/YY') AND TO_DATE('06/04/01', 'DD/MM/YY')
ORDER BY product,time_id;
TIME_ID PRODUCT NVL(QUANTITY,0)
--------- ---------- ---------------
01-APR-01 bottle 10
01-APR-01 bottle 10
06-APR-01 bottle 10
06-APR-01 bottle 10
01-APR-01 can 10
01-APR-01 can 10
04-APR-01 can 10
04-APR-01 can 10
02-APR-01 0
03-APR-01 0
05-APR-01 0