天天看点

sql查询(三)--having子句求众数、中位数 - wl413911

sql查询(三)--having子句求众数、中位数

 

一、建立需要查询的表
CREATE TABLE Graduates
(name   VARCHAR(16) PRIMARY KEY,
 income INTEGER NOT NULL);
-- 桑普森是个离群值,会拉高平均数
INSERT INTO Graduates VALUES(\'桑普森\', 400000);
INSERT INTO Graduates VALUES(\'迈克\',     30000);
INSERT INTO Graduates VALUES(\'怀特\',   20000);
INSERT INTO Graduates VALUES(\'阿诺德\', 20000);
INSERT INTO Graduates VALUES(\'史密斯\',     20000);
INSERT INTO Graduates VALUES(\'劳伦斯\',   15000);
INSERT INTO Graduates VALUES(\'哈德逊\',   15000);
INSERT INTO Graduates VALUES(\'肯特\',     10000);
INSERT INTO Graduates VALUES(\'贝克\',   10000);
INSERT INTO Graduates VALUES(\'斯科特\',   10000);      
1、求众数
(1)在having子句中用包含谓词all 的子查询
SELECT income, COUNT(*) AS cnt 
FROM Graduates 
GROUP BY income HAVING COUNT(*) >= ALL ( SELECT COUNT(*) FROM Graduates GROUP BY income);
缺点:谓词all 用于null和 空集时会出现问题
(2)在having子句中 用 包含极值函数的子查询
select income ,count(*) as cnt
from graduates
group by income
having cnt >= (select max(cnt)
         from (select count(*) as cnt
            from graduates
            group by income) as tmp));

      
2、求中位数
用having子句进行自连接求中位数
第一步-- 将集合里的元素按照大小分为上半部分、下班部分 两个子集,求其交集(无论聚合数据的数目是 奇数 偶数)
select  t1.income 
from gradutes t1 , gradutes t2
group by t1.income
having sum(case when t2.income >=t1.income then 1 else 0 end) >= count(*)/2
and sum(case when t2.income <=t1.income then 1 else 0 end) >= count(*)/2;
第二步 -- 将上下部分集合求得的交集,去重,然后求平均,得到中值
select avg(distinct income)
from ( select t1.income
    from gradutes t1,gradutes t2
    group by t1.income
    having sum(case when t2.income >= t1.income then 1 else 0) >= count(*)/2
      and sum (case when t2.incomme <= t1.income then 1 else 0 ) >= count(*)/2) as tmp