大資料應用之 — clickhouse性能測試ssb及擴充測試,在ssb的基礎行增加了5和6的測試。
ssb測試
- 生成資料
cd /softw/ssb-dbgen-master
./dbgen -s 1000 -T c
./dbgen -s 1000 -T l
./dbgen -s 1000 -T p
./dbgen -s 1000 -T s
- 建表
CREATE DATABASE IF NOT EXISTS ssb on cluster cluster_lsyk_s1r1;
use ssb;
CREATE TABLE ssb.customer_local on cluster cluster_lsyk_s1r1
(
C_CUSTKEY UInt32,
C_NAME String,
C_ADDRESS String,
C_CITY LowCardinality(String),
C_NATION LowCardinality(String),
C_REGION LowCardinality(String),
C_PHONE String,
C_MKTSEGMENT LowCardinality(String)
)
ENGINE = MergeTree ORDER BY (C_CUSTKEY);
CREATE TABLE ssb.lineorder_local on cluster cluster_lsyk_s1r1
(
LO_ORDERKEY UInt32,
LO_LINENUMBER UInt8,
LO_CUSTKEY UInt32,
LO_PARTKEY UInt32,
LO_SUPPKEY UInt32,
LO_ORDERDATE Date,
LO_ORDERPRIORITY LowCardinality(String),
LO_SHIPPRIORITY UInt8,
LO_QUANTITY UInt8,
LO_EXTENDEDPRICE UInt32,
LO_ORDTOTALPRICE UInt32,
LO_DISCOUNT UInt8,
LO_REVENUE UInt32,
LO_SUPPLYCOST UInt32,
LO_TAX UInt8,
LO_COMMITDATE Date,
LO_SHIPMODE LowCardinality(String)
)
ENGINE = MergeTree PARTITION BY toYear(LO_ORDERDATE) ORDER BY (LO_ORDERDATE, LO_ORDERKEY);
CREATE TABLE ssb.part_local on cluster cluster_lsyk_s1r1
(
P_PARTKEY UInt32,
P_NAME String,
P_MFGR LowCardinality(String),
P_CATEGORY LowCardinality(String),
P_BRAND LowCardinality(String),
P_COLOR LowCardinality(String),
P_TYPE LowCardinality(String),
P_SIZE UInt8,
P_CONTAINER LowCardinality(String)
)
ENGINE = MergeTree ORDER BY P_PARTKEY;
CREATE TABLE ssb.supplier_local on cluster cluster_lsyk_s1r1
(
S_SUPPKEY UInt32,
S_NAME String,
S_ADDRESS String,
S_CITY LowCardinality(String),
S_NATION LowCardinality(String),
S_REGION LowCardinality(String),
S_PHONE String
)
ENGINE = MergeTree ORDER BY S_SUPPKEY;
-- 建立分布式表
CREATE TABLE ssb.customer on cluster cluster_lsyk_s1r1 AS ssb.customer_local
ENGINE = Distributed(cluster_lsyk_s1r1, ssb, customer_local, rand());
CREATE TABLE ssb.lineorder on cluster cluster_lsyk_s1r1 AS ssb.lineorder_local
ENGINE = Distributed(cluster_lsyk_s1r1, ssb, lineorder_local, rand());
CREATE TABLE ssb.part on cluster cluster_lsyk_s1r1 AS ssb.part_local
ENGINE = Distributed(cluster_lsyk_s1r1, ssb, part_local, rand());
CREATE TABLE ssb.supplier on cluster cluster_lsyk_s1r1 AS ssb.supplier_local
ENGINE = Distributed(cluster_lsyk_s1r1, ssb, supplier_local, rand());
- 導入資料
clickhouse-client --port 5566 --query "INSERT INTO ssb.customer FORMAT CSV" < /softw/ssb-dbgen-master/customer.tbl
clickhouse-client --port 5566 --query "INSERT INTO ssb.part FORMAT CSV" < /softw/ssb-dbgen-master/part.tbl
clickhouse-client --port 5566 --query "INSERT INTO ssb.supplier FORMAT CSV" < /softw/ssb-dbgen-master/supplier.tbl
clickhouse-client --port 5566 --query "INSERT INTO ssb.lineorder FORMAT CSV" < /softw/ssb-dbgen-master/lineorder.tbl
導入時間:7分鐘
select 'customer',count(1) from ssb.customer c union all
select 'lineorder',count(1) from ssb.lineorder l union all
select 'part', count(1) from ssb.part p union all
select 'supplier',count(1) from ssb.supplier s
單台大小:
- 關聯表
吃記憶體:-- 建立新表 本地表,分布式表關聯必須用global join CREATE TABLE ssb.lineorder_flat_local on cluster cluster_lsyk_s1r1 ENGINE = MergeTree ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS SELECT l.LO_ORDERKEY AS LO_ORDERKEY, l.LO_LINENUMBER AS LO_LINENUMBER, l.LO_CUSTKEY AS LO_CUSTKEY, l.LO_PARTKEY AS LO_PARTKEY, l.LO_SUPPKEY AS LO_SUPPKEY, l.LO_ORDERDATE AS LO_ORDERDATE, l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY, l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY, l.LO_QUANTITY AS LO_QUANTITY, l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE, l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE, l.LO_DISCOUNT AS LO_DISCOUNT, l.LO_REVENUE AS LO_REVENUE, l.LO_SUPPLYCOST AS LO_SUPPLYCOST, l.LO_TAX AS LO_TAX, l.LO_COMMITDATE AS LO_COMMITDATE, l.LO_SHIPMODE AS LO_SHIPMODE, c.C_NAME AS C_NAME, c.C_ADDRESS AS C_ADDRESS, c.C_CITY AS C_CITY, c.C_NATION AS C_NATION, c.C_REGION AS C_REGION, c.C_PHONE AS C_PHONE, c.C_MKTSEGMENT AS C_MKTSEGMENT, s.S_NAME AS S_NAME, s.S_ADDRESS AS S_ADDRESS, s.S_CITY AS S_CITY, s.S_NATION AS S_NATION, s.S_REGION AS S_REGION, s.S_PHONE AS S_PHONE, p.P_NAME AS P_NAME, p.P_MFGR AS P_MFGR, p.P_CATEGORY AS P_CATEGORY, p.P_BRAND AS P_BRAND, p.P_COLOR AS P_COLOR, p.P_TYPE AS P_TYPE, p.P_SIZE AS P_SIZE, p.P_CONTAINER AS P_CONTAINER FROM ssb.lineorder AS l global JOIN ssb.customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY global JOIN ssb.supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY global JOIN ssb.part AS p ON p.P_PARTKEY = l.LO_PARTKEY where 1 = 2; -- 建立分布式表 CREATE TABLE ssb.lineorder_flat on cluster cluster_lsyk_s1r1 AS ssb.lineorder_flat_local ENGINE = Distributed(cluster_lsyk_s1r1, ssb, lineorder_flat_local, rand()); -- 插入資料 insert into ssb.lineorder_flat SELECT l.LO_ORDERKEY AS LO_ORDERKEY, l.LO_LINENUMBER AS LO_LINENUMBER, l.LO_CUSTKEY AS LO_CUSTKEY, l.LO_PARTKEY AS LO_PARTKEY, l.LO_SUPPKEY AS LO_SUPPKEY, l.LO_ORDERDATE AS LO_ORDERDATE, l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY, l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY, l.LO_QUANTITY AS LO_QUANTITY, l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE, l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE, l.LO_DISCOUNT AS LO_DISCOUNT, l.LO_REVENUE AS LO_REVENUE, l.LO_SUPPLYCOST AS LO_SUPPLYCOST, l.LO_TAX AS LO_TAX, l.LO_COMMITDATE AS LO_COMMITDATE, l.LO_SHIPMODE AS LO_SHIPMODE, c.C_NAME AS C_NAME, c.C_ADDRESS AS C_ADDRESS, c.C_CITY AS C_CITY, c.C_NATION AS C_NATION, c.C_REGION AS C_REGION, c.C_PHONE AS C_PHONE, c.C_MKTSEGMENT AS C_MKTSEGMENT, s.S_NAME AS S_NAME, s.S_ADDRESS AS S_ADDRESS, s.S_CITY AS S_CITY, s.S_NATION AS S_NATION, s.S_REGION AS S_REGION, s.S_PHONE AS S_PHONE, p.P_NAME AS P_NAME, p.P_MFGR AS P_MFGR, p.P_CATEGORY AS P_CATEGORY, p.P_BRAND AS P_BRAND, p.P_COLOR AS P_COLOR, p.P_TYPE AS P_TYPE, p.P_SIZE AS P_SIZE, p.P_CONTAINER AS P_CONTAINER FROM ssb.lineorder AS l global JOIN ssb.customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY global JOIN ssb.supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY global JOIN ssb.part AS p ON p.P_PARTKEY = l.LO_PARTKEY ; -- 查資料量 select count(1) from ssb.lineorder_flat;
耗時 1339秒
- 性能測試
-- Q1.1 0.3
SELECT
sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM
ssb.lineorder_flat
WHERE
toYear(LO_ORDERDATE) = 1993
AND LO_DISCOUNT BETWEEN 1 AND 3
AND LO_QUANTITY < 25;
-- Q1.2 0.05
SELECT
sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM
ssb.lineorder_flat
WHERE
toYYYYMM(LO_ORDERDATE) = 199401
AND LO_DISCOUNT BETWEEN 4 AND 6
AND LO_QUANTITY BETWEEN 26 AND 35;
-- Q1.3 0.05
SELECT
sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM
ssb.lineorder_flat
WHERE
toISOWeek(LO_ORDERDATE) = 6
AND toYear(LO_ORDERDATE) = 1994
AND LO_DISCOUNT BETWEEN 5 AND 7
AND LO_QUANTITY BETWEEN 26 AND 35;
-- Q2.1 1.6
SELECT
sum(LO_REVENUE),
toYear(LO_ORDERDATE) AS year,
P_BRAND
FROM
ssb.lineorder_flat
WHERE
P_CATEGORY = 'MFGR#12'
AND S_REGION = 'AMERICA'
GROUP BY
year,
P_BRAND
ORDER BY
year,
P_BRAND;
-- Q2.2 0.9
SELECT
sum(LO_REVENUE),
toYear(LO_ORDERDATE) AS year,
P_BRAND
FROM
ssb.lineorder_flat
WHERE
P_BRAND >= 'MFGR#2221'
AND P_BRAND <= 'MFGR#2228'
AND S_REGION = 'ASIA'
GROUP BY
year,
P_BRAND
ORDER BY
year,
P_BRAND;
-- Q2.3 0.8
SELECT
sum(LO_REVENUE),
toYear(LO_ORDERDATE) AS year,
P_BRAND
FROM
ssb.lineorder_flat
WHERE
P_BRAND = 'MFGR#2239'
AND S_REGION = 'EUROPE'
GROUP BY
year,
P_BRAND
ORDER BY
year,
P_BRAND;
-- Q3.1 1.45
SELECT
C_NATION,
S_NATION,
toYear(LO_ORDERDATE) AS year,
sum(LO_REVENUE) AS revenue
FROM
ssb.lineorder_flat
WHERE
C_REGION = 'ASIA'
AND S_REGION = 'ASIA'
AND year >= 1992
AND year <= 1997
GROUP BY
C_NATION,
S_NATION,
year
ORDER BY
year ASC,
revenue DESC;
-- Q3.2 1.24
SELECT
C_CITY,
S_CITY,
toYear(LO_ORDERDATE) AS year,
sum(LO_REVENUE) AS revenue
FROM
ssb.lineorder_flat
WHERE
C_NATION = 'UNITED STATES'
AND S_NATION = 'UNITED STATES'
AND year >= 1992
AND year <= 1997
GROUP BY
C_CITY,
S_CITY,
year
ORDER BY
year ASC,
revenue DESC;
-- Q3.3 0.83
SELECT
C_CITY,
S_CITY,
toYear(LO_ORDERDATE) AS year,
sum(LO_REVENUE) AS revenue
FROM
ssb.lineorder_flat
WHERE
(C_CITY = 'UNITED KI1'
OR C_CITY = 'UNITED KI5')
AND (S_CITY = 'UNITED KI1'
OR S_CITY = 'UNITED KI5')
AND year >= 1992
AND year <= 1997
GROUP BY
C_CITY,
S_CITY,
year
ORDER BY
year ASC,
revenue DESC;
-- Q3.4 0.05
SELECT
C_CITY,
S_CITY,
toYear(LO_ORDERDATE) AS year,
sum(LO_REVENUE) AS revenue
FROM
ssb.lineorder_flat
WHERE
(C_CITY = 'UNITED KI1'
OR C_CITY = 'UNITED KI5')
AND (S_CITY = 'UNITED KI1'
OR S_CITY = 'UNITED KI5')
AND toYYYYMM(LO_ORDERDATE) = 199712
GROUP BY
C_CITY,
S_CITY,
year
ORDER BY
year ASC,
revenue DESC;
-- Q4.1 1.78
SELECT
toYear(LO_ORDERDATE) AS year,
C_NATION,
sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM
ssb.lineorder_flat
WHERE
C_REGION = 'AMERICA'
AND S_REGION = 'AMERICA'
AND (P_MFGR = 'MFGR#1'
OR P_MFGR = 'MFGR#2')
GROUP BY
year,
C_NATION
ORDER BY
year ASC,
C_NATION ASC;
-- Q4.2 0.42
SELECT
toYear(LO_ORDERDATE) AS year,
S_NATION,
P_CATEGORY,
sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM
ssb.lineorder_flat
WHERE
C_REGION = 'AMERICA'
AND S_REGION = 'AMERICA'
AND (year = 1997
OR year = 1998)
AND (P_MFGR = 'MFGR#1'
OR P_MFGR = 'MFGR#2')
GROUP BY
year,
S_NATION,
P_CATEGORY
ORDER BY
year ASC,
S_NATION ASC,
P_CATEGORY ASC;
-- Q4.3 0.42
SELECT
toYear(LO_ORDERDATE) AS year,
S_CITY,
P_BRAND,
sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM
ssb.lineorder_flat
WHERE
S_NATION = 'UNITED STATES'
AND (year = 1997
OR year = 1998)
AND P_CATEGORY = 'MFGR#14'
GROUP BY
year,
S_CITY,
P_BRAND
ORDER BY
year ASC,
S_CITY ASC,
P_BRAND ASC;
--Q5.1 57.83
select
count(1),
sum(cnt)
from
(
select
LO_ORDERPRIORITY,
LO_SHIPMODE,
P_COLOR,
P_BRAND,
count(1) as cnt,
sum(LO_SUPPLYCOST)
from
ssb.lineorder_flat
group by
LO_ORDERPRIORITY,
LO_SHIPMODE,
P_COLOR,
P_BRAND
order by
LO_ORDERPRIORITY,
LO_SHIPMODE,
P_COLOR,
P_BRAND
) t ;
--3218808 240012290
--Q5.2 1.102
select
count(1),
sum(cnt)
from
(
select
LO_ORDERPRIORITY,
LO_SHIPMODE,
P_COLOR,
P_BRAND,
count(1) as cnt,
sum(LO_SUPPLYCOST)
from
ssb.lineorder_flat
where
S_NATION = 'UNITED STATES'
AND P_CATEGORY = 'MFGR#14'
group by
LO_ORDERPRIORITY,
LO_SHIPMODE,
P_COLOR,
P_BRAND
order by
LO_ORDERPRIORITY,
LO_SHIPMODE,
P_COLOR,
P_BRAND
) t ;
--117571
--Q6.1 57.482
select
count(1),
sum(cnt)
from
(
select
LO_ORDERPRIORITY,
LO_SHIPMODE,
P_COLOR,
P_BRAND,
count(1) as cnt,
sum(LO_SUPPLYCOST) as sm,
count(distinct S_NAME) as dcnt
from
ssb.lineorder_flat
group by
LO_ORDERPRIORITY,
LO_SHIPMODE,
P_COLOR,
P_BRAND
order by
LO_ORDERPRIORITY,
LO_SHIPMODE,
P_COLOR,
P_BRAND
) t ;
--
--Q6.2 1.163
select
count(1),
sum(cnt)
from
(
select
LO_ORDERPRIORITY,
LO_SHIPMODE,
P_COLOR,
P_BRAND,
count(1) as cnt,
sum(LO_SUPPLYCOST) as sm,
count(distinct S_NAME) as dcnt
from
ssb.lineorder_flat
where
S_NATION = 'UNITED STATES'
AND P_CATEGORY = 'MFGR#14'
group by
LO_ORDERPRIORITY,
LO_SHIPMODE,
P_COLOR,
P_BRAND
order by
LO_ORDERPRIORITY,
LO_SHIPMODE,
P_COLOR,
P_BRAND
) t ;
--117571 386092