天天看點

大資料應用之 --- clickhouse性能測試

大資料應用之 — clickhouse性能測試ssb及擴充測試,在ssb的基礎行增加了5和6的測試。

ssb測試

  1. 生成資料
cd /softw/ssb-dbgen-master
./dbgen -s 1000 -T c
./dbgen -s 1000 -T l
./dbgen -s 1000 -T p
./dbgen -s 1000 -T s


           
  1. 建表
CREATE DATABASE IF NOT EXISTS ssb on cluster cluster_lsyk_s1r1;
use ssb;
CREATE TABLE ssb.customer_local on cluster cluster_lsyk_s1r1
(
        C_CUSTKEY       UInt32,
        C_NAME          String,
        C_ADDRESS       String,
        C_CITY          LowCardinality(String),
        C_NATION        LowCardinality(String),
        C_REGION        LowCardinality(String),
        C_PHONE         String,
        C_MKTSEGMENT    LowCardinality(String)
)
ENGINE = MergeTree ORDER BY (C_CUSTKEY);

CREATE TABLE ssb.lineorder_local on cluster cluster_lsyk_s1r1
(
    LO_ORDERKEY             UInt32,
    LO_LINENUMBER           UInt8,
    LO_CUSTKEY              UInt32,
    LO_PARTKEY              UInt32,
    LO_SUPPKEY              UInt32,
    LO_ORDERDATE            Date,
    LO_ORDERPRIORITY        LowCardinality(String),
    LO_SHIPPRIORITY         UInt8,
    LO_QUANTITY             UInt8,
    LO_EXTENDEDPRICE        UInt32,
    LO_ORDTOTALPRICE        UInt32,
    LO_DISCOUNT             UInt8,
    LO_REVENUE              UInt32,
    LO_SUPPLYCOST           UInt32,
    LO_TAX                  UInt8,
    LO_COMMITDATE           Date,
    LO_SHIPMODE             LowCardinality(String)
)
ENGINE = MergeTree PARTITION BY toYear(LO_ORDERDATE) ORDER BY (LO_ORDERDATE, LO_ORDERKEY);

CREATE TABLE ssb.part_local on cluster cluster_lsyk_s1r1
(
        P_PARTKEY       UInt32,
        P_NAME          String,
        P_MFGR          LowCardinality(String),
        P_CATEGORY      LowCardinality(String),
        P_BRAND         LowCardinality(String),
        P_COLOR         LowCardinality(String),
        P_TYPE          LowCardinality(String),
        P_SIZE          UInt8,
        P_CONTAINER     LowCardinality(String)
)
ENGINE = MergeTree ORDER BY P_PARTKEY;

CREATE TABLE ssb.supplier_local on cluster cluster_lsyk_s1r1
(
        S_SUPPKEY       UInt32,
        S_NAME          String,
        S_ADDRESS       String,
        S_CITY          LowCardinality(String),
        S_NATION        LowCardinality(String),
        S_REGION        LowCardinality(String),
        S_PHONE         String
)
ENGINE = MergeTree ORDER BY S_SUPPKEY;


-- 建立分布式表
CREATE TABLE ssb.customer on cluster cluster_lsyk_s1r1 AS ssb.customer_local
ENGINE = Distributed(cluster_lsyk_s1r1, ssb, customer_local, rand());

CREATE TABLE ssb.lineorder on cluster cluster_lsyk_s1r1 AS ssb.lineorder_local
ENGINE = Distributed(cluster_lsyk_s1r1, ssb, lineorder_local, rand());

CREATE TABLE ssb.part on cluster cluster_lsyk_s1r1 AS ssb.part_local
ENGINE = Distributed(cluster_lsyk_s1r1, ssb, part_local, rand());

CREATE TABLE ssb.supplier on cluster cluster_lsyk_s1r1 AS ssb.supplier_local
ENGINE = Distributed(cluster_lsyk_s1r1, ssb, supplier_local, rand());
           
  1. 導入資料
clickhouse-client --port 5566 --query "INSERT INTO ssb.customer FORMAT CSV" < /softw/ssb-dbgen-master/customer.tbl
clickhouse-client --port 5566 --query "INSERT INTO ssb.part FORMAT CSV" < /softw/ssb-dbgen-master/part.tbl
clickhouse-client --port 5566 --query "INSERT INTO ssb.supplier FORMAT CSV" < /softw/ssb-dbgen-master/supplier.tbl
clickhouse-client --port 5566 --query "INSERT INTO ssb.lineorder FORMAT CSV" < /softw/ssb-dbgen-master/lineorder.tbl
           

導入時間:7分鐘

大資料應用之 --- clickhouse性能測試
select 'customer',count(1) from ssb.customer c  union all
select 'lineorder',count(1) from ssb.lineorder l  union all
select 'part', count(1) from ssb.part p  union all
select 'supplier',count(1) from ssb.supplier s
           
大資料應用之 --- clickhouse性能測試

單台大小:

大資料應用之 --- clickhouse性能測試
  1. 關聯表
    -- 建立新表 本地表,分布式表關聯必須用global join
    
    CREATE TABLE ssb.lineorder_flat_local on cluster cluster_lsyk_s1r1
    ENGINE = MergeTree ORDER BY (LO_ORDERDATE, LO_ORDERKEY)
    AS SELECT
        l.LO_ORDERKEY AS LO_ORDERKEY,
        l.LO_LINENUMBER AS LO_LINENUMBER,
        l.LO_CUSTKEY AS LO_CUSTKEY,
        l.LO_PARTKEY AS LO_PARTKEY,
        l.LO_SUPPKEY AS LO_SUPPKEY,
        l.LO_ORDERDATE AS LO_ORDERDATE,
        l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY,
        l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY,
        l.LO_QUANTITY AS LO_QUANTITY,
        l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE,
        l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE,
        l.LO_DISCOUNT AS LO_DISCOUNT,
        l.LO_REVENUE AS LO_REVENUE,
        l.LO_SUPPLYCOST AS LO_SUPPLYCOST,
        l.LO_TAX AS LO_TAX,
        l.LO_COMMITDATE AS LO_COMMITDATE,
        l.LO_SHIPMODE AS LO_SHIPMODE,
        c.C_NAME AS C_NAME,
        c.C_ADDRESS AS C_ADDRESS,
        c.C_CITY AS C_CITY,
        c.C_NATION AS C_NATION,
        c.C_REGION AS C_REGION,
        c.C_PHONE AS C_PHONE,
        c.C_MKTSEGMENT AS C_MKTSEGMENT,
        s.S_NAME AS S_NAME,
        s.S_ADDRESS AS S_ADDRESS,
        s.S_CITY AS S_CITY,
        s.S_NATION AS S_NATION,
        s.S_REGION AS S_REGION,
        s.S_PHONE AS S_PHONE,
        p.P_NAME AS P_NAME,
        p.P_MFGR AS P_MFGR,
        p.P_CATEGORY AS P_CATEGORY,
        p.P_BRAND AS P_BRAND,
        p.P_COLOR AS P_COLOR,
        p.P_TYPE AS P_TYPE,
        p.P_SIZE AS P_SIZE,
        p.P_CONTAINER AS P_CONTAINER
    FROM ssb.lineorder AS l
    global JOIN ssb.customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY
    global JOIN ssb.supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY
    global JOIN ssb.part AS p ON p.P_PARTKEY = l.LO_PARTKEY
    where 1 = 2;
    
    -- 建立分布式表
    CREATE TABLE ssb.lineorder_flat on cluster cluster_lsyk_s1r1 AS ssb.lineorder_flat_local
    ENGINE = Distributed(cluster_lsyk_s1r1, ssb, lineorder_flat_local, rand());
    
    -- 插入資料
    insert into ssb.lineorder_flat
    SELECT
        l.LO_ORDERKEY AS LO_ORDERKEY,
        l.LO_LINENUMBER AS LO_LINENUMBER,
        l.LO_CUSTKEY AS LO_CUSTKEY,
        l.LO_PARTKEY AS LO_PARTKEY,
        l.LO_SUPPKEY AS LO_SUPPKEY,
        l.LO_ORDERDATE AS LO_ORDERDATE,
        l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY,
        l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY,
        l.LO_QUANTITY AS LO_QUANTITY,
        l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE,
        l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE,
        l.LO_DISCOUNT AS LO_DISCOUNT,
        l.LO_REVENUE AS LO_REVENUE,
        l.LO_SUPPLYCOST AS LO_SUPPLYCOST,
        l.LO_TAX AS LO_TAX,
        l.LO_COMMITDATE AS LO_COMMITDATE,
        l.LO_SHIPMODE AS LO_SHIPMODE,
        c.C_NAME AS C_NAME,
        c.C_ADDRESS AS C_ADDRESS,
        c.C_CITY AS C_CITY,
        c.C_NATION AS C_NATION,
        c.C_REGION AS C_REGION,
        c.C_PHONE AS C_PHONE,
        c.C_MKTSEGMENT AS C_MKTSEGMENT,
        s.S_NAME AS S_NAME,
        s.S_ADDRESS AS S_ADDRESS,
        s.S_CITY AS S_CITY,
        s.S_NATION AS S_NATION,
        s.S_REGION AS S_REGION,
        s.S_PHONE AS S_PHONE,
        p.P_NAME AS P_NAME,
        p.P_MFGR AS P_MFGR,
        p.P_CATEGORY AS P_CATEGORY,
        p.P_BRAND AS P_BRAND,
        p.P_COLOR AS P_COLOR,
        p.P_TYPE AS P_TYPE,
        p.P_SIZE AS P_SIZE,
        p.P_CONTAINER AS P_CONTAINER
    FROM ssb.lineorder AS l
    global JOIN ssb.customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY
    global JOIN ssb.supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY
    global JOIN ssb.part AS p ON p.P_PARTKEY = l.LO_PARTKEY
    ;
    
    -- 查資料量
    select count(1) from ssb.lineorder_flat;
               
    吃記憶體:
    大資料應用之 --- clickhouse性能測試

耗時 1339秒

大資料應用之 --- clickhouse性能測試
大資料應用之 --- clickhouse性能測試
  1. 性能測試
-- Q1.1   0.3
SELECT
	sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM
	ssb.lineorder_flat
WHERE
	toYear(LO_ORDERDATE) = 1993
	AND LO_DISCOUNT BETWEEN 1 AND 3
	AND LO_QUANTITY < 25;
	
-- Q1.2   0.05
SELECT
	sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM
	ssb.lineorder_flat
WHERE
	toYYYYMM(LO_ORDERDATE) = 199401
	AND LO_DISCOUNT BETWEEN 4 AND 6
	AND LO_QUANTITY BETWEEN 26 AND 35;
	
-- Q1.3    0.05 
SELECT
	sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM
	ssb.lineorder_flat
WHERE
	toISOWeek(LO_ORDERDATE) = 6
	AND toYear(LO_ORDERDATE) = 1994
	AND LO_DISCOUNT BETWEEN 5 AND 7
	AND LO_QUANTITY BETWEEN 26 AND 35;
	
-- Q2.1    1.6
SELECT
	sum(LO_REVENUE),
	toYear(LO_ORDERDATE) AS year,
	P_BRAND
FROM
	ssb.lineorder_flat
WHERE
	P_CATEGORY = 'MFGR#12'
	AND S_REGION = 'AMERICA'
GROUP BY
	year,
	P_BRAND
ORDER BY
	year,
	P_BRAND;
	
-- Q2.2    0.9
SELECT
	sum(LO_REVENUE),
	toYear(LO_ORDERDATE) AS year,
	P_BRAND
FROM
	ssb.lineorder_flat
WHERE
	P_BRAND >= 'MFGR#2221'
	AND P_BRAND <= 'MFGR#2228'
	AND S_REGION = 'ASIA'
GROUP BY
	year,
	P_BRAND
ORDER BY
	year,
	P_BRAND;
	
-- Q2.3   0.8
SELECT
	sum(LO_REVENUE),
	toYear(LO_ORDERDATE) AS year,
	P_BRAND
FROM
	ssb.lineorder_flat
WHERE
	P_BRAND = 'MFGR#2239'
	AND S_REGION = 'EUROPE'
GROUP BY
	year,
	P_BRAND
ORDER BY
	year,
	P_BRAND;
	
-- Q3.1    1.45
SELECT
	C_NATION,
	S_NATION,
	toYear(LO_ORDERDATE) AS year,
	sum(LO_REVENUE) AS revenue
FROM
	ssb.lineorder_flat
WHERE
	C_REGION = 'ASIA'
	AND S_REGION = 'ASIA'
	AND year >= 1992
	AND year <= 1997
GROUP BY
	C_NATION,
	S_NATION,
	year
ORDER BY
	year ASC,
	revenue DESC;
	
-- Q3.2    1.24
SELECT
	C_CITY,
	S_CITY,
	toYear(LO_ORDERDATE) AS year,
	sum(LO_REVENUE) AS revenue
FROM
	ssb.lineorder_flat
WHERE
	C_NATION = 'UNITED STATES'
	AND S_NATION = 'UNITED STATES'
	AND year >= 1992
	AND year <= 1997
GROUP BY
	C_CITY,
	S_CITY,
	year
ORDER BY
	year ASC,
	revenue DESC;
	
-- Q3.3   0.83
SELECT
	C_CITY,
	S_CITY,
	toYear(LO_ORDERDATE) AS year,
	sum(LO_REVENUE) AS revenue
FROM
	ssb.lineorder_flat
WHERE
	(C_CITY = 'UNITED KI1'
		OR C_CITY = 'UNITED KI5')
	AND (S_CITY = 'UNITED KI1'
		OR S_CITY = 'UNITED KI5')
	AND year >= 1992
	AND year <= 1997
GROUP BY
	C_CITY,
	S_CITY,
	year
ORDER BY
	year ASC,
	revenue DESC;
	
-- Q3.4   0.05
SELECT
	C_CITY,
	S_CITY,
	toYear(LO_ORDERDATE) AS year,
	sum(LO_REVENUE) AS revenue
FROM
	ssb.lineorder_flat
WHERE
	(C_CITY = 'UNITED KI1'
		OR C_CITY = 'UNITED KI5')
	AND (S_CITY = 'UNITED KI1'
		OR S_CITY = 'UNITED KI5')
	AND toYYYYMM(LO_ORDERDATE) = 199712
GROUP BY
	C_CITY,
	S_CITY,
	year
ORDER BY
	year ASC,
	revenue DESC;
	
-- Q4.1   1.78
SELECT
	toYear(LO_ORDERDATE) AS year,
	C_NATION,
	sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM
	ssb.lineorder_flat
WHERE
	C_REGION = 'AMERICA'
	AND S_REGION = 'AMERICA'
	AND (P_MFGR = 'MFGR#1'
		OR P_MFGR = 'MFGR#2')
GROUP BY
	year,
	C_NATION
ORDER BY
	year ASC,
	C_NATION ASC;
	
-- Q4.2   0.42
SELECT
	toYear(LO_ORDERDATE) AS year,
	S_NATION,
	P_CATEGORY,
	sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM
	ssb.lineorder_flat
WHERE
	C_REGION = 'AMERICA'
	AND S_REGION = 'AMERICA'
	AND (year = 1997
		OR year = 1998)
	AND (P_MFGR = 'MFGR#1'
		OR P_MFGR = 'MFGR#2')
GROUP BY
	year,
	S_NATION,
	P_CATEGORY
ORDER BY
	year ASC,
	S_NATION ASC,
	P_CATEGORY ASC;
	
-- Q4.3   0.42
SELECT
	toYear(LO_ORDERDATE) AS year,
	S_CITY,
	P_BRAND,
	sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM
	ssb.lineorder_flat
WHERE
	S_NATION = 'UNITED STATES'
	AND (year = 1997
		OR year = 1998)
	AND P_CATEGORY = 'MFGR#14'
GROUP BY
	year,
	S_CITY,
	P_BRAND
ORDER BY
	year ASC,
	S_CITY ASC,
	P_BRAND ASC;
	
--Q5.1   57.83
select
	count(1),
	sum(cnt)
from
	(
	select
		LO_ORDERPRIORITY,
		LO_SHIPMODE,
		P_COLOR,
		P_BRAND,
		count(1) as cnt,
		sum(LO_SUPPLYCOST)
	from
		ssb.lineorder_flat
	group by
		LO_ORDERPRIORITY,
		LO_SHIPMODE,
		P_COLOR,
		P_BRAND
	order by
		LO_ORDERPRIORITY,
		LO_SHIPMODE,
		P_COLOR,
		P_BRAND
) t ;
--3218808  240012290

--Q5.2  1.102
select
	count(1),
	sum(cnt)
from
	(
	select
		LO_ORDERPRIORITY,
		LO_SHIPMODE,
		P_COLOR,
		P_BRAND,
		count(1) as cnt,
		sum(LO_SUPPLYCOST)
	from
		ssb.lineorder_flat
	where
		S_NATION = 'UNITED STATES'
		AND P_CATEGORY = 'MFGR#14'
	group by
		LO_ORDERPRIORITY,
		LO_SHIPMODE,
		P_COLOR,
		P_BRAND
	order by
		LO_ORDERPRIORITY,
		LO_SHIPMODE,
		P_COLOR,
		P_BRAND
) t ;
--117571

--Q6.1   57.482
select
	count(1),
	sum(cnt)
from
	(
	select
		LO_ORDERPRIORITY,
		LO_SHIPMODE,
		P_COLOR,
		P_BRAND,
		count(1) as cnt,
		sum(LO_SUPPLYCOST) as sm,
		count(distinct S_NAME) as dcnt
	from
		ssb.lineorder_flat
	group by
		LO_ORDERPRIORITY,
		LO_SHIPMODE,
		P_COLOR,
		P_BRAND
	order by
		LO_ORDERPRIORITY,
		LO_SHIPMODE,
		P_COLOR,
		P_BRAND
) t ;
--

--Q6.2  1.163
select
	count(1),
	sum(cnt)
from
	(
	select
		LO_ORDERPRIORITY,
		LO_SHIPMODE,
		P_COLOR,
		P_BRAND,
		count(1) as cnt,
		sum(LO_SUPPLYCOST) as sm,
		count(distinct S_NAME) as dcnt
	from
		ssb.lineorder_flat
	where
		S_NATION = 'UNITED STATES'
		AND P_CATEGORY = 'MFGR#14'
	group by
		LO_ORDERPRIORITY,
		LO_SHIPMODE,
		P_COLOR,
		P_BRAND
	order by
		LO_ORDERPRIORITY,
		LO_SHIPMODE,
		P_COLOR,
		P_BRAND
) t ;
--117571 386092