clickhouse 批量插入
孤雁长飞 人气:0一.安装使用
ClickHouse是Yandex提供的一个开源的列式存储数据库管理系统,多用于联机分析(OLAP)场景,可提供海量数据的存储和分析,同时利用其数据压缩和向量化引擎的特性,能提供快速的数据搜索。
Ⅰ).安装
sudo yum install yum-utils sudo rpm --import https://repo.yandex.ru/clickhouse/CLICKHOUSE-KEY.GPG sudo yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/x86_64 sudo yum install clickhouse-server clickhouse-client sudo /etc/init.d/clickhouse-server start clickhouse-client
Ⅱ).配置
a).clickhouse-server
CLICKHOUSE_USER=username CLICKHOUSE_LOGDIR=${CLICKHOUSE_HOME}/log/clickhoue-server CLICKHOUSE_LOGDIR_USER=username CLICKHOUSE_DATADIR_OLD=${CLICKHOUSE_HOME}/data/old CLICKHOUSE_DATADIR=${CLICKHOUSE_HOME}/data
b).config.xml
... ... <!-- 配置日志参数 --> <logger> <level>info</level> <log>${CLICKHOUSE_HOME}/log/clickhoue-server/clickhoue-server.log</log> <errorlog>${CLICKHOUSE_HOME}/log/clickhoue-server/clickhoue-server-error.log</errorlog> <size>100M</size> <count>5</count> </logger> <!-- 配置数据保存路径 --> <path>${CLICKHOUSE_HOME}</> <tmp_path>${CLICKHOUSE_HOME}/tmp</> <user_files_path>${CLICKHOUSE_HOME}/user_files</> <!-- 配置监听 --> <listen_host>::</listen_host> <!-- 配置时区 --> <timezone>Asiz/Shanghai</timezone> ... ...
Ⅲ).启停服务
#### a).启动服务 sudo service clickhouse-server start #### b).停止服务 sudo service clickhouse-server stop
Ⅳ).客户端访问
clickhouse-client
二.常用命令
Ⅰ).创建表
CREATE TABLE IF NOT EXISTS database.table_name ON cluster cluster_shardNum_replicasNum( 'id' UInt64, 'name' String, 'time' UInt64, 'age' UInt8, 'flag' UInt8 ) ENGINE = MergeTree PARTITION BY toDate(time/1000) ORDER BY (id,name) SETTINGS index_granularity = 8192
Ⅱ).创建物化视图
CREATE MATERIALIZED VIEW database.view_name ON cluster cluster_shardNum_replicasNum ENGINE = AggregatingMergeTree PARTITION BY toYYYYMMDD(time) ORDER BY (id,name) AS SELECT toStartOfHour(toDateTime(time/1000)) as time, id, name, sumState( if (flag = 1, 1, 0)) AS successCount, sumState( if (flag = 0, 1, 0)) AS faildCount, sumState( if ((age < 10), 1, 0)) AS rang1Age, sumState( if ((age > 10) AND (age < 20), 2, 0)) AS rang2Age, sumState( if ((age > 20), 3, 0)) AS rang3Age, maxState(age) AS maxAge, minState(age) AS minAge FROM datasource.table_name GROUP BY time,id,name
Ⅲ).插入数据
a).普通数据插入
INSERT INTO database.table_name(id, name, age, flag) VALUES(1, 'test', 15, 0)
b).Json数据插入
INSERT INTO database.table_name FORMAT JSONEachRow{"id":"1", "name":"test", "age":"11", "flag":"1"}
Ⅳ).查询数据
a).表数据查询
SELECT * FROM database.table_name WHERE id=1
b).物化视图查询
SELECT id, name, sumMerge(successCount), sumMerge(faildCount), sumMerge(rang1Age), sumMerge(rang2Age), maxMerge(maxAge), minMerge(minAge) FROM database.view_name WHERE id=1 GROUP BY id, name
Ⅴ).创建NESTED表
CREATE TABLE IF NOT EXISTS database.table_name( 'id' UInt64, 'name' String, 'time' UInt64, 'age' UInt8, 'flag' UInt8 nested_table_name Nested ( sequence UInt32, id UInt64, name String, time UInt64, age UInt8, flag UInt8 socketAddr String, socketRemotePort UInt32, socketLocalPort UInt32, eventTime UInt64, exceptionClassName String, hashCode Int32, nextSpanId UInt64 )) ENGINE = MergeTree PARTITION BY toDate (time / 1000) ORDER BY (id, name, time) SETTINGS index_granularity = 8192
Ⅵ).NESTED表数据查询
SELECT table1.*,table1.id FROM nest.table_name AS table1 array JOIN nested_table_name AS table2
Ⅶ).配置字典项
<dictionaries> <dictionary> <name>url</name> <source> <clickhouse> <host>hostname</host> <port>9000</port> <user>default</user> <password/> <db>dict</db> <table>url_dict</table> </clickhouse> </source> <lifetime> <min>30</min> <max>36</max> </lifetime> <layout> <hashed/> </layout> <structure> <id> <name>id</name> </id> <attribute> <name>hash_code</name> <type>String</type> <null_value/> </attribute> <attribute> <name>url</name> <type>String</type> <null_value/> </attribute> </structure> </dictionary> <dictionary> <name>url_hash</name> <source> <clickhouse> <host>hostname</host> <port>9000</port> <user>default</user> <password/> <db>dict</db> <table>url_hash</table> </clickhouse> </source> <lifetime> <min>30</min> <max>36</max> </lifetime> <layout> <complex_key_hashed/> </layout> <structure> <key> <attribute> <name>hash_code</name> <type>String</type> </attribute> </key> <attribute> <name>url</name> <type>String</type> <null_value/> </attribute> </structure> </dictionary> </dictionaries>
Ⅷ).字典查询
SELECT id, dictGet('name', 'name', toUInt64(name)) AS name, dictGetString('url', 'url', tuple(url)) AS url FROM table_name
Ⅸ).导入数据
clickhouse-client --query="INSERT INTO database.table_name FORMAT CSVWithNames" < /path/import_filename.csv
Ⅹ).导出数据
clickhouse-client --query="SELECT * FROM database.table_name FORMAT CSV" sed 's/"//g' > /path/export_filename.csv
Ⅺ).查看partition状态
SELECT table, name, partition,active FROM system.parts WHERE database='database_name'
Ⅻ).清理partition
ALTER TABLE database.table_name ON cluster cluster_shardNum_replicasNum detach partition 'partition_id'
XIII).查看列的压缩率
SELECT database, table, name, formatReadableSize(sum(data_compressed_bytes) AS c) AS comp, formatReadableSize(sum(data_uncompressed_bytes) AS r) AS raw, c/r AS comp_ratio FROM system.columns WHERE database='database_name' AND table='table_name' GROUP BY name
XIV).查看物化视图的磁盘占用
clickhouse-client --query="SELECT partition,count(*) AS partition_num, formatReadableSize(sum(bytes)) AS disk_size FROM system.columns WHERE database='database_name' " --external --?le=***.sql --name=parts --structure='table String, name String, partition UInt64, engine String' -h hostname
加载全部内容