前边有简单的说明过通过副本数以及修改元数据信息,移除节点保证citus 的可用性,以下是一个简单的
节点添加以及数据移动的说明
节点需要开启逻辑复制,所以以前的基础镜像稍有调整,同时因为开启了安全认证同时使用了.pgpass
FROM dalongrong/pgspider:citus-9.1
RUN echo "wal_level = logical" >> /usr/local/pgspider/share/postgresql/postgresql.conf.sample
version: "3"
services:
pg-citus-master:
container_name: pg-citus-master
image: dalongrong/pgspider:citus-9.1-wal
volumes:
- "./csvfiles:/opt/csv"
- "./sql:/docker-entrypoint-initdb.d/"
- "./.pgpass:/var/lib/postgresql/.pgpass"
ports:
- "5432:5432"
environment:
- "POSTGRES_PASSWORD=dalong"
pg-citus-worker:
container_name: pg-citus-worker
image: dalongrong/pgspider:citus-9.1-wal
volumes:
- "./csvfiles:/opt/csv"
- "./sql:/docker-entrypoint-initdb.d/"
- "./.pgpass:/var/lib/postgresql/.pgpass"
environment:
- "POSTGRES_PASSWORD=dalong"
ports:
- "5433:5432"
pg-citus-worker2:
container_name: pg-citus-worker2
image: dalongrong/pgspider:citus-9.1-wal
volumes:
- "./csvfiles:/opt/csv"
- "./sql:/docker-entrypoint-initdb.d/"
- "./.pgpass:/var/lib/postgresql/.pgpass"
environment:
- "POSTGRES_PASSWORD=dalong"
ports:
- "5434:5432"
pg-citus-worker3:
container_name: pg-citus-worker3
image: dalongrong/pgspider:citus-9.1-wal
volumes:
- "./csvfiles:/opt/csv"
- "./sql:/docker-entrypoint-initdb.d/"
- "./.pgpass:/var/lib/postgresql/.pgpass"
environment:
- "POSTGRES_PASSWORD=dalong"
ports:
- "5435:5432"
-- wrap in transaction to ensure Docker flag always visible
BEGIN;
CREATE EXTENSION citus;
COMMIT;
curl https://examples.citusdata.com/tutorial/companies.csv > csvfiles/scompanies.csv
curl https://examples.citusdata.com/tutorial/campaigns.csv > csvfiles/campaigns.csv
curl https://examples.citusdata.com/tutorial/ads.csv > csvfiles/ads.csv
docker-compose up -d
CREATE TABLE companies (
id bigint NOT NULL,
name text NOT NULL,
image_url text,
created_at timestamp without time zone NOT NULL,
updated_at timestamp without time zone NOT NULL
);
?
CREATE TABLE campaigns (
id bigint NOT NULL,
company_id bigint NOT NULL,
name text NOT NULL,
cost_model text NOT NULL,
state text NOT NULL,
monthly_budget bigint,
blacklisted_site_urls text[],
created_at timestamp without time zone NOT NULL,
updated_at timestamp without time zone NOT NULL
);
?
CREATE TABLE ads (
id bigint NOT NULL,
company_id bigint NOT NULL,
campaign_id bigint NOT NULL,
name text NOT NULL,
image_url text,
target_url text,
impressions_count bigint DEFAULT 0,
clicks_count bigint DEFAULT 0,
created_at timestamp without time zone NOT NULL,
updated_at timestamp without time zone NOT NULL
);
ALTER TABLE companies
ADD PRIMARY KEY (id);
ALTER TABLE campaigns
ADD PRIMARY KEY (id, company_id);
ALTER TABLE ads
ADD PRIMARY KEY (id, company_id);
容器内部执行命令
\copy companies from ‘companies.csv‘ with csv
\copy campaigns from ‘campaigns.csv‘ with csv
\copy ads from ‘ads.csv‘ with csv
set citus.shard_count =4;
SELECT create_distributed_table(‘companies‘, ‘id‘);
SELECT create_distributed_table(‘campaigns‘, ‘company_id‘);
SELECT create_distributed_table(‘ads‘, ‘company_id‘);
SELECT
master_add_node (‘pg-citus-worker3‘,
‘5432‘);
测试使用ads 表
select * from pg_dist_placement join pg_dist_node on pg_dist_placement.groupid=pg_dist_node.groupid where shardid in (select shardid from pg_dist_shard where logicalrelid=‘ads‘::regclass);
信息
CREATE TABLE ads_102017 (
id bigint NOT NULL,
company_id bigint NOT NULL,
campaign_id bigint NOT NULL,
name text NOT NULL,
image_url text,
target_url text,
impressions_count bigint DEFAULT 0,
clicks_count bigint DEFAULT 0,
created_at timestamp without time zone NOT NULL,
updated_at timestamp without time zone NOT NULL
);
CREATE TABLE ads_102019 (
id bigint NOT NULL,
company_id bigint NOT NULL,
campaign_id bigint NOT NULL,
name text NOT NULL,
image_url text,
target_url text,
impressions_count bigint DEFAULT 0,
clicks_count bigint DEFAULT 0,
created_at timestamp without time zone NOT NULL,
updated_at timestamp without time zone NOT NULL
);
worker2创建发布:
CREATE PUBLICATION pub_shard FOR TABLE ads_102017;
CREATE PUBLICATION pub_shard2 FOR TABLE ads_102019;
worrker3 订阅:
CREATE SUBSCRIPTION sub_shard
CONNECTION ‘host=pg-citus-worker2‘
PUBLICATION pub_shard;
CREATE SUBSCRIPTION sub_shard2
CONNECTION ‘host=pg-citus-worker2‘
PUBLICATION pub_shard2;
master节点修改元数据
实际为了数据的一直,肯能还需要进行锁表操作 lock table ads IN EXCLUSIVE MODE;
等待数据同步完备修改元数据:
update pg_dist_placement set groupid=3 where shardid in (102017,102019) and groupid=2;
删除worker2 的对应分片的数据,以及删除发布
DROP PUBLICATION pub_shard;
DROP PUBLICATION pub_shard2;
drop table ads_102017;
drop table ads_102019;
删除worker3的订阅
DROP SUBSCRIPTION sub_shard;
DROP SUBSCRIPTION sub_shard2;
select * from ads
如果不出问题,数据查询一样是没有问题的
Citus 对于数据处理同时包含了亲缘性,需要关联的分片一起移动,比如上边的操作我们只移动了ads ,但是ads 与
campaigns 几个分片也是需要移动的,不移动关联查询的效果
查询亲缘表信息,注意每个分片的信息都需要调整的,具体操作方法一样
select * from pg_dist_shard where logicalrelid in(select logicalrelid from pg_dist_partition where colocationid=(select colocationid from pg_dist_partition where partmethod=‘h‘ and logicalrelid=‘ads‘::regclass)) and (shardminvalue,shardmaxvalue)=(select shardminvalue,shardmaxvalue from pg_dist_shard where shardid=102017);
信息如下:
https://yq.aliyun.com/articles/647369
https://github.com/rongfengliang/pgspider-docker
https://www.cnblogs.com/rongfengliang/p/12446941.html
http://docs.citusdata.com/en/v9.2/develop/reference_ddl.html#colocation-groups
http://docs.citusdata.com/en/v9.2/sharding/data_modeling.html#colocation
原文:https://www.cnblogs.com/rongfengliang/p/12450033.html