搭建三主三从的 Redis 集群,启动命令和 docker-compose 文件如下:
RC_WKDIR=/data/redis_dir docker-compose up -d
version: '3.8'

# Three-master / three-replica Redis cluster.
# Node naming: redis-node-<shard><role>; the init job below makes the "1" node
# of each shard a master and attaches the "2" node to it as its replica.
services:
  redis-node-11:
    image: redis
    container_name: redis-node-11
    hostname: redis-node-11
    ports:
      - "57011:6379"
    volumes:
      - "${RC_WKDIR:-.}/data/node-11:/data"
    command:
      - redis-server
      - --cluster-enabled
      - "yes"
      - --appendonly
      - "yes"
      - --cluster-config-file
      - nodes.conf
    networks:
      - redis-cluster

  redis-node-12:
    image: redis
    container_name: redis-node-12
    hostname: redis-node-12
    ports:
      - "57012:6379"
    volumes:
      - "${RC_WKDIR:-.}/data/node-12:/data"
    command:
      - redis-server
      - --cluster-enabled
      - "yes"
      - --appendonly
      - "yes"
      - --cluster-config-file
      - nodes.conf
    networks:
      - redis-cluster

  redis-node-21:
    image: redis
    container_name: redis-node-21
    hostname: redis-node-21
    ports:
      - "57021:6379"
    volumes:
      - "${RC_WKDIR:-.}/data/node-21:/data"
    command:
      - redis-server
      - --cluster-enabled
      - "yes"
      - --appendonly
      - "yes"
      - --cluster-config-file
      - nodes.conf
    networks:
      - redis-cluster

  redis-node-22:
    image: redis
    container_name: redis-node-22
    hostname: redis-node-22
    ports:
      - "57022:6379"
    volumes:
      - "${RC_WKDIR:-.}/data/node-22:/data"
    command:
      - redis-server
      - --cluster-enabled
      - "yes"
      - --appendonly
      - "yes"
      - --cluster-config-file
      - nodes.conf
    networks:
      - redis-cluster

  redis-node-31:
    image: redis
    container_name: redis-node-31
    hostname: redis-node-31
    ports:
      - "57031:6379"
    volumes:
      - "${RC_WKDIR:-.}/data/node-31:/data"
    command:
      - redis-server
      - --cluster-enabled
      - "yes"
      - --appendonly
      - "yes"
      - --cluster-config-file
      - nodes.conf
    networks:
      - redis-cluster

  redis-node-32:
    image: redis
    container_name: redis-node-32
    hostname: redis-node-32
    ports:
      - "57032:6379"
    volumes:
      - "${RC_WKDIR:-.}/data/node-32:/data"
    command:
      - redis-server
      - --cluster-enabled
      - "yes"
      - --appendonly
      - "yes"
      - --cluster-config-file
      - nodes.conf
    networks:
      - redis-cluster

  # One-shot job that forms the cluster and then exits.
  redis-cluster-init:
    image: redis
    container_name: redis-cluster-init
    depends_on:
      - redis-node-11
      - redis-node-12
      - redis-node-21
      - redis-node-22
      - redis-node-31
      - redis-node-32
    # Compose performs its own "$" interpolation on this file, so every "$"
    # that must reach the shell is written as "$$".
    command:
      - /bin/bash
      - "-c"
      - |
        set -e
        nodes='redis-node-11 redis-node-12 redis-node-21 redis-node-22 redis-node-31 redis-node-32'

        # depends_on only orders container start; poll until each server answers.
        echo 'Waiting for Redis nodes to be ready...'
        for node in $$nodes; do
          until redis-cli -h "$$node" -p 6379 ping 2>/dev/null | grep -q PONG; do
            sleep 1
          done
        done

        # Re-running "up" must not try to create an existing cluster again.
        if redis-cli -h redis-node-11 -p 6379 cluster info | grep -q 'cluster_state:ok'; then
          echo 'Redis cluster already initialized, nothing to do.'
          exit 0
        fi

        # Wait (bounded) until the cluster view is consistent on all nodes.
        wait_cluster_ok() {
          for _ in $$(seq 1 30); do
            if redis-cli --cluster check redis-node-11:6379 >/dev/null 2>&1; then
              return 0
            fi
            sleep 1
          done
          echo 'cluster did not become consistent in time' >&2
          return 1
        }

        # add_replica <replica-host> <master-host>
        add_replica() {
          wait_cluster_ok
          master_id=$$(redis-cli -h "$$2" -p 6379 cluster nodes | awk '/myself/ {print $$1}')
          redis-cli --cluster add-node "$$1:6379" "$$2:6379" \
            --cluster-slave --cluster-master-id "$$master_id"
        }

        # Create the cluster from the three masters only, so that the replica
        # pairing below is explicit instead of chosen by redis-cli.
        echo '正在创建Redis集群...'
        redis-cli --cluster create \
          redis-node-11:6379 redis-node-21:6379 redis-node-31:6379 \
          --cluster-yes

        echo '配置从节点关系...'
        add_replica redis-node-12 redis-node-11
        add_replica redis-node-22 redis-node-21
        add_replica redis-node-32 redis-node-31

        wait_cluster_ok
        redis-cli -h redis-node-11 -p 6379 cluster nodes
        echo 'Redis集群初始化完成!'
    networks:
      - redis-cluster

networks:
  redis-cluster:
    driver: bridge
另外还有扩缩容的场景,这里用 Python 脚本简单实现了一下(不太严谨,仅作抛砖引玉):
#!/usr/bin/env python3
import argparse
import shlex
import subprocess
import sys
from datetime import datetime
from typing import List, Tuple, Optional, Dict
class RedisClusterResizer:
    """Inspect, grow and shrink a Redis cluster by shelling out to ``redis-cli``.

    ``cluster_entry`` is the ``host:port`` of any node used to reach the
    cluster; ``password`` (optional) is passed to ``redis-cli`` with ``-a``.
    All commands go through :meth:`run_command`.
    """

    # Total number of hash slots, used for the summary and for slot maths.
    TOTAL_SLOTS = 16384

    def __init__(self, cluster_entry: str, password: Optional[str] = None):
        self.cluster_entry = cluster_entry
        self.password = password
        # Prefix for cluster-manager sub-commands: "redis-cli [-a pw] --cluster".
        self.redis_cli = "redis-cli"
        if self.password:
            self.redis_cli += f" -a {shlex.quote(self.password)}"
        self.redis_cli += " --cluster"

    def log(self, message: str):
        """Print ``message`` prefixed with the current local timestamp."""
        print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] {message}")

    def run_command(self, cmd: str, check: bool = True) -> Tuple[bool, str]:
        """Run ``cmd`` through the shell.

        Returns ``(True, stdout)`` on success. When ``check`` is true and the
        command exits non-zero, returns ``(False, text)`` where ``text`` holds
        the captured stdout and stderr.
        """
        shown = cmd
        if self.password:
            # Do not echo the password into logs / terminal scrollback.
            shown = shown.replace(f" -a {shlex.quote(self.password)}", " -a ******")
        print('run cmd: {}'.format(shown))
        try:
            result = subprocess.run(
                cmd,
                shell=True,
                check=check,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )
        except subprocess.CalledProcessError as e:
            return False, f'{{stdout: {e.stdout}, stderr: {e.stderr}}}'
        return True, result.stdout

    @staticmethod
    def _split_address(node: str) -> Tuple[str, str]:
        """Split ``host:port`` into its two parts; raise ValueError if malformed."""
        host, sep, port = node.rpartition(":")
        if not sep or not host or not port:
            raise ValueError(f"invalid node address (expected host:port): {node!r}")
        return host, port

    def _node_cli(self, node: str) -> str:
        """Build the shell-quoted ``redis-cli -h H -p P [-a pw]`` prefix for ``node``."""
        host, port = self._split_address(node)
        cmd = f"redis-cli -h {shlex.quote(host)} -p {shlex.quote(port)}"
        if self.password:
            cmd += f" -a {shlex.quote(self.password)}"
        return cmd

    def _fetch_nodes(self, node: str) -> List[Dict]:
        """Run ``cluster nodes`` against ``node`` and parse every line into a dict."""
        success, output = self.run_command(f"{self._node_cli(node)} cluster nodes")
        if not success:
            return []
        nodes = []
        for line in output.splitlines():
            parts = line.split()
            if len(parts) < 8:
                # Not a complete node line; ignore it.
                continue
            nodes.append({
                "id": parts[0],
                # Drop everything from "@" on, keeping only ip:port.
                "address": parts[1].split("@")[0],
                "flags": parts[2].split(","),
                "master": parts[3],
                "ping_sent": parts[4],
                "ping_recv": parts[5],
                "config_epoch": parts[6],
                "link_state": parts[7],
                # Bracketed entries in the slot columns are skipped by parse_slots.
                "slots": self.parse_slots(parts[8:]),
            })
        return nodes

    def check_redis_node(self, node: str) -> bool:
        """Return True when ``node`` answers ``PING`` with ``PONG``."""
        success, output = self.run_command(f"{self._node_cli(node)} ping")
        return success and "PONG" in output

    def check_cluster_status(self) -> bool:
        """Run ``redis-cli --cluster check`` on the entry node; True on success."""
        self.log("检查集群状态...")
        cmd = f"{self.redis_cli} check {shlex.quote(self.cluster_entry)}"
        success, output = self.run_command(cmd)
        if not success:
            self.log(f"集群状态检查失败: {output}")
        return success

    def get_node_id(self, node: str) -> Optional[str]:
        """Return the cluster node id that ``node`` reports for itself, or None."""
        for info in self._fetch_nodes(node):
            if "myself" in info["flags"]:
                return info["id"]
        return None

    def get_master_nodes(self) -> List[Tuple[str, str]]:
        """Return ``(node_id, node_address)`` for every non-failing master."""
        masters = []
        for info in self.get_cluster_nodes():
            flags = info["flags"]
            # Match the exact failure flags only, so that e.g. "nofailover"
            # does not hide a healthy master.
            if "master" in flags and "fail" not in flags and "fail?" not in flags:
                masters.append((info["id"], info["address"]))
        return masters

    def get_slave_nodes(self, master_id: str) -> List[Tuple[str, str]]:
        """Return ``(node_id, node_address)`` for every replica of ``master_id``."""
        return [
            (info["id"], info["address"])
            for info in self.get_cluster_nodes()
            if "slave" in info["flags"] and info["master"] == master_id
        ]

    def get_cluster_nodes(self) -> List[Dict]:
        """Return structured information for all nodes known to the entry node."""
        return self._fetch_nodes(self.cluster_entry)

    def parse_slots(self, slot_parts: List[str]) -> List[Tuple[int, int]]:
        """Convert slot tokens (``"5"``, ``"0-100"``) into ``(start, end)`` tuples.

        Tokens starting with ``[`` are ignored.
        """
        slots = []
        for part in slot_parts:
            if part.startswith("["):
                continue
            if "-" in part:
                start, end = map(int, part.split("-"))
                slots.append((start, end))
            else:
                slot = int(part)
                slots.append((slot, slot))
        return slots

    def show_cluster_topology(self):
        """Print masters, their slots and replicas, then a short summary."""
        nodes = self.get_cluster_nodes()
        if not nodes:
            self.log("无法获取集群节点信息")
            return
        # Group replicas under their master.
        masters = {}
        for node in nodes:
            if "master" in node["flags"] and node["master"] == "-":
                masters[node["id"]] = {
                    "node": node,
                    "slaves": []
                }
        for node in nodes:
            if "slave" in node["flags"] and node["master"] in masters:
                masters[node["master"]]["slaves"].append(node)
        # Print the topology.
        print("\nRedis 集群拓扑结构:")
        print("=" * 60)
        for master_id, master_data in masters.items():
            master = master_data["node"]
            # Count individual slots, not the number of (start, end) ranges.
            slot_count = sum(end - start + 1 for start, end in master["slots"])
            print(f"主节点 [{master['id'][:8]}] {master['address']}")
            print(f" 状态: {'|'.join(master['flags'])} 槽位: {slot_count}个")
            # Print slot ranges, merging ranges that are directly adjacent.
            if master["slots"]:
                slot_ranges = []
                current_start = master["slots"][0][0]
                current_end = master["slots"][0][1]
                for start, end in master["slots"][1:]:
                    if start == current_end + 1:
                        current_end = end
                    else:
                        slot_ranges.append(f"{current_start}-{current_end}")
                        current_start, current_end = start, end
                slot_ranges.append(f"{current_start}-{current_end}")
                print(f" 槽位分布: {', '.join(slot_ranges)}")
            # Print replicas.
            if master_data["slaves"]:
                for slave in master_data["slaves"]:
                    print(f" └─ 从节点 [{slave['id'][:8]}] {slave['address']}")
            else:
                print(" └─ (无从节点)")
            print("-" * 60)
        # Nodes that have no master and do not carry the master flag.
        unassigned = [n for n in nodes if n["master"] == "-" and "master" not in n["flags"]]
        if unassigned:
            print("\n未分配节点:")
            for node in unassigned:
                print(f" - {node['address']} [{node['id'][:8]}]")
        print("=" * 60)
        self.print_cluster_summary(nodes)

    def print_cluster_summary(self, nodes: List[Dict]):
        """Print master/replica counts and how many slots are assigned."""
        masters = [n for n in nodes if "master" in n["flags"] and n["master"] == "-"]
        slaves = [n for n in nodes if "slave" in n["flags"]]
        total_slots = sum(
            (end - start + 1)
            for master in masters
            for start, end in master["slots"]
        )
        print("\n集群摘要:")
        print(f" - 主节点数: {len(masters)}")
        print(f" - 从节点数: {len(slaves)}")
        print(f" - 已分配槽位: {total_slots}/{self.TOTAL_SLOTS} ({total_slots/self.TOTAL_SLOTS:.1%})")
        print(f" - 节点总数: {len(nodes)}")

    def scale_out(self, new_master: str, new_slave: str):
        """Add ``new_master`` (with ``new_slave`` as its replica) and give it slots.

        Exits the process when a new node is unreachable, when the new
        master's id or the master list cannot be read, or when the final
        cluster check fails. Failures of add-node / reshard are logged and
        the procedure continues (best effort).
        """
        self.log(f"开始扩容操作 - 新主节点: {new_master}, 新从节点: {new_slave}")
        # Both new nodes must be reachable before touching the cluster.
        if not self.check_redis_node(new_master):
            self.log(f"错误: 新主节点 {new_master} 不可用")
            sys.exit(1)
        if not self.check_redis_node(new_slave):
            self.log(f"错误: 新从节点 {new_slave} 不可用")
            sys.exit(1)
        entry = shlex.quote(self.cluster_entry)
        # Add the new master.
        self.log(f"添加新主节点 {new_master} 到集群...")
        cmd = f"{self.redis_cli} add-node {shlex.quote(new_master)} {entry}"
        success, output = self.run_command(cmd)
        if not success:
            self.log(f"添加主节点失败: {output}")
        # Look up the id of the new master.
        new_master_id = self.get_node_id(new_master)
        if not new_master_id:
            self.log("无法获取新主节点ID")
            sys.exit(1)
        # Redistribute hash slots.
        self.log("重新分配哈希槽...")
        masters = self.get_master_nodes()
        if not masters:
            self.log("无法获取主节点列表")
            sys.exit(1)
        # The new master may already be listed at this point; exclude it so
        # the share is TOTAL_SLOTS / (previous masters + 1), not one too many.
        previous_masters = [m for m in masters if m[0] != new_master_id]
        slots_per_node = self.TOTAL_SLOTS // (len(previous_masters) + 1)
        # Move that many slots from all existing masters to the new one.
        cmd = (f"{self.redis_cli} reshard {entry} "
               f"--cluster-from all "
               f"--cluster-to {shlex.quote(new_master_id)} "
               f"--cluster-slots {slots_per_node} "
               f"--cluster-yes")
        success, output = self.run_command(cmd)
        if not success:
            self.log(f"槽迁移失败: {output}")
        # Add the replica.
        self.log(f"添加从节点 {new_slave} 到集群...")
        cmd = (f"{self.redis_cli} add-node {shlex.quote(new_slave)} {entry} "
               f"--cluster-slave --cluster-master-id {shlex.quote(new_master_id)}")
        success, output = self.run_command(cmd)
        if not success:
            self.log(f"添加从节点失败: {output}")
        # Final consistency check.
        if not self.check_cluster_status():
            sys.exit(1)
        self.log("扩容操作完成")

    def scale_in(self, do_master: str, do_slave: str):
        """Remove one master (and one of its replicas) from the cluster.

        ``do_master`` is the address of the master to remove; when empty the
        last master in the list is used. ``do_slave`` selects which replica of
        that master to remove; when it does not match any replica the first
        one is used. All slots of the removed master are moved to the first
        remaining master.
        """
        self.log("开始缩容操作...")
        masters = self.get_master_nodes()
        if len(masters) <= 1:
            self.log("错误: 集群中只有一个主节点,不能缩容")
            sys.exit(1)
        # Pick the master to remove.
        if do_master:
            target = next((m for m in masters if m[1] == do_master), None)
            if target is None:
                # Never silently remove a different node than the one requested.
                self.log(f"错误: 未在集群主节点中找到 {do_master}")
                sys.exit(1)
        else:
            target = masters[-1]
        target_master_id, target_master = target
        self.log(f"将移除主节点: {target_master} (ID: {target_master_id})")
        # Pick the replica to remove.
        slaves = self.get_slave_nodes(target_master_id)
        if not slaves:
            self.log("警告: 目标主节点没有从节点")
            target_slave_id, target_slave = None, None
        else:
            target_slave_id, target_slave = next(
                (s for s in slaves if s[1] == do_slave), slaves[0]
            )
            self.log(f"将移除从节点: {target_slave} (ID: {target_slave_id})")
        # The slots go to the first master that is not being removed.
        remaining = [m for m in masters if m[0] != target_master_id]
        recipient_master_id, recipient_master = remaining[0]
        self.log(f"将槽迁移到目标节点: {recipient_master} (ID: {recipient_master_id})")
        # Migrate the slots.
        self.log(f"迁移主节点 {target_master} 的哈希槽...")
        cmd = (f"{self.redis_cli} reshard {shlex.quote(target_master)} "
               f"--cluster-from {shlex.quote(target_master_id)} "
               f"--cluster-to {shlex.quote(recipient_master_id)} "
               f"--cluster-slots {self.TOTAL_SLOTS} "  # upper bound: move every slot
               f"--cluster-yes")
        success, output = self.run_command(cmd)
        if not success:
            self.log(f"槽迁移失败: {output}")
            sys.exit(1)
        entry = shlex.quote(self.cluster_entry)
        # Remove the replica.
        if target_slave_id:
            self.log(f"删除从节点 {target_slave}...")
            cmd = f"{self.redis_cli} del-node {entry} {shlex.quote(target_slave_id)}"
            success, output = self.run_command(cmd)
            if not success:
                self.log(f"删除从节点失败: {output}")
        # Remove the master.
        self.log(f"删除主节点 {target_master}...")
        cmd = f"{self.redis_cli} del-node {entry} {shlex.quote(target_master_id)}"
        success, output = self.run_command(cmd)
        if not success:
            self.log(f"删除主节点失败: {output}")
        # Final consistency check.
        if not self.check_cluster_status():
            self.log("集群状态异常")
            sys.exit(1)
        self.log("缩容操作完成")
def main():
    """Parse the command line and dispatch to the requested cluster action.

    ``scale-out`` requires two positional node addresses (new master, new
    replica). For ``scale-in`` both are optional: missing values are passed
    as empty strings to ``scale_in``.
    """
    parser = argparse.ArgumentParser(description="Redis集群管理工具")
    parser.add_argument("action", choices=["scale-out", "scale-in", "show-topology"],
                        help="操作类型: scale-out(扩容), scale-in(缩容) 或 show-topology(显示拓扑)")
    parser.add_argument("--cluster-entry", default="10.150.24.18:57011",
                        help="集群入口节点 (默认: 10.150.24.18:57011)")
    parser.add_argument("--password", help="Redis密码")
    parser.add_argument("action_nodes", nargs="*",
                        help="扩容/缩容时需要的新节点地址 (主节点 从节点)")
    args = parser.parse_args()
    resizer = RedisClusterResizer(args.cluster_entry, args.password)
    if args.action == "scale-out":
        if len(args.action_nodes) < 2:
            print("错误: 扩容需要指定新主节点和新从节点")
            sys.exit(1)
        resizer.scale_out(args.action_nodes[0], args.action_nodes[1])
    elif args.action == "scale-in":
        # Pad with empty strings so that omitting the nodes does not raise
        # IndexError.
        padded = list(args.action_nodes) + ["", ""]
        resizer.scale_in(padded[0], padded[1])
    elif args.action == "show-topology":
        resizer.show_cluster_topology()
    else:
        print(f"错误: 未知操作 {args.action}")
        sys.exit(1)


if __name__ == "__main__":
    main()