docker-compose搭建 redis 集群

下面搭建一个三主三从的 Redis 集群,docker-compose 文件如下:

启动命令(RC_WKDIR 指定数据目录,不设置时默认为当前目录):RC_WKDIR=/data/redis_dir docker-compose up -d
version: '3.8'

services:
  # Cluster-enabled Redis node, published on host port 57011.
  redis-node-11:
    image: redis
    container_name: redis-node-11
    hostname: redis-node-11
    ports:
      # host port : container port
      - "57011:6379"
    volumes:
      # Container /data is bind-mounted from RC_WKDIR (current directory
      # when the variable is unset).
      - ${RC_WKDIR:-.}/data/node-11:/data
    # Exec-form command: one list item per argv element, so every option
    # name and its value reach redis-server as separate arguments.
    command:
      - redis-server
      - --cluster-enabled
      - "yes"
      - --appendonly
      - "yes"
      - --cluster-config-file
      - nodes.conf
    networks:
      - redis-cluster

  # Cluster-enabled Redis node, published on host port 57012.
  redis-node-12:
    image: redis
    container_name: redis-node-12
    ports:
      # host port : container port
      - "57012:6379"
    volumes:
      # Container /data is bind-mounted from RC_WKDIR (current directory
      # when the variable is unset).
      - ${RC_WKDIR:-.}/data/node-12:/data
    # Exec-form command: one list item per argv element, so every option
    # name and its value reach redis-server as separate arguments.
    command:
      - redis-server
      - --cluster-enabled
      - "yes"
      - --appendonly
      - "yes"
      - --cluster-config-file
      - nodes.conf
    networks:
      - redis-cluster

  # Cluster-enabled Redis node, published on host port 57021.
  redis-node-21:
    image: redis
    container_name: redis-node-21
    ports:
      # host port : container port
      - "57021:6379"
    volumes:
      # Container /data is bind-mounted from RC_WKDIR (current directory
      # when the variable is unset).
      - ${RC_WKDIR:-.}/data/node-21:/data
    # Exec-form command: one list item per argv element, so every option
    # name and its value reach redis-server as separate arguments.
    command:
      - redis-server
      - --cluster-enabled
      - "yes"
      - --appendonly
      - "yes"
      - --cluster-config-file
      - nodes.conf
    networks:
      - redis-cluster

  # Cluster-enabled Redis node, published on host port 57022.
  redis-node-22:
    image: redis
    container_name: redis-node-22
    ports:
      # host port : container port
      - "57022:6379"
    volumes:
      # Container /data is bind-mounted from RC_WKDIR (current directory
      # when the variable is unset).
      - ${RC_WKDIR:-.}/data/node-22:/data
    # Exec-form command: one list item per argv element, so every option
    # name and its value reach redis-server as separate arguments.
    command:
      - redis-server
      - --cluster-enabled
      - "yes"
      - --appendonly
      - "yes"
      - --cluster-config-file
      - nodes.conf
    networks:
      - redis-cluster

  # Cluster-enabled Redis node, published on host port 57031.
  redis-node-31:
    image: redis
    container_name: redis-node-31
    ports:
      # host port : container port
      - "57031:6379"
    volumes:
      # Container /data is bind-mounted from RC_WKDIR (current directory
      # when the variable is unset).
      - ${RC_WKDIR:-.}/data/node-31:/data
    # Exec-form command: one list item per argv element, so every option
    # name and its value reach redis-server as separate arguments.
    command:
      - redis-server
      - --cluster-enabled
      - "yes"
      - --appendonly
      - "yes"
      - --cluster-config-file
      - nodes.conf
    networks:
      - redis-cluster

  # Cluster-enabled Redis node, published on host port 57032.
  redis-node-32:
    image: redis
    container_name: redis-node-32
    ports:
      # host port : container port
      - "57032:6379"
    volumes:
      # Container /data is bind-mounted from RC_WKDIR (current directory
      # when the variable is unset).
      - ${RC_WKDIR:-.}/data/node-32:/data
    # Exec-form command: one list item per argv element, so every option
    # name and its value reach redis-server as separate arguments.
    command:
      - redis-server
      - --cluster-enabled
      - "yes"
      - --appendonly
      - "yes"
      - --cluster-config-file
      - nodes.conf
    networks:
      - redis-cluster

  # One-shot job: waits for the six nodes, forms a cluster out of the three
  # intended masters (11, 21, 31), then attaches 12, 22 and 32 as their
  # replicas.
  redis-cluster-init:
    image: redis
    container_name: redis-cluster-init
    depends_on:
      - redis-node-11
      - redis-node-12
      - redis-node-21
      - redis-node-22
      - redis-node-31
      - redis-node-32
    # The script is a literal block scalar, so YAML performs no escape
    # processing on it (a `\$` inside a double-quoted scalar is a YAML
    # syntax error). Every shell `$` is written `$$` because Compose would
    # otherwise try to interpolate it as a variable.
    command:
      - /bin/bash
      - -c
      - |
        echo 'Waiting for Redis nodes to be ready...'
        sleep 10
        echo $$(redis-cli -h redis-node-11 -p 6379 cluster nodes | grep myself)
        echo '正在创建Redis集群...'
        # Masters only. Creating all six nodes with --cluster-replicas 1
        # would already assign replicas (in an order chosen by redis-cli),
        # and the add-node calls below would then be rejected because the
        # target nodes are no longer empty.
        redis-cli --cluster create \
          redis-node-11:6379 redis-node-21:6379 redis-node-31:6379 \
          --cluster-replicas 0 --cluster-yes
        echo '配置从节点关系...'
        redis-cli --cluster add-node \
          redis-node-12:6379 redis-node-11:6379 \
          --cluster-slave \
          --cluster-master-id "$$(redis-cli -h redis-node-11 -p 6379 cluster nodes | grep myself | awk '{print $$1}')"
        redis-cli --cluster add-node \
          redis-node-22:6379 redis-node-21:6379 \
          --cluster-slave \
          --cluster-master-id "$$(redis-cli -h redis-node-21 -p 6379 cluster nodes | grep myself | awk '{print $$1}')"
        redis-cli --cluster add-node \
          redis-node-32:6379 redis-node-31:6379 \
          --cluster-slave \
          --cluster-master-id "$$(redis-cli -h redis-node-31 -p 6379 cluster nodes | grep myself | awk '{print $$1}')"
        echo 'Redis集群初始化完成!'
    networks:
      - redis-cluster

# Bridge network that every service in this file attaches to; the init job
# addresses the Redis nodes by name over it.
networks:
  redis-cluster:
    driver: bridge

另外还有集群扩缩容的场景,这里用 Python 脚本简单实现了一下(不太严谨,仅作抛砖引玉):

#!/usr/bin/env python3
import argparse
import shlex
import subprocess
import sys
from datetime import datetime
from typing import Dict, List, Optional, Tuple

class RedisClusterResizer:
    """Inspect, grow and shrink a Redis Cluster by shelling out to ``redis-cli``.

    Every operation builds a ``redis-cli`` command line and executes it through
    the shell (see :meth:`run_command`). Nodes are addressed as ``host:port``
    strings; ``cluster_entry`` is the node used as the entry point for
    cluster-wide commands.
    """

    def __init__(self, cluster_entry: str, password: Optional[str] = None):
        """Remember the entry node and pre-build the ``redis-cli --cluster`` prefix.

        Args:
            cluster_entry: ``host:port`` of any node that is already in the cluster.
            password: Optional Redis password, passed to ``redis-cli`` via ``-a``.
        """
        self.cluster_entry = cluster_entry
        self.password = password
        self.redis_cli = "redis-cli"
        if self.password:
            # Commands run with shell=True, so the password must be quoted or a
            # space / `$` / `&` in it would corrupt the command line.
            self.redis_cli += f" -a {shlex.quote(self.password)}"
        self.redis_cli += " --cluster"

    def log(self, message: str) -> None:
        """Print ``message`` prefixed with the current local timestamp."""
        print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] {message}")

    def run_command(self, cmd: str, check: bool = True) -> Tuple[bool, str]:
        """Run ``cmd`` through the shell and capture its output.

        Args:
            cmd: Complete shell command line (pipelines are allowed).
            check: When true, a non-zero exit status is reported as a failure.

        Returns:
            ``(True, stdout)`` on success, or ``(False, text)`` where ``text``
            contains the captured stdout and stderr of the failed command.
        """
        try:
            print('run cmd: {}'.format(cmd))
            result = subprocess.run(
                cmd,
                shell=True,
                check=check,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )
            return True, result.stdout
        except subprocess.CalledProcessError as e:
            return False, f'{{stdout: {e.stdout}, stderr: {e.stderr}}}'

    def _node_cli(self, node: str) -> str:
        """Return the ``redis-cli -h HOST -p PORT [-a PASSWORD]`` prefix for ``node``."""
        host, port = node.rsplit(":", 1)
        cmd = f"redis-cli -h {shlex.quote(host)} -p {shlex.quote(port)}"
        if self.password:
            cmd += f" -a {shlex.quote(self.password)}"
        return cmd

    @staticmethod
    def _parse_id_and_address(output: str) -> List[Tuple[str, str]]:
        """Extract ``(node_id, host:port)`` from each ``cluster nodes`` line."""
        pairs = []
        for line in output.splitlines():
            parts = line.split()
            if len(parts) < 2:
                # Blank or truncated line: nothing to extract.
                continue
            # The address field looks like host:port@busport; keep host:port.
            pairs.append((parts[0], parts[1].split("@")[0]))
        return pairs

    def check_redis_node(self, node: str) -> bool:
        """Return True when ``node`` answers ``PING`` with ``PONG``."""
        success, output = self.run_command(f"{self._node_cli(node)} ping")
        return success and "PONG" in output

    def check_cluster_status(self) -> bool:
        """Run ``redis-cli --cluster check`` against the entry node.

        Returns:
            True when the check command exits successfully; failures are logged.
        """
        self.log("检查集群状态...")
        cmd = f"{self.redis_cli} check {self.cluster_entry}"
        success, output = self.run_command(cmd)
        if not success:
            self.log(f"集群状态检查失败: {output}")
        return success

    def get_node_id(self, node: str) -> Optional[str]:
        """Return the cluster node id that ``node`` reports for itself, or None."""
        cmd = f"{self._node_cli(node)} cluster nodes | grep myself"
        success, output = self.run_command(cmd)
        fields = output.split() if success else []
        if not fields:
            return None
        return fields[0]

    def get_master_nodes(self) -> List[Tuple[str, str]]:
        """Return ``(node_id, node_address)`` for every master not marked as failing."""
        cmd = (f"{self._node_cli(self.cluster_entry)}"
               " cluster nodes | grep master | grep -v fail")
        success, output = self.run_command(cmd)
        if not success:
            return []
        return self._parse_id_and_address(output)

    def get_slave_nodes(self, master_id: str) -> List[Tuple[str, str]]:
        """Return ``(node_id, node_address)`` for the replicas of ``master_id``."""
        cmd = (f"{self._node_cli(self.cluster_entry)}"
               f" cluster nodes | grep slave | grep {master_id}")
        success, output = self.run_command(cmd)
        if not success:
            # grep exits non-zero when nothing matches, i.e. no replicas.
            return []
        return self._parse_id_and_address(output)

    def get_cluster_nodes(self) -> List[Dict]:
        """Return one dict per ``cluster nodes`` line as seen from the entry node.

        Each dict has the keys ``id``, ``address``, ``flags`` (list), ``master``,
        ``ping_sent``, ``ping_recv``, ``config_epoch``, ``link_state`` and
        ``slots`` (list of inclusive ``(start, end)`` tuples).
        """
        cmd = f"{self._node_cli(self.cluster_entry)} cluster nodes"
        success, output = self.run_command(cmd)
        if not success:
            return []

        nodes = []
        for line in output.splitlines():
            parts = line.split()
            if len(parts) < 8:
                continue

            node = {
                "id": parts[0],
                "address": parts[1].split("@")[0],
                "flags": parts[2].split(","),
                "master": parts[3],
                "ping_sent": parts[4],
                "ping_recv": parts[5],
                "config_epoch": parts[6],
                "link_state": parts[7],
                "slots": []
            }

            if len(parts) > 8:
                # NOTE(review): a bare "->" field is kept from the original
                # parser; confirm that redis-cli ever emits it in this position.
                if parts[8] == "->":
                    node["migrating"] = parts[9]
                else:
                    node["slots"] = self.parse_slots(parts[8:])

            nodes.append(node)
        return nodes

    def parse_slots(self, slot_parts: List[str]) -> List[Tuple[int, int]]:
        """Convert slot fields (``"5"`` or ``"0-5460"``) into inclusive ranges.

        Fields starting with ``[`` are skipped.
        """
        slots = []
        for part in slot_parts:
            if part.startswith("["):
                continue
            if "-" in part:
                start, end = map(int, part.split("-"))
                slots.append((start, end))
            else:
                slot = int(part)
                slots.append((slot, slot))
        return slots

    def show_cluster_topology(self) -> None:
        """Print each master with its slots and replicas, then a summary."""
        nodes = self.get_cluster_nodes()
        if not nodes:
            self.log("无法获取集群节点信息")
            return

        # Group replicas under the master they replicate.
        masters = {}
        for node in nodes:
            if "master" in node["flags"] and node["master"] == "-":
                masters[node["id"]] = {
                    "node": node,
                    "slaves": []
                }

        for node in nodes:
            if "slave" in node["flags"] and node["master"] in masters:
                masters[node["master"]]["slaves"].append(node)

        print("\nRedis 集群拓扑结构:")
        print("=" * 60)
        for master_id, master_data in masters.items():
            master = master_data["node"]
            # Count individual slots, not slot ranges, to match the label.
            slot_count = sum(end - start + 1 for start, end in master["slots"])
            print(f"主节点 [{master['id'][:8]}] {master['address']}")
            print(f"  状态: {'|'.join(master['flags'])} 槽位: {slot_count}个")

            if master["slots"]:
                # Merge adjacent ranges so contiguous slots print as one span.
                slot_ranges = []
                current_start = master["slots"][0][0]
                current_end = master["slots"][0][1]

                for start, end in master["slots"][1:]:
                    if start == current_end + 1:
                        current_end = end
                    else:
                        slot_ranges.append(f"{current_start}-{current_end}")
                        current_start, current_end = start, end
                slot_ranges.append(f"{current_start}-{current_end}")

                print(f"  槽位分布: {', '.join(slot_ranges)}")

            if master_data["slaves"]:
                for slave in master_data["slaves"]:
                    print(f"  └─ 从节点 [{slave['id'][:8]}] {slave['address']}")
            else:
                print("  └─ (无从节点)")
            print("-" * 60)

        # Nodes that have no master and are not flagged as masters themselves.
        unassigned = [n for n in nodes if n["master"] == "-" and "master" not in n["flags"]]
        if unassigned:
            print("\n未分配节点:")
            for node in unassigned:
                print(f"  - {node['address']} [{node['id'][:8]}]")

        print("=" * 60)
        self.print_cluster_summary(nodes)

    def print_cluster_summary(self, nodes: List[Dict]) -> None:
        """Print master/replica counts and how many of the 16384 slots are assigned."""
        masters = [n for n in nodes if "master" in n["flags"] and n["master"] == "-"]
        slaves = [n for n in nodes if "slave" in n["flags"]]

        total_slots = sum(
            (end - start + 1)
            for master in masters
            for start, end in master["slots"]
        )

        print(f"\n集群摘要:")
        print(f"  - 主节点数: {len(masters)}")
        print(f"  - 从节点数: {len(slaves)}")
        print(f"  - 已分配槽位: {total_slots}/16384 ({total_slots/16384:.1%})")
        print(f"  - 节点总数: {len(nodes)}")

    def scale_out(self, new_master: str, new_slave: str) -> None:
        """Add ``new_master`` and its replica ``new_slave``, then reshard slots to it.

        Exits the process with status 1 when a new node is unreachable, the new
        master's id or the master list cannot be read, or the final cluster
        check fails. Failures of add-node / reshard are logged and the procedure
        continues (best effort).
        """
        self.log(f"开始扩容操作 - 新主节点: {new_master}, 新从节点: {new_slave}")

        if not self.check_redis_node(new_master):
            self.log(f"错误: 新主节点 {new_master} 不可用")
            sys.exit(1)
        if not self.check_redis_node(new_slave):
            self.log(f"错误: 新从节点 {new_slave} 不可用")
            sys.exit(1)

        self.log(f"添加新主节点 {new_master} 到集群...")
        cmd = f"{self.redis_cli} add-node {new_master} {self.cluster_entry}"
        success, output = self.run_command(cmd)
        if not success:
            self.log(f"添加主节点失败: {output}")

        new_master_id = self.get_node_id(new_master)
        if not new_master_id:
            self.log("无法获取新主节点ID")
            sys.exit(1)

        self.log("重新分配哈希槽...")
        masters = self.get_master_nodes()
        if not masters:
            self.log("无法获取主节点列表")
            sys.exit(1)

        # The list is read after add-node, so it normally already contains the
        # new master. Count it exactly once: 16384 / (previous masters + 1).
        previous_masters = [m for m in masters if m[0] != new_master_id]
        slots_per_node = 16384 // (len(previous_masters) + 1)

        cmd = (f"{self.redis_cli} reshard {self.cluster_entry} "
               f"--cluster-from all "
               f"--cluster-to {new_master_id} "
               f"--cluster-slots {slots_per_node} "
               f"--cluster-yes")
        success, output = self.run_command(cmd)
        if not success:
            self.log(f"槽迁移失败: {output}")

        self.log(f"添加从节点 {new_slave} 到集群...")
        cmd = (f"{self.redis_cli} add-node {new_slave} {self.cluster_entry} "
               f"--cluster-slave --cluster-master-id {new_master_id}")
        success, output = self.run_command(cmd)
        if not success:
            self.log(f"添加从节点失败: {output}")

        if not self.check_cluster_status():
            sys.exit(1)

        self.log("扩容操作完成")

    def scale_in(self, do_master: str, do_slave: str) -> None:
        """Remove master ``do_master`` (and one of its replicas) from the cluster.

        The master's slots are first resharded to the first remaining master.
        When ``do_master`` is not among the current masters, the last listed
        master is removed instead. ``do_slave`` selects which replica of the
        target to delete; the first replica is used when it does not match.

        Exits the process with status 1 when only one master exists, the slot
        migration fails, or the final cluster check fails.
        """
        self.log("开始缩容操作...")

        masters = self.get_master_nodes()
        if len(masters) <= 1:
            self.log("错误: 集群中只有一个主节点,不能缩容")
            sys.exit(1)

        # Pick the requested master itself; fall back to the last one.
        target = next((m for m in reversed(masters) if m[1] == do_master), None)
        if target is None:
            target = masters[-1]
            self.log(f"警告: 未找到主节点 {do_master},默认移除最后一个主节点")
        target_master_id, target_master = target
        self.log(f"将移除主节点: {target_master} (ID: {target_master_id})")

        slaves = self.get_slave_nodes(target_master_id)
        if not slaves:
            self.log("警告: 目标主节点没有从节点")
            target_slave_id, target_slave = None, None
        else:
            target_slave_id, target_slave = next(
                (s for s in slaves if s[1] == do_slave), slaves[0]
            )
            self.log(f"将移除从节点: {target_slave} (ID: {target_slave_id})")

        # The recipient must be a master that stays in the cluster.
        remaining = [m for m in masters if m[0] != target_master_id]
        recipient_master_id, recipient_master = remaining[0]
        self.log(f"将槽迁移到目标节点: {recipient_master} (ID: {recipient_master_id})")

        self.log(f"迁移主节点 {target_master} 的哈希槽...")
        cmd = (f"{self.redis_cli} reshard {target_master} "
               f"--cluster-from {target_master_id} "
               f"--cluster-to {recipient_master_id} "
               f"--cluster-slots 16384 "  # request every slot the source owns
               f"--cluster-yes")
        success, output = self.run_command(cmd)
        if not success:
            self.log(f"槽迁移失败: {output}")
            sys.exit(1)

        if target_slave_id:
            self.log(f"删除从节点 {target_slave}...")
            cmd = f"{self.redis_cli} del-node {self.cluster_entry} {target_slave_id}"
            success, output = self.run_command(cmd)
            if not success:
                self.log(f"删除从节点失败: {output}")

        self.log(f"删除主节点 {target_master}...")
        cmd = f"{self.redis_cli} del-node {self.cluster_entry} {target_master_id}"
        success, output = self.run_command(cmd)
        if not success:
            self.log(f"删除主节点失败: {output}")

        if not self.check_cluster_status():
            self.log("集群状态异常")
            sys.exit(1)

        self.log("缩容操作完成")

def main():
    """Parse the command line and run the requested cluster action.

    Positional arguments are the action (``scale-out``, ``scale-in`` or
    ``show-topology``) followed by the node addresses the action needs. Both
    resize actions require two addresses (master, then replica); the process
    exits with status 1 when they are missing.
    """
    parser = argparse.ArgumentParser(description="Redis集群管理工具")
    parser.add_argument("action", choices=["scale-out", "scale-in", "show-topology"],
                      help="操作类型: scale-out(扩容), scale-in(缩容) 或 show-topology(显示拓扑)")
    parser.add_argument("--cluster-entry", default="10.150.24.18:57011",
                      help="集群入口节点 (默认: 10.150.24.18:57011)")
    parser.add_argument("--password", help="Redis密码")
    parser.add_argument("action_nodes", nargs="*",
                      help="扩容/缩容时需要的新节点地址 (主节点 从节点)")

    args = parser.parse_args()

    # Validate the positional nodes up front so a missing argument produces a
    # clear message instead of an IndexError traceback.
    if args.action in ("scale-out", "scale-in") and len(args.action_nodes) < 2:
        if args.action == "scale-out":
            print("错误: 扩容需要指定新主节点和新从节点")
        else:
            print("错误: 缩容需要指定要移除的主节点和从节点")
        sys.exit(1)

    resizer = RedisClusterResizer(args.cluster_entry, args.password)

    if args.action == "scale-out":
        resizer.scale_out(args.action_nodes[0], args.action_nodes[1])
    elif args.action == "scale-in":
        resizer.scale_in(args.action_nodes[0], args.action_nodes[1])
    elif args.action == "show-topology":
        resizer.show_cluster_topology()
    else:
        # Unreachable while argparse enforces `choices`; kept as a safety net.
        print(f"错误: 未知操作 {args.action}")
        sys.exit(1)

if __name__ == "__main__":
    main()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

BBJG_001

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值