import os
def split_large_json(input_file_path, output_dir, max_size_mb=100):
    """Split a large line-oriented JSON file into size-limited part files.

    The input is read line by line and every line is copied whole into the
    current part, so the file is only ever cut at line boundaries. An input
    without line breaks therefore ends up in a single part. Parts are
    written to ``output_dir`` as ``<input name>_part<N>.json`` with ``N``
    starting at 1. An empty input produces no part files.

    The size check runs before a line is written, so a part can exceed the
    threshold by up to one line.

    Args:
        input_file_path: Full path of the UTF-8 encoded input file.
        output_dir: Directory that receives the parts; created if missing.
        max_size_mb: Size threshold per part in MiB (default 100).

    Raises:
        OSError: If the input cannot be read or a part cannot be written.
        UnicodeDecodeError: If the input is not valid UTF-8.
    """
    # Make sure the destination directory exists.
    os.makedirs(output_dir, exist_ok=True)

    # Input file name without its extension, used as the part-name prefix.
    input_filename = os.path.splitext(os.path.basename(input_file_path))[0]
    max_size = max_size_mb * 1024 * 1024  # threshold in bytes

    part_num = 1
    current_size = 0
    output_file = None
    try:
        # newline="" turns off newline translation on both ends, so line
        # terminators are copied verbatim and the byte count below matches
        # what actually lands on disk.
        with open(input_file_path, 'r', encoding='utf-8', newline='') as infile:
            for line in infile:
                if output_file is None or current_size >= max_size:
                    if output_file is not None:
                        output_file.close()
                    # Build the output file name including the full path.
                    output_filename = os.path.join(
                        output_dir, f"{input_filename}_part{part_num}.json"
                    )
                    output_file = open(
                        output_filename, 'w', encoding='utf-8', newline=''
                    )
                    part_num += 1
                    current_size = 0
                output_file.write(line)
                current_size += len(line.encode('utf-8'))
    finally:
        # Close the last part even when reading or writing raised.
        if output_file is not None:
            output_file.close()
# Example usage
if __name__ == "__main__":
    # Replace both paths with real locations before running:
    # the input file to split, and the directory that receives the parts.
    input_json = "E:/八楼mangodb/device-sms.smsMessageHistory.json"
    output_directory = "E:/八楼分割"

    # Run the split with a threshold of 100 MB per part.
    split_large_json(input_json, output_directory, max_size_mb=100)
# 80GB JSON 文件按照指定大小分割的代码
# 最新推荐文章于 2025-08-08 22:01:10 发布