Skip to content

Commit bf1f687

Browse files
author
huayang wang
committed
fix yixuela
1 parent 18acadd commit bf1f687

File tree

2 files changed

+10
-7
lines changed

2 files changed

+10
-7
lines changed

yixuela.com/README.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
1-
下载易学啦 各个版本(人教版)、年级(小、初、高)、文章中 所有图片信息, 需手动指定下载图片的本地存放目录
1+
下载易学啦 各个版本(人教版、鲁人版、苏教版、沪教版、北师大版等)、年级(小、初、高)、文章中 所有图片信息, 需手动指定下载图片的本地存放目录
22

33
网址: https://www.yixuela.com/
44

5-
数据:平均每个版本大约500M的数据量, 保存方式为图片
5+
数据:平均每个版本大约1G数据量, 一共6G的数据, 保存方式为图片
66

77
微信:why19970628
88

yixuela.com/poetry.py

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,14 @@
1+
# -*- coding: utf8 -*-
12
import sys
23
import os
34
import requests
45
from lxml import etree
56
from fake_useragent import UserAgent
7+
from urllib.parse import quote, urlencode
68
import urllib
7-
89
import time
9-
version_links = ['sjb', 'hjb', 'ljb', 'bsd', 'rjb']
10+
import string
11+
version_links = ['hjb', 'ljb', 'bsd', 'rjb'] # 'sjb',
1012
admin = 'https://www.yixuela.com/'
1113
subject = 'yuwen/'
1214

@@ -37,7 +39,8 @@ def crwal_artile_content(artitle_content_url, article_folder):
3739
for index, name in enumerate(image_name):
3840
name = name.split('/')[-1]
3941
image_save_path = os.path.join(article_folder, name)
40-
url = image_name[index]
42+
ori_url = image_name[index]
43+
url = quote(ori_url, safe='/:?=')
4144
urllib.request.urlretrieve(url, image_save_path)
4245
print(f'{image_save_path} 爬取成功!')
4346

@@ -57,7 +60,7 @@ def crwal_artile(content_link, result_folder):
5760
crwal_artile_content(artitle_content_url, article_folder)
5861
except Exception as e:
5962
print(e)
60-
time.sleep(5)
63+
time.sleep(1)
6164

6265

6366
def run(url, result_path, version):
@@ -73,9 +76,9 @@ def run(url, result_path, version):
7376
os.makedirs(result_folder, exist_ok=True)
7477
try:
7578
crwal_artile(content_link, result_folder)
79+
time.sleep(2)
7680
except Exception as e:
7781
print(e)
78-
time.sleep(5)
7982

8083
# if len(title_) != len(title_link):
8184
# raise Exception('title length error')

0 commit comments

Comments
 (0)