1+ # -*- coding: utf8 -*-
12import sys
23import os
34import requests
45from lxml import etree
56from fake_useragent import UserAgent
7+ from urllib .parse import quote , urlencode
68import urllib
7-
89import time
9- version_links = ['sjb' , 'hjb' , 'ljb' , 'bsd' , 'rjb' ]
10+ import string
11+ version_links = ['hjb' , 'ljb' , 'bsd' , 'rjb' ] # 'sjb',
1012admin = 'https://www.yixuela.com/'
1113subject = 'yuwen/'
1214
@@ -37,7 +39,8 @@ def crwal_artile_content(artitle_content_url, article_folder):
3739 for index , name in enumerate (image_name ):
3840 name = name .split ('/' )[- 1 ]
3941 image_save_path = os .path .join (article_folder , name )
40- url = image_name [index ]
42+ ori_url = image_name [index ]
43+ url = quote (ori_url , safe = '/:?=' )
4144 urllib .request .urlretrieve (url , image_save_path )
4245 print (f'{ image_save_path } 爬取成功!' )
4346
@@ -57,7 +60,7 @@ def crwal_artile(content_link, result_folder):
5760 crwal_artile_content (artitle_content_url , article_folder )
5861 except Exception as e :
5962 print (e )
60- time .sleep (5 )
63+ time .sleep (1 )
6164
6265
6366def run (url , result_path , version ):
@@ -73,9 +76,9 @@ def run(url, result_path, version):
7376 os .makedirs (result_folder , exist_ok = True )
7477 try :
7578 crwal_artile (content_link , result_folder )
79+ time .sleep (2 )
7680 except Exception as e :
7781 print (e )
78- time .sleep (5 )
7982
8083 # if len(title_) != len(title_link):
8184 # raise Exception('title length error')
0 commit comments