Send HTTP requests through a shared requests session
This commit is contained in:
@@ -18,6 +18,7 @@ from .WorkerTocops import WorkerTocops
|
||||
class Crawler:
|
||||
def __init__(self):
|
||||
self.setting = Setting()
|
||||
self.session = requests.session()
|
||||
self.proxy_handler = ProxyHandler()
|
||||
self.worker = None
|
||||
|
||||
@@ -52,7 +53,7 @@ class Crawler:
|
||||
proxy = self.proxy_handler.get_proxy()
|
||||
while True:
|
||||
try:
|
||||
resp = requests.get(url, proxies=proxy, headers=headers, timeout=3)
|
||||
resp = self.session.get(url, proxies=proxy, headers=headers, timeout=3)
|
||||
except Exception as e:
|
||||
self.proxy_handler.set_proxy_dead(proxy)
|
||||
proxy = self.proxy_handler.get_proxy()
|
||||
@@ -129,6 +130,8 @@ class Crawler:
|
||||
Logger.log('Disk space is less than 4GB. Aborted')
|
||||
return
|
||||
|
||||
# self.test()
|
||||
|
||||
Logger.log('Crawling start')
|
||||
self.crawl_torrent()
|
||||
Logger.log('Crawling finished')
|
||||
|
||||
@@ -71,27 +71,4 @@ class WorkerTocops:
|
||||
links.append(torrent)
|
||||
Logger.log(' found download link : {}({})'.format(file_name, url))
|
||||
|
||||
#
|
||||
# links = []
|
||||
# file_table = soup.select('table#file_table')
|
||||
# a_tags = file_table[0].select('a')
|
||||
# torrent_links = [a for a in a_tags if '.torrent' in a.text]
|
||||
# smi_links = [a for a in a_tags if '.smi' in a.text]
|
||||
#
|
||||
# for link in torrent_links:
|
||||
# file_name = link.text.strip()
|
||||
# sub_url = link.attrs['href']
|
||||
# url = urllib.parse.urljoin(page_link.url, sub_url)
|
||||
# links.append(TorrentFile(page_link.title, page_link.episode, file_name, url))
|
||||
#
|
||||
# Logger.log(' found download link : {}({})'.format(file_name, url))
|
||||
#
|
||||
# for link in smi_links:
|
||||
# file_name = link.text.strip()
|
||||
# sub_url = link.attrs['href']
|
||||
# url = urllib.parse.urljoin(link.url, sub_url)
|
||||
# links.append(TorrentFile(page_link.title, page_link.episode, file_name, url))
|
||||
#
|
||||
# Logger.log(' found download link : {}({})'.format(file_name, url))
|
||||
|
||||
return links
|
||||
Reference in New Issue
Block a user