Send HTTP requests through a shared requests session

This commit is contained in:
2018-06-25 20:23:40 +09:00
parent 8c9ddb9ce8
commit b19499b6a4
2 changed files with 5 additions and 25 deletions

View File

@@ -18,6 +18,7 @@ from .WorkerTocops import WorkerTocops
class Crawler: class Crawler:
def __init__(self): def __init__(self):
self.setting = Setting() self.setting = Setting()
self.session = requests.session()
self.proxy_handler = ProxyHandler() self.proxy_handler = ProxyHandler()
self.worker = None self.worker = None
@@ -52,7 +53,7 @@ class Crawler:
proxy = self.proxy_handler.get_proxy() proxy = self.proxy_handler.get_proxy()
while True: while True:
try: try:
- resp = requests.get(url, proxies=proxy, headers=headers, timeout=3)
+ resp = self.session.get(url, proxies=proxy, headers=headers, timeout=3)
except Exception as e: except Exception as e:
self.proxy_handler.set_proxy_dead(proxy) self.proxy_handler.set_proxy_dead(proxy)
proxy = self.proxy_handler.get_proxy() proxy = self.proxy_handler.get_proxy()
@@ -129,6 +130,8 @@ class Crawler:
Logger.log('Disk space is less than 4GB. Aborted') Logger.log('Disk space is less than 4GB. Aborted')
return return
# self.test()
Logger.log('Crawling start') Logger.log('Crawling start')
self.crawl_torrent() self.crawl_torrent()
Logger.log('Crawling finished') Logger.log('Crawling finished')

View File

@@ -71,27 +71,4 @@ class WorkerTocops:
links.append(torrent) links.append(torrent)
Logger.log(' found download link : {}({})'.format(file_name, url)) Logger.log(' found download link : {}({})'.format(file_name, url))
# return links
# links = []
# file_table = soup.select('table#file_table')
# a_tags = file_table[0].select('a')
# torrent_links = [a for a in a_tags if '.torrent' in a.text]
# smi_links = [a for a in a_tags if '.smi' in a.text]
#
# for link in torrent_links:
# file_name = link.text.strip()
# sub_url = link.attrs['href']
# url = urllib.parse.urljoin(page_link.url, sub_url)
# links.append(TorrentFile(page_link.title, page_link.episode, file_name, url))
#
# Logger.log(' found download link : {}({})'.format(file_name, url))
#
# for link in smi_links:
# file_name = link.text.strip()
# sub_url = link.attrs['href']
# url = urllib.parse.urljoin(link.url, sub_url)
# links.append(TorrentFile(page_link.title, page_link.episode, file_name, url))
#
# Logger.log(' found download link : {}({})'.format(file_name, url))
return links