From b19499b6a4991b68acbcddd88a126c3bd22420a4 Mon Sep 17 00:00:00 2001 From: mjjo53 Date: Mon, 25 Jun 2018 20:23:40 +0900 Subject: [PATCH] request with session --- Crawler/Crawler.py | 5 ++++- Crawler/WorkerTocops.py | 25 +------------------------ 2 files changed, 5 insertions(+), 25 deletions(-) diff --git a/Crawler/Crawler.py b/Crawler/Crawler.py index 29993ec..6c76736 100755 --- a/Crawler/Crawler.py +++ b/Crawler/Crawler.py @@ -18,6 +18,7 @@ from .WorkerTocops import WorkerTocops class Crawler: def __init__(self): self.setting = Setting() + self.session = requests.session() self.proxy_handler = ProxyHandler() self.worker = None @@ -52,7 +53,7 @@ class Crawler: proxy = self.proxy_handler.get_proxy() while True: try: - resp = requests.get(url, proxies=proxy, headers=headers, timeout=3) + resp = self.session.get(url, proxies=proxy, headers=headers, timeout=3) except Exception as e: self.proxy_handler.set_proxy_dead(proxy) proxy = self.proxy_handler.get_proxy() @@ -129,6 +130,8 @@ class Crawler: Logger.log('Disk space is less than 4GB. Aborted') return + # self.test() + Logger.log('Crawling start') self.crawl_torrent() Logger.log('Crawling finished') diff --git a/Crawler/WorkerTocops.py b/Crawler/WorkerTocops.py index 06219c5..877856c 100644 --- a/Crawler/WorkerTocops.py +++ b/Crawler/WorkerTocops.py @@ -71,27 +71,4 @@ class WorkerTocops: links.append(torrent) Logger.log(' found download link : {}({})'.format(file_name, url)) - # - # links = [] - # file_table = soup.select('table#file_table') - # a_tags = file_table[0].select('a') - # torrent_links = [a for a in a_tags if '.torrent' in a.text] - # smi_links = [a for a in a_tags if '.smi' in a.text] - # - # for link in torrent_links: - # file_name = link.text.strip() - # sub_url = link.attrs['href'] - # url = urllib.parse.urljoin(page_link.url, sub_url) - # links.append(TorrentFile(page_link.title, page_link.episode, file_name, url)) - # - # Logger.log(' found download link : {}({})'.format(file_name, url)) - # - # for link in smi_links: - # file_name = link.text.strip() - # sub_url = link.attrs['href'] - # url = urllib.parse.urljoin(link.url, sub_url) - # links.append(TorrentFile(page_link.title, page_link.episode, file_name, url)) - # - # Logger.log(' found download link : {}({})'.format(file_name, url)) - - return links \ No newline at end of file + return links