diff --git a/.gitignore b/.gitignore index 70ec9b1..4401fea 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ **/.idea/ **/__pycache__/ *.user +**/build-*/ +WebBasedCrawler/proxy.txt diff --git a/WebBasedCrawler/insta/instacrawl.py b/WebBasedCrawler/insta/instacrawl.py index 1598e7e..248012f 100644 --- a/WebBasedCrawler/insta/instacrawl.py +++ b/WebBasedCrawler/insta/instacrawl.py @@ -72,7 +72,7 @@ body_wait_sec = 0.5 reply_wait_sec = 0.8 num_of_page_down = 20 num_of_content_process = 10 -requests_timeout = 60 +requests_timeout = 5 num_of_retry_proxy = 5 logging.basicConfig(level=logging.INFO, @@ -226,7 +226,7 @@ def make_list_instance(url, proxies=None): return list_crawler except requests.exceptions.ProxyError as e: - printd('proxy: '+str(e.args[0].pool.proxy), e) + printd('proxy: {}'.format(e)) printd("Fail to make list instance") return None @@ -243,7 +243,7 @@ def make_content_instance(url, proxies=None): return content except requests.exceptions.ProxyError as e: - printd('proxy: '+str(e.args[0].pool.proxy), e) + printd('proxy: {}'.format(e)) printd("Fail to make content instance") return None