git-svn-id: svn://192.168.0.12/source@345 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
@@ -86,6 +86,7 @@ def requests_get(req, timeout=requests_timeout):
|
||||
if time.time() > (start + timeout):
|
||||
req.close()
|
||||
raise Exception("timeout")
|
||||
|
||||
return b''.join(body)
|
||||
|
||||
|
||||
@@ -313,6 +314,7 @@ def crawl_content_process(qu, keyword_id, db_num):
|
||||
break
|
||||
ok = True
|
||||
while ok:
|
||||
time.sleep(2)
|
||||
try:
|
||||
# get a instance of InstaContent by do_no_proxy func.
|
||||
# if element['url'] is invalid, content is None
|
||||
@@ -339,6 +341,7 @@ def crawl_content_process(qu, keyword_id, db_num):
|
||||
send_to_db.send_body(body)
|
||||
if replies:
|
||||
send_to_db.send_reply(replies)
|
||||
printl("proxies = ", content.proxies['http'][7:])
|
||||
printl(element['url'])
|
||||
printl('ok')
|
||||
ok = False
|
||||
@@ -411,15 +414,15 @@ class ListTag:
|
||||
self.load_url(url, self.proxies)
|
||||
|
||||
def load_url(self, url, proxies):
|
||||
self.__r = requests.get(url, headers=instaheaders.get_headers_for_list_html(), proxies=proxies,
|
||||
timeout=requests_timeout, stream=True)
|
||||
self.__r = requests.get(url, headers=instaheaders.get_headers_for_list_html(), proxies=proxies, timeout=requests_timeout, stream=True)
|
||||
content = requests_get(self.__r)
|
||||
|
||||
self.log_load_url_before()
|
||||
self.__r.raise_for_status()
|
||||
self.__tag = self.__get_tag(url)
|
||||
self.__set_cookies(self.__r.cookies)
|
||||
self.__url = url
|
||||
# self.list_tag, self.end_cursor, self.has_next = instaparser.parse_list_tag_html(self.__r.content)
|
||||
#self.list_tag, self.end_cursor, self.has_next = instaparser.parse_list_tag_html(self.__r.content)
|
||||
self.list_tag, self.end_cursor, self.has_next = instaparser.parse_list_tag_html(content)
|
||||
self.__r.close()
|
||||
self.log_load_url_after()
|
||||
@@ -1033,7 +1036,7 @@ class InstaMainCrawler:
|
||||
def __init__(self):
|
||||
self.send_to_db = SendtoDB()
|
||||
self.crawl_init = InstaInit()
|
||||
# self.browser = Browser()
|
||||
#self.browser = Browser()
|
||||
self.browser = None
|
||||
self.driver = None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user