웹크롤러 파이썬 카카오스토리 부분 디버깅

git-svn-id: svn://192.168.0.12/source@294 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
admin
2016-09-08 05:58:24 +00:00
parent f4c95f68d2
commit c0e614bac1
2 changed files with 13 additions and 5 deletions

View File

@@ -803,7 +803,11 @@ class InstaAlgorithmMulti(InstaAlgorithm):
# printl(element['date'].strftime("%Y-%m-%d %H:%M:%S"))
# wait(1.5)
# self.crawl_content(element['url'], list_crawler.get_cookies(), list_crawler.get_url())
-self.list_crawl.put(element)
+try:
+self.list_crawl.put(element, timeout=10)
+except Exception as e:
+printl(e)
+printl("queue size = ", self.list_crawl.qsize())
backup_set.add(element['url'])
self.total_num += 1
if self.is_until_page():
@@ -869,12 +873,15 @@ class InstaAlgorithmMulti(InstaAlgorithm):
# stop child process
for i in range(num_of_content_process):
-self.list_crawl.put(None)
+self.list_crawl.put(None, timeout=10)
# wait child process
for p in p_list:
p.join()
for _ in range(self.list_crawl.qsize()):
self.list_crawl.get(block=False)
i += 1
except Exception as e:
logging.info(e)