웹크롤러 파이썬 카카오스토리 부분 디버깅
git-svn-id: svn://192.168.0.12/source@294 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
@@ -803,7 +803,11 @@ class InstaAlgorithmMulti(InstaAlgorithm):
|
||||
# printl(element['date'].strftime("%Y-%m-%d %H:%M:%S"))
|
||||
# wait(1.5)
|
||||
# self.crawl_content(element['url'], list_crawler.get_cookies(), list_crawler.get_url())
|
||||
self.list_crawl.put(element)
|
||||
try:
|
||||
self.list_crawl.put(element, timeout=10)
|
||||
except Exception as e:
|
||||
printl(e)
|
||||
printl("queue size = ", self.list_crawl.qsize())
|
||||
backup_set.add(element['url'])
|
||||
self.total_num += 1
|
||||
if self.is_until_page():
|
||||
@@ -869,12 +873,15 @@ class InstaAlgorithmMulti(InstaAlgorithm):
|
||||
|
||||
# stop child process
|
||||
for i in range(num_of_content_process):
|
||||
self.list_crawl.put(None)
|
||||
self.list_crawl.put(None, timeout=10)
|
||||
|
||||
# wait child process
|
||||
for p in p_list:
|
||||
p.join()
|
||||
|
||||
for _ in range(self.list_crawl.qsize()):
|
||||
self.list_crawl.get(block=False)
|
||||
|
||||
i += 1
|
||||
except Exception as e:
|
||||
logging.info(e)
|
||||
|
||||
Reference in New Issue
Block a user