웹 크롤러 파이썬 카카오스토리 부분 디버깅

git-svn-id: svn://192.168.0.12/source@294 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
admin
2016-09-08 05:58:24 +00:00
parent f4c95f68d2
commit c0e614bac1
2 changed files with 13 additions and 5 deletions

View File

@@ -803,7 +803,11 @@ class InstaAlgorithmMulti(InstaAlgorithm):
# printl(element['date'].strftime("%Y-%m-%d %H:%M:%S"))
# wait(1.5)
# self.crawl_content(element['url'], list_crawler.get_cookies(), list_crawler.get_url())
self.list_crawl.put(element)
try:
self.list_crawl.put(element, timeout=10)
except Exception as e:
printl(e)
printl("queue size = ", self.list_crawl.qsize())
backup_set.add(element['url'])
self.total_num += 1
if self.is_until_page():
@@ -869,12 +873,15 @@ class InstaAlgorithmMulti(InstaAlgorithm):
# stop child process
for i in range(num_of_content_process):
self.list_crawl.put(None)
self.list_crawl.put(None, timeout=10)
# wait child process
for p in p_list:
p.join()
for _ in range(self.list_crawl.qsize()):
self.list_crawl.get(block=False)
i += 1
except Exception as e:
logging.info(e)

View File

@@ -114,7 +114,7 @@ class KakaoInit(CrawlInit):
date_now = datetime.datetime.now()
result = datetime.datetime(year=date_now.year, month=date_now.month, day=date_now.day)
result += datetime.timedelta(days=self.before_day)
return result
return result.date()
else:
return self.start_day()
@@ -122,7 +122,7 @@ class KakaoInit(CrawlInit):
if self.is_realtime():
date_now = datetime.datetime.now()
result = datetime.datetime(year=date_now.year, month=date_now.month, day=date_now.day)
return result
return result.date()
else:
return self.end_day()
@@ -967,6 +967,7 @@ class KakaoMainCrawler:
i += 1
except Exception as e:
logging.info(e)
# check for exception
# self.driver.quit()
self.set_driver(self.browser.new_browser())
wait(5)
@@ -975,5 +976,5 @@ class KakaoMainCrawler:
printl("Finished Crawling :)")
self.send_to_db.close()
# self.driver.quit()
self.driver.quit()