웹 크롤러 파이썬 카카오스토리 부분 디버깅
git-svn-id: svn://192.168.0.12/source@294 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
@@ -803,7 +803,11 @@ class InstaAlgorithmMulti(InstaAlgorithm):
|
||||
# printl(element['date'].strftime("%Y-%m-%d %H:%M:%S"))
|
||||
# wait(1.5)
|
||||
# self.crawl_content(element['url'], list_crawler.get_cookies(), list_crawler.get_url())
|
||||
self.list_crawl.put(element)
|
||||
try:
|
||||
self.list_crawl.put(element, timeout=10)
|
||||
except Exception as e:
|
||||
printl(e)
|
||||
printl("queue size = ", self.list_crawl.qsize())
|
||||
backup_set.add(element['url'])
|
||||
self.total_num += 1
|
||||
if self.is_until_page():
|
||||
@@ -869,12 +873,15 @@ class InstaAlgorithmMulti(InstaAlgorithm):
|
||||
|
||||
# stop child process
|
||||
for i in range(num_of_content_process):
|
||||
self.list_crawl.put(None)
|
||||
self.list_crawl.put(None, timeout=10)
|
||||
|
||||
# wait child process
|
||||
for p in p_list:
|
||||
p.join()
|
||||
|
||||
for _ in range(self.list_crawl.qsize()):
|
||||
self.list_crawl.get(block=False)
|
||||
|
||||
i += 1
|
||||
except Exception as e:
|
||||
logging.info(e)
|
||||
|
||||
@@ -114,7 +114,7 @@ class KakaoInit(CrawlInit):
|
||||
date_now = datetime.datetime.now()
|
||||
result = datetime.datetime(year=date_now.year, month=date_now.month, day=date_now.day)
|
||||
result += datetime.timedelta(days=self.before_day)
|
||||
return result
|
||||
return result.date()
|
||||
else:
|
||||
return self.start_day()
|
||||
|
||||
@@ -122,7 +122,7 @@ class KakaoInit(CrawlInit):
|
||||
if self.is_realtime():
|
||||
date_now = datetime.datetime.now()
|
||||
result = datetime.datetime(year=date_now.year, month=date_now.month, day=date_now.day)
|
||||
return result
|
||||
return result.date()
|
||||
else:
|
||||
return self.end_day()
|
||||
|
||||
@@ -967,6 +967,7 @@ class KakaoMainCrawler:
|
||||
i += 1
|
||||
except Exception as e:
|
||||
logging.info(e)
|
||||
# check for exception
|
||||
# self.driver.quit()
|
||||
self.set_driver(self.browser.new_browser())
|
||||
wait(5)
|
||||
@@ -975,5 +976,5 @@ class KakaoMainCrawler:
|
||||
printl("Finished Crawling :)")
|
||||
|
||||
self.send_to_db.close()
|
||||
# self.driver.quit()
|
||||
self.driver.quit()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user