twitter, youtube 크롤러 추가 시작

This commit is contained in:
mjjo
2017-07-04 14:29:41 +09:00
parent f6310e7922
commit 81bbad8f19
5 changed files with 53 additions and 5 deletions

View File

View File

@@ -0,0 +1,36 @@
class TwitterMainCrawler:
    """Skeleton crawler for the 'twitter' platform.

    Only the keyword id is stored at the moment; the remaining
    initialisers and ``start`` are placeholders with no behaviour yet.
    """

    def __init__(self):
        # -1 is the initial value until init_keyword_id() is called.
        self.keyword_id = -1

    def init_keyword_id(self, keyword_id):
        """Store ``keyword_id`` as an ``int``.

        Args:
            keyword_id: An int, or any value accepted by ``int()``
                (for example a numeric string).

        Raises:
            ValueError: If ``keyword_id`` cannot be converted by ``int()``.
            TypeError: If ``keyword_id`` is of a type ``int()`` rejects.
        """
        # int() leaves an int unchanged and converts everything else, so no
        # explicit type comparison is needed.
        self.keyword_id = int(keyword_id)
        # TODO: not wired up yet:
        #   self.crawl_init.get_keyword_parameters(keyword_id)
        #   self.crawl_init.disconnect()

    def init_db(self, db_num):
        """Placeholder; ``db_num`` is currently ignored."""
        # TODO: not wired up yet: self.send_to_db.set_db(db_num)

    def init_before_day(self, before_day):
        """Placeholder; ``before_day`` is currently ignored."""
        # TODO: not wired up yet: self.crawl_init.set_before_day(before_day)

    def init_until_page(self, until_page):
        """Placeholder; ``until_page`` is currently ignored."""
        # TODO: not wired up yet: self.crawl_init.set_until_page(until_page)

    def set_arguments(self, browser, keyword_id, db_num, before_day, until_page):
        """Forward each argument to its ``init_*`` method.

        ``browser`` is accepted but not used yet.
        """
        self.init_keyword_id(keyword_id)
        self.init_db(db_num)
        self.init_before_day(before_day)
        self.init_until_page(until_page)
        # TODO: not wired up yet: self.init_browser(browser)

    def start(self):
        """Placeholder for the crawl entry point; currently does nothing."""

View File

@@ -11,6 +11,8 @@ from kakao import kakaocrawl
from naver import navercrawl
from facebook import facebookcrawl
from facebook import facebookcrawlbs
from twitter import twittercrawl
from youtube import youtubecrawl
from base.baseclasses import print_and_flush
@@ -26,8 +28,12 @@ class WebBasedCrawler:
self.crawler = kakaocrawl.KakaoMainCrawler()
elif platform == "navercafe":
self.crawler = navercrawl.NaverCafeMainAreaCrawler()
elif platform == "facebook":
elif platform == 'facebook':
self.crawler = facebookcrawlbs.FacebookMainCrawler()
elif platform == 'twitter':
self.crawler = twittercrawl.TwitterMainCrawler()
elif platform == 'youtube':
self.crawler = youtubecrawl.YoutubeMainCrawler()
else:
self.crawler = None
raise Exception
@@ -38,7 +44,7 @@ class WebBasedCrawler:
browser_opt = ('chrome', "ie", "opera", "firefox")
platform_opt = ('instagram', 'kakaostory', 'navercafe', "facebook")
platform_opt = ('instagram', 'kakaostory', 'navercafe', 'facebook', 'twitter', 'youtube')
def get_browser_info(platform_, file_name="browser.txt"):
@@ -73,7 +79,7 @@ def get_browser_info(platform_, file_name="browser.txt"):
if __name__ == '__main__':
"""
sys.argv[0] webbasedcrawler.py
sys.argv[1] instagram, kakaochannel, navercafe, facebook
sys.argv[1] instagram, kakaochannel, navercafe, facebook, twitter, youtube
sys.argv[2] keyword_id
sys.argv[3] data group
sys.argv[4] start_day
@@ -85,8 +91,7 @@ if __name__ == '__main__':
else:
print_and_flush("Check Argumenets!")
exit(1)
crawler = WebBasedCrawler(get_browser_info(sys.argv[1]), sys.argv[1], sys.argv[2],
sys.argv[3], sys.argv[4], sys.argv[5])
crawler = WebBasedCrawler(get_browser_info(sys.argv[1]), sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5])
crawler.start()
print_and_flush("Finished Crawling :)")
exit(0)

View File

View File

@@ -0,0 +1,7 @@
class YoutubeMainCrawler:
    """Skeleton crawler for the 'youtube' platform; no crawling logic yet.

    The class is named ``YoutubeMainCrawler`` because the platform
    dispatcher instantiates ``youtubecrawl.YoutubeMainCrawler()``.
    """

    def __init__(self):
        pass

    def start(self):
        """Placeholder for the crawl entry point; currently does nothing."""
        pass


# Backward-compatible alias for the name this class was first committed under.
YoutubeMainCrawl = YoutubeMainCrawler