twitter, youtube 크롤러 추가 시작

This commit is contained in:
mjjo
2017-07-04 14:29:41 +09:00
parent f6310e7922
commit 81bbad8f19
5 changed files with 53 additions and 5 deletions

View File

View File

@@ -0,0 +1,36 @@
class TwitterMainCrawler:
    """Skeleton crawler for the Twitter platform.

    Only the keyword id is stored at the moment; the remaining ``init_*``
    hooks accept their argument and do nothing, and ``start`` is a no-op.
    """

    def __init__(self):
        """Create a crawler with the keyword id set to the sentinel ``-1``."""
        self.keyword_id = -1

    def init_keyword_id(self, keyword_id):
        """Store ``keyword_id`` as an ``int``.

        Args:
            keyword_id: An int or anything ``int()`` accepts (for example a
                numeric string).

        Raises:
            ValueError: If ``keyword_id`` is a string that is not a valid
                integer literal.
            TypeError: If ``keyword_id`` cannot be converted by ``int()``.
        """
        # int() returns a plain int unchanged, so no type check is needed.
        self.keyword_id = int(keyword_id)
        # TODO: load keyword parameters through a crawl-init helper
        # (placeholder in the original: crawl_init.get_keyword_parameters).

    def init_db(self, db_num):
        """Accept the target database number; currently ignored."""
        # TODO: forward db_num to the DB sender (placeholder: send_to_db.set_db).

    def init_before_day(self, before_day):
        """Accept the look-back day setting; currently ignored."""
        # TODO: forward to the crawl-init helper (placeholder: set_before_day).

    def init_until_page(self, until_page):
        """Accept the page limit setting; currently ignored."""
        # TODO: forward to the crawl-init helper (placeholder: set_until_page).

    def set_arguments(self, browser, keyword_id, db_num, before_day, until_page):
        """Apply all crawl arguments by calling each ``init_*`` hook in turn.

        ``browser`` is accepted for signature compatibility but is not used
        yet (the browser initialisation is still a placeholder).
        """
        self.init_keyword_id(keyword_id)
        self.init_db(db_num)
        self.init_before_day(before_day)
        self.init_until_page(until_page)
        # TODO: initialise the browser once init_browser is implemented.

    def start(self):
        """Run the crawl; currently a no-op that returns ``None``."""

View File

@@ -11,6 +11,8 @@ from kakao import kakaocrawl
from naver import navercrawl from naver import navercrawl
from facebook import facebookcrawl from facebook import facebookcrawl
from facebook import facebookcrawlbs from facebook import facebookcrawlbs
from twitter import twittercrawl
from youtube import youtubecrawl
from base.baseclasses import print_and_flush from base.baseclasses import print_and_flush
@@ -26,8 +28,12 @@ class WebBasedCrawler:
self.crawler = kakaocrawl.KakaoMainCrawler() self.crawler = kakaocrawl.KakaoMainCrawler()
elif platform == "navercafe": elif platform == "navercafe":
self.crawler = navercrawl.NaverCafeMainAreaCrawler() self.crawler = navercrawl.NaverCafeMainAreaCrawler()
elif platform == "facebook": elif platform == 'facebook':
self.crawler = facebookcrawlbs.FacebookMainCrawler() self.crawler = facebookcrawlbs.FacebookMainCrawler()
elif platform == 'twitter':
self.crawler = twittercrawl.TwitterMainCrawler()
elif platform == 'youtube':
self.crawler = youtubecrawl.YoutubeMainCrawler()
else: else:
self.crawler = None self.crawler = None
raise Exception raise Exception
@@ -38,7 +44,7 @@ class WebBasedCrawler:
browser_opt = ('chrome', "ie", "opera", "firefox") browser_opt = ('chrome', "ie", "opera", "firefox")
platform_opt = ('instagram', 'kakaostory', 'navercafe', "facebook") platform_opt = ('instagram', 'kakaostory', 'navercafe', 'facebook', 'twitter', 'youtube')
def get_browser_info(platform_, file_name="browser.txt"): def get_browser_info(platform_, file_name="browser.txt"):
@@ -73,7 +79,7 @@ def get_browser_info(platform_, file_name="browser.txt"):
if __name__ == '__main__': if __name__ == '__main__':
""" """
sys.argv[0] webbasedcrawler.py sys.argv[0] webbasedcrawler.py
sys.argv[1] instagram, kakaochannel, navercafe, facebook sys.argv[1] instagram, kakaochannel, navercafe, facebook, twitter, youtube
sys.argv[2] keyword_id sys.argv[2] keyword_id
sys.argv[3] data group sys.argv[3] data group
sys.argv[4] start_day sys.argv[4] start_day
@@ -85,8 +91,7 @@ if __name__ == '__main__':
else: else:
print_and_flush("Check Argumenets!") print_and_flush("Check Argumenets!")
exit(1) exit(1)
crawler = WebBasedCrawler(get_browser_info(sys.argv[1]), sys.argv[1], sys.argv[2], crawler = WebBasedCrawler(get_browser_info(sys.argv[1]), sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5])
sys.argv[3], sys.argv[4], sys.argv[5])
crawler.start() crawler.start()
print_and_flush("Finished Crawling :)") print_and_flush("Finished Crawling :)")
exit(0) exit(0)

View File

View File

@@ -0,0 +1,7 @@
class YoutubeMainCrawler:
    """Skeleton crawler for the YouTube platform.

    The class carries no state yet and ``start`` is a no-op. It is named
    ``YoutubeMainCrawler`` because that is the attribute the platform
    dispatcher looks up on this module (``youtubecrawl.YoutubeMainCrawler()``).
    """

    def __init__(self):
        """Create the crawler; there is nothing to initialise yet."""

    def start(self):
        """Run the crawl; currently a no-op that returns ``None``."""


# Backward-compatible alias for the original (misspelled) class name.
YoutubeMainCrawl = YoutubeMainCrawler