From 81bbad8f19fe1c7d37df50d08f9a416c8bc0af62 Mon Sep 17 00:00:00 2001 From: mjjo Date: Tue, 4 Jul 2017 14:29:41 +0900 Subject: [PATCH] =?UTF-8?q?twitter,=20youtube=20=ED=81=AC=EB=A1=A4?= =?UTF-8?q?=EB=9F=AC=20=EC=B6=94=EA=B0=80=20=EC=8B=9C=EC=9E=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WebBasedCrawler/twitter/__init__.py | 0 WebBasedCrawler/twitter/twittercrawl.py | 36 +++++++++++++++++++++++++ WebBasedCrawler/webbasedcrawler.py | 15 +++++++---- WebBasedCrawler/youtube/__init__.py | 0 WebBasedCrawler/youtube/youtubecrawl.py | 7 +++++ 5 files changed, 53 insertions(+), 5 deletions(-) create mode 100644 WebBasedCrawler/twitter/__init__.py create mode 100644 WebBasedCrawler/twitter/twittercrawl.py create mode 100644 WebBasedCrawler/youtube/__init__.py create mode 100644 WebBasedCrawler/youtube/youtubecrawl.py diff --git a/WebBasedCrawler/twitter/__init__.py b/WebBasedCrawler/twitter/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/WebBasedCrawler/twitter/twittercrawl.py b/WebBasedCrawler/twitter/twittercrawl.py new file mode 100644 index 0000000..585ceb0 --- /dev/null +++ b/WebBasedCrawler/twitter/twittercrawl.py @@ -0,0 +1,36 @@ + +class TwitterMainCrawler: + def __init__(self): + self.keyword_id = -1 + pass + + def init_keyword_id(self, keyword_id): + if type(keyword_id) != int: + self.keyword_id = int(keyword_id) + else: + self.keyword_id = keyword_id + # self.crawl_init.get_keyword_parameters(keyword_id) + # self.crawl_init.disconnect() + pass + + def init_db(self, db_num): + # self.send_to_db.set_db(db_num) + pass + + def init_before_day(self, before_day): + # self.crawl_init.set_before_day(before_day) + pass + + def init_until_page(self, until_page): + # self.crawl_init.set_until_page(until_page) + pass + + def set_arguments(self, browser, keyword_id, db_num, before_day, until_page): + self.init_keyword_id(keyword_id) + self.init_db(db_num) + self.init_before_day(before_day) + 
self.init_until_page(until_page) + # self.init_browser(browser) + + def start(self): + pass \ No newline at end of file diff --git a/WebBasedCrawler/webbasedcrawler.py b/WebBasedCrawler/webbasedcrawler.py index 9f03e71..75783cc 100644 --- a/WebBasedCrawler/webbasedcrawler.py +++ b/WebBasedCrawler/webbasedcrawler.py @@ -11,6 +11,8 @@ from kakao import kakaocrawl from naver import navercrawl from facebook import facebookcrawl from facebook import facebookcrawlbs +from twitter import twittercrawl +from youtube import youtubecrawl from base.baseclasses import print_and_flush @@ -26,8 +28,12 @@ class WebBasedCrawler: self.crawler = kakaocrawl.KakaoMainCrawler() elif platform == "navercafe": self.crawler = navercrawl.NaverCafeMainAreaCrawler() - elif platform == "facebook": + elif platform == 'facebook': self.crawler = facebookcrawlbs.FacebookMainCrawler() + elif platform == 'twitter': + self.crawler = twittercrawl.TwitterMainCrawler() + elif platform == 'youtube': + self.crawler = youtubecrawl.YoutubeMainCrawler() else: self.crawler = None raise Exception @@ -38,7 +44,7 @@ class WebBasedCrawler: browser_opt = ('chrome', "ie", "opera", "firefox") -platform_opt = ('instagram', 'kakaostory', 'navercafe', "facebook") +platform_opt = ('instagram', 'kakaostory', 'navercafe', 'facebook', 'twitter', 'youtube') def get_browser_info(platform_, file_name="browser.txt"): @@ -73,7 +79,7 @@ def get_browser_info(platform_, file_name="browser.txt"): if __name__ == '__main__': """ sys.argv[0] webbasedcrawler.py - sys.argv[1] instagram, kakaochannel, navercafe, facebook + sys.argv[1] instagram, kakaochannel, navercafe, facebook, twitter, youtube sys.argv[2] keyword_id sys.argv[3] data group sys.argv[4] start_day @@ -85,8 +91,7 @@ if __name__ == '__main__': else: print_and_flush("Check Argumenets!") exit(1) - crawler = WebBasedCrawler(get_browser_info(sys.argv[1]), sys.argv[1], sys.argv[2], - sys.argv[3], sys.argv[4], sys.argv[5]) + crawler = WebBasedCrawler(get_browser_info(sys.argv[1]), 
sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5]) crawler.start() print_and_flush("Finished Crawling :)") exit(0) diff --git a/WebBasedCrawler/youtube/__init__.py b/WebBasedCrawler/youtube/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/WebBasedCrawler/youtube/youtubecrawl.py b/WebBasedCrawler/youtube/youtubecrawl.py new file mode 100644 index 0000000..bb34752 --- /dev/null +++ b/WebBasedCrawler/youtube/youtubecrawl.py @@ -0,0 +1,7 @@ + +class YoutubeMainCrawler: + def __init__(self): + pass + + def start(self): + pass \ No newline at end of file