twitter, youtube 크롤러 추가 시작
This commit is contained in:
0
WebBasedCrawler/twitter/__init__.py
Normal file
0
WebBasedCrawler/twitter/__init__.py
Normal file
36
WebBasedCrawler/twitter/twittercrawl.py
Normal file
36
WebBasedCrawler/twitter/twittercrawl.py
Normal file
@@ -0,0 +1,36 @@
|
||||
|
||||
class TwitterMainCrawler:
    """Crawler entry point for Twitter.

    Skeleton mirroring the other platform crawlers' interface
    (init_* setters, set_arguments, start); most hooks are still
    commented out pending implementation.
    """

    def __init__(self):
        # -1 marks "no keyword selected yet"
        self.keyword_id = -1

    def init_keyword_id(self, keyword_id):
        """Store the keyword id, coercing non-int input (e.g. argv strings) to int.

        Raises:
            ValueError/TypeError: if keyword_id cannot be converted to int.
        """
        # isinstance() is the idiomatic type check (was: type(keyword_id) != int)
        if isinstance(keyword_id, int):
            self.keyword_id = keyword_id
        else:
            self.keyword_id = int(keyword_id)
        # self.crawl_init.get_keyword_parameters(keyword_id)
        # self.crawl_init.disconnect()

    def init_db(self, db_num):
        """Select the target DB (not implemented yet)."""
        # self.send_to_db.set_db(db_num)
        pass

    def init_before_day(self, before_day):
        """Set how many days back to crawl (not implemented yet)."""
        # self.crawl_init.set_before_day(before_day)
        pass

    def init_until_page(self, until_page):
        """Set the page limit for crawling (not implemented yet)."""
        # self.crawl_init.set_until_page(until_page)
        pass

    def set_arguments(self, browser, keyword_id, db_num, before_day, until_page):
        """Apply all runtime arguments in one call (browser wiring pending)."""
        self.init_keyword_id(keyword_id)
        self.init_db(db_num)
        self.init_before_day(before_day)
        self.init_until_page(until_page)
        # self.init_browser(browser)

    def start(self):
        """Run the crawl (not implemented yet)."""
        pass
|
||||
@@ -11,6 +11,8 @@ from kakao import kakaocrawl
|
||||
from naver import navercrawl
|
||||
from facebook import facebookcrawl
|
||||
from facebook import facebookcrawlbs
|
||||
from twitter import twittercrawl
|
||||
from youtube import youtubecrawl
|
||||
|
||||
from base.baseclasses import print_and_flush
|
||||
|
||||
@@ -26,8 +28,12 @@ class WebBasedCrawler:
|
||||
self.crawler = kakaocrawl.KakaoMainCrawler()
|
||||
elif platform == "navercafe":
|
||||
self.crawler = navercrawl.NaverCafeMainAreaCrawler()
|
||||
elif platform == "facebook":
|
||||
elif platform == 'facebook':
|
||||
self.crawler = facebookcrawlbs.FacebookMainCrawler()
|
||||
elif platform == 'twitter':
|
||||
self.crawler = twittercrawl.TwitterMainCrawler()
|
||||
elif platform == 'youtube':
|
||||
self.crawler = youtubecrawl.YoutubeMainCrawler()
|
||||
else:
|
||||
self.crawler = None
|
||||
raise Exception
|
||||
@@ -38,7 +44,7 @@ class WebBasedCrawler:
|
||||
|
||||
|
||||
browser_opt = ('chrome', "ie", "opera", "firefox")
|
||||
platform_opt = ('instagram', 'kakaostory', 'navercafe', "facebook")
|
||||
platform_opt = ('instagram', 'kakaostory', 'navercafe', 'facebook', 'twitter', 'youtube')
|
||||
|
||||
|
||||
def get_browser_info(platform_, file_name="browser.txt"):
|
||||
@@ -73,7 +79,7 @@ def get_browser_info(platform_, file_name="browser.txt"):
|
||||
if __name__ == '__main__':
|
||||
"""
|
||||
sys.argv[0] webbasedcrawler.py
|
||||
sys.argv[1] instagram, kakaochannel, navercafe, facebook
|
||||
sys.argv[1] instagram, kakaochannel, navercafe, facebook, twitter, youtube
|
||||
sys.argv[2] keyword_id
|
||||
sys.argv[3] data group
|
||||
sys.argv[4] start_day
|
||||
@@ -85,8 +91,7 @@ if __name__ == '__main__':
|
||||
else:
|
||||
print_and_flush("Check Argumenets!")
|
||||
exit(1)
|
||||
crawler = WebBasedCrawler(get_browser_info(sys.argv[1]), sys.argv[1], sys.argv[2],
|
||||
sys.argv[3], sys.argv[4], sys.argv[5])
|
||||
crawler = WebBasedCrawler(get_browser_info(sys.argv[1]), sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5])
|
||||
crawler.start()
|
||||
print_and_flush("Finished Crawling :)")
|
||||
exit(0)
|
||||
|
||||
0
WebBasedCrawler/youtube/__init__.py
Normal file
0
WebBasedCrawler/youtube/__init__.py
Normal file
7
WebBasedCrawler/youtube/youtubecrawl.py
Normal file
7
WebBasedCrawler/youtube/youtubecrawl.py
Normal file
@@ -0,0 +1,7 @@
|
||||
|
||||
class YoutubeMainCrawler:
    """Crawler entry point for YouTube (skeleton, not implemented yet).

    Named YoutubeMainCrawler to match the dispatcher, which instantiates
    youtubecrawl.YoutubeMainCrawler(); the original definition was named
    YoutubeMainCrawl, which would raise AttributeError at dispatch time.
    """

    def __init__(self):
        pass

    def start(self):
        """Run the crawl (not implemented yet)."""
        pass


# Backward-compatible alias for any code referring to the original class name.
YoutubeMainCrawl = YoutubeMainCrawler
|
||||
Reference in New Issue
Block a user