twitter, youtube 크롤러 추가 시작
This commit is contained in:
0
WebBasedCrawler/twitter/__init__.py
Normal file
0
WebBasedCrawler/twitter/__init__.py
Normal file
36
WebBasedCrawler/twitter/twittercrawl.py
Normal file
36
WebBasedCrawler/twitter/twittercrawl.py
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
|
||||||
|
class TwitterMainCrawler:
    """Skeleton crawler for Twitter.

    Mirrors the init/set_arguments/start interface of the other platform
    crawlers (kakao, naver, facebook) so WebBasedCrawler can drive it
    polymorphically. Most wiring (DB sender, crawl initializer, browser)
    is still commented out — presumably to be connected later; TODO confirm.
    """

    def __init__(self):
        # Sentinel meaning "no keyword selected yet".
        self.keyword_id = -1

    def init_keyword_id(self, keyword_id):
        """Store the keyword id, coercing non-int values (e.g. CLI strings) to int.

        Raises ValueError/TypeError if the value cannot be converted.
        """
        # isinstance() is the idiomatic type check; int(x) on an int is a
        # no-op, so coercing unconditionally preserves the original behavior.
        self.keyword_id = keyword_id if isinstance(keyword_id, int) else int(keyword_id)
        # Future wiring — left disabled as in the original scaffold:
        # self.crawl_init.get_keyword_parameters(keyword_id)
        # self.crawl_init.disconnect()

    def init_db(self, db_num):
        """Placeholder: select the target DB. Not wired up yet."""
        # self.send_to_db.set_db(db_num)

    def init_before_day(self, before_day):
        """Placeholder: set how many days back to crawl. Not wired up yet."""
        # self.crawl_init.set_before_day(before_day)

    def init_until_page(self, until_page):
        """Placeholder: set the last page to crawl. Not wired up yet."""
        # self.crawl_init.set_until_page(until_page)

    def set_arguments(self, browser, keyword_id, db_num, before_day, until_page):
        """Apply all crawl parameters in one call (browser init still disabled)."""
        self.init_keyword_id(keyword_id)
        self.init_db(db_num)
        self.init_before_day(before_day)
        self.init_until_page(until_page)
        # self.init_browser(browser)

    def start(self):
        """Placeholder entry point: actual crawling not implemented yet."""
        pass
|
||||||
@@ -11,6 +11,8 @@ from kakao import kakaocrawl
|
|||||||
from naver import navercrawl
|
from naver import navercrawl
|
||||||
from facebook import facebookcrawl
|
from facebook import facebookcrawl
|
||||||
from facebook import facebookcrawlbs
|
from facebook import facebookcrawlbs
|
||||||
|
from twitter import twittercrawl
|
||||||
|
from youtube import youtubecrawl
|
||||||
|
|
||||||
from base.baseclasses import print_and_flush
|
from base.baseclasses import print_and_flush
|
||||||
|
|
||||||
@@ -26,8 +28,12 @@ class WebBasedCrawler:
|
|||||||
self.crawler = kakaocrawl.KakaoMainCrawler()
|
self.crawler = kakaocrawl.KakaoMainCrawler()
|
||||||
elif platform == "navercafe":
|
elif platform == "navercafe":
|
||||||
self.crawler = navercrawl.NaverCafeMainAreaCrawler()
|
self.crawler = navercrawl.NaverCafeMainAreaCrawler()
|
||||||
elif platform == "facebook":
|
elif platform == 'facebook':
|
||||||
self.crawler = facebookcrawlbs.FacebookMainCrawler()
|
self.crawler = facebookcrawlbs.FacebookMainCrawler()
|
||||||
|
elif platform == 'twitter':
|
||||||
|
self.crawler = twittercrawl.TwitterMainCrawler()
|
||||||
|
elif platform == 'youtube':
|
||||||
|
self.crawler = youtubecrawl.YoutubeMainCrawler()
|
||||||
else:
|
else:
|
||||||
self.crawler = None
|
self.crawler = None
|
||||||
raise Exception
|
raise Exception
|
||||||
@@ -38,7 +44,7 @@ class WebBasedCrawler:
|
|||||||
|
|
||||||
|
|
||||||
browser_opt = ('chrome', "ie", "opera", "firefox")
|
browser_opt = ('chrome', "ie", "opera", "firefox")
|
||||||
platform_opt = ('instagram', 'kakaostory', 'navercafe', "facebook")
|
platform_opt = ('instagram', 'kakaostory', 'navercafe', 'facebook', 'twitter', 'youtube')
|
||||||
|
|
||||||
|
|
||||||
def get_browser_info(platform_, file_name="browser.txt"):
|
def get_browser_info(platform_, file_name="browser.txt"):
|
||||||
@@ -73,7 +79,7 @@ def get_browser_info(platform_, file_name="browser.txt"):
|
|||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
"""
|
"""
|
||||||
sys.argv[0] webbasedcrawler.py
|
sys.argv[0] webbasedcrawler.py
|
||||||
sys.argv[1] instagram, kakaochannel, navercafe, facebook
|
sys.argv[1] instagram, kakaochannel, navercafe, facebook, twitter, youtube
|
||||||
sys.argv[2] keyword_id
|
sys.argv[2] keyword_id
|
||||||
sys.argv[3] data group
|
sys.argv[3] data group
|
||||||
sys.argv[4] start_day
|
sys.argv[4] start_day
|
||||||
@@ -85,8 +91,7 @@ if __name__ == '__main__':
|
|||||||
else:
|
else:
|
||||||
print_and_flush("Check Argumenets!")
|
print_and_flush("Check Argumenets!")
|
||||||
exit(1)
|
exit(1)
|
||||||
crawler = WebBasedCrawler(get_browser_info(sys.argv[1]), sys.argv[1], sys.argv[2],
|
crawler = WebBasedCrawler(get_browser_info(sys.argv[1]), sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5])
|
||||||
sys.argv[3], sys.argv[4], sys.argv[5])
|
|
||||||
crawler.start()
|
crawler.start()
|
||||||
print_and_flush("Finished Crawling :)")
|
print_and_flush("Finished Crawling :)")
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|||||||
0
WebBasedCrawler/youtube/__init__.py
Normal file
0
WebBasedCrawler/youtube/__init__.py
Normal file
7
WebBasedCrawler/youtube/youtubecrawl.py
Normal file
7
WebBasedCrawler/youtube/youtubecrawl.py
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
|
||||||
|
class YoutubeMainCrawler:
    """Skeleton crawler for YouTube.

    BUG FIX: the dispatcher in webbasedcrawler.py instantiates
    youtubecrawl.YoutubeMainCrawler(), but this class was originally named
    YoutubeMainCrawl, which would raise AttributeError at dispatch time.
    Renamed to match the caller; the old name is kept as an alias below
    for backward compatibility.
    """

    def __init__(self):
        # No state yet — crawl wiring to be added later.
        pass

    def start(self):
        """Placeholder entry point: actual crawling not implemented yet."""
        pass


# Backward-compatible alias for the original (misspelled) class name.
YoutubeMainCrawl = YoutubeMainCrawler
|
||||||
Reference in New Issue
Block a user