#-*- coding: utf-8 -*-
"""
Command-line entry point that hands a crawl job to a platform-specific crawler.

Created on 2015. 12. 8.

@author: cococo
"""
import sys

from insta import instacrawl
from kakao import kakaocrawl
from naver import navercrawl
from facebook import facebookcrawl
from facebook import facebookcrawlbs
from twitter import twittercrawl
# NOTE: youtube.youtubecrawl is deliberately not imported at module load time
# (the import was disabled here); _make_youtube_crawler() imports it on demand.
from base.baseclasses import print_and_flush


def _make_youtube_crawler():
    """Instantiate the YouTube crawler, importing its module only when needed."""
    # With the module-level import disabled, the 'youtube' branch used to fail
    # with NameError. Importing here leaves module start-up untouched and turns
    # a missing package into a self-explanatory ImportError.
    # NOTE(review): assumes the `youtube` package is still present - confirm.
    from youtube import youtubecrawl
    return youtubecrawl.YoutubeMainCrawler()


# Platform name -> zero-argument factory returning a fresh crawler object.
# Lambdas defer the attribute lookup until the platform is actually requested.
_CRAWLER_FACTORIES = {
    'instagram': lambda: instacrawl.InstaMainCrawler(),
    'kakaochannel': lambda: kakaocrawl.KakaoMainCrawler(),
    'navercafe': lambda: navercrawl.NaverCafeMainAreaCrawler(),
    'facebook': lambda: facebookcrawlbs.FacebookMainCrawler(),
    'twitter': lambda: twittercrawl.TwitterCrawler(),
    'youtube': _make_youtube_crawler,
}


class WebBasedCrawler:
    """Select a platform crawler by name and forward the crawl arguments to it.

    The selected crawler object is stored in ``self.crawler``.
    """

    def __init__(self, browser=None, platform=None, keyword_id=None, db_num=None,
                 before_day=None, until_page=None):
        """Create the wrapper and immediately configure it via set_arguments().

        Raises:
            ValueError: if ``platform`` is not a supported platform name
                (this includes the default ``None``).
        """
        self.set_arguments(browser, platform, keyword_id, db_num, before_day, until_page)

    def set_arguments(self, browser, platform, keyword_id, db_num, before_day, until_page):
        """Instantiate the crawler for ``platform`` and pass it the other arguments.

        Args:
            browser: forwarded unchanged to the crawler's set_arguments().
            platform: one of 'instagram', 'kakaochannel', 'navercafe',
                'facebook', 'twitter' or 'youtube'.
            keyword_id: forwarded unchanged to the crawler's set_arguments().
            db_num: forwarded unchanged to the crawler's set_arguments().
            before_day: forwarded unchanged to the crawler's set_arguments().
            until_page: forwarded unchanged to the crawler's set_arguments().

        Raises:
            ValueError: if ``platform`` is not supported. ``self.crawler`` is
                reset to None before raising.
        """
        factory = _CRAWLER_FACTORIES.get(platform)
        if factory is None:
            self.crawler = None
            # ValueError is still an Exception, so existing handlers keep working,
            # but the message now says which platform was rejected.
            raise ValueError("unsupported platform: %r" % (platform,))
        self.crawler = factory()
        self.crawler.set_arguments(browser, keyword_id, db_num, before_day, until_page)

    def start(self):
        """Run the configured crawler by calling its start() method."""
        self.crawler.start()


# Browser names accepted as values in the browser option file.
browser_opt = ('chrome', "ie", "opera", "firefox")
# Platform names accepted as keys in the browser option file. 'kakaochannel' is
# the name the dispatcher and the command line use; 'kakaostory' is kept so
# option files that already contain it are still accepted.
platform_opt = ('instagram', 'kakaostory', 'navercafe', 'facebook', 'twitter', 'youtube',
                'kakaochannel')


def get_browser_info(platform_, file_name="browser.txt"):
    """Return the browser name configured for ``platform_``.

    The option file holds ``platform=browser`` lines. Blank lines and lines
    whose first non-blank character is ``#`` are ignored. A line is accepted
    only if its key is 'default' or a name in ``platform_opt`` and its value is
    a name in ``browser_opt``; any other line is reported with a
    "check option: " message and skipped.

    Args:
        platform_: platform name to look up.
        file_name: path of the option file.

    Returns:
        The browser configured for ``platform_``, otherwise the 'default'
        entry. The built-in default is 'ie' when ``sys.platform`` is 'win32'
        and 'firefox' otherwise, unless the file overrides 'default'.

    Raises:
        SystemExit: with status 1 if reading the file fails for any reason
            other than the file being absent.
    """
    options = {'default': 'ie' if sys.platform == 'win32' else 'firefox'}
    try:
        with open(file_name, 'r') as f:
            for line in f:
                entry = line.strip()
                if not entry or entry.startswith("#"):
                    continue
                # partition() never raises: a line without '=' yields an empty
                # browser, and extra '=' signs stay in the browser part, so both
                # malformed shapes are reported below instead of ending the run.
                platform, _, browser = entry.partition("=")
                platform = platform.strip()
                browser = browser.strip()
                known_platform = platform in options or platform in platform_opt
                if known_platform and browser in browser_opt:
                    options[platform] = browser
                else:
                    print_and_flush("check option: " + line)
    except FileNotFoundError:
        # A missing option file is not an error: fall back to the defaults.
        print_and_flush(str(file_name) + " file is not exists")
        print_and_flush("use " + options['default'] + " browser")
    except Exception as e:
        print_and_flush(e)
        print_and_flush("Unknown error occurs")
        sys.exit(1)
    return options.get(platform_, options['default'])


def main(argv=None):
    """Run one crawl from command-line style arguments, then exit.

    Expected layout of ``argv`` (defaults to ``sys.argv``):

        argv[0]  webbasedcrawler.py
        argv[1]  instagram, kakaochannel, navercafe, facebook, twitter, youtube
        argv[2]  keyword_id
        argv[3]  data group (passed on as db_num)
        argv[4]  start_day (passed on as before_day)
        argv[5]  until_page

    Raises:
        SystemExit: status 1 when the argument count is not six, status 0
            after the crawler's start() returns.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 6:
        print_and_flush("Check Argumenets!")
        sys.exit(1)
    print_and_flush("Python Crawling Executed")

    platform = argv[1]
    crawler = WebBasedCrawler(get_browser_info(platform), platform,
                              argv[2], argv[3], argv[4], argv[5])
    crawler.start()
    print_and_flush("Finished Crawling :)")
    sys.exit(0)


if __name__ == '__main__':
    main()