Files
clients/WebBasedCrawler/youtube/ytconfig.py
mjjo 1fb61f0b4c 트위터 크롤러 수정
- 프록시를 porxy2 db에 넣고 사용
2017-08-09 15:32:57 +09:00

72 lines
2.1 KiB
Python

import datetime
import copy
class YoutubeConfig:
    """Parameters for one keyword crawl job: search terms, date window and flags.

    An instance starts out with placeholder values (see ``__init__``) and is
    populated by ``set_param``. ``split`` breaks the configured date window
    into one-day configs, and ``reload_realtime`` re-anchors the window on
    the current day for realtime jobs.
    """

    protocol = 'https'
    top_url = 'youtube.com'
    # NOTE(review): these two paths look like Twitter-style endpoints rather
    # than YouTube ones; they are kept unchanged because code outside this
    # class may read them -- confirm whether they are still used.
    search_url = '/i/search/timeline'
    conversation_url_form = '/i/{}/conversation/{}'

    # Single date format used for every *_str attribute and for parsing.
    _DATE_FORMAT = '%Y-%m-%d'

    def __init__(self):
        """Create an empty config; real values are supplied by ``set_param``."""
        self.keyword_id = -1
        self.db_num = -1
        self.id = 0
        self.realtime = False
        self.keywords = []
        self.start_str = None
        self.start = None
        self.end_str = None
        self.end = None
        self.authorship = None
        self.state = None
        self.platform = None

    def set_param(self, keyword_id, db_num, params):
        """Populate the config from a job's identifiers and parameter mapping.

        Args:
            keyword_id: Value convertible with ``int()``.
            db_num: Value convertible with ``int()``.
            params: Mapping providing the keys ``'id'``, ``'realtime'``,
                ``'searches'`` (comma-separated keywords), ``'start'`` and
                ``'end'`` (both ``YYYY-MM-DD`` once passed through ``str()``),
                ``'authorship'``, ``'state'`` and ``'platform'``.

        Raises:
            KeyError: If a required key is missing from ``params``.
            ValueError: If a numeric or date value cannot be parsed.
        """
        self.keyword_id = int(keyword_id)
        self.db_num = int(db_num)
        self.id = int(params['id'])
        # Only a value equal to the integer 1 enables realtime mode; a string
        # such as '1' does not compare equal and leaves it disabled.
        self.realtime = params['realtime'] == 1
        # Surrounding whitespace is stripped from each keyword; empty entries
        # (e.g. from "a,,b") are kept as empty strings.
        self.keywords = [
            keyword.strip() for keyword in params['searches'].split(',')
        ]
        self.start_str = str(params['start'])
        self.end_str = str(params['end'])
        self.start = datetime.datetime.strptime(
            self.start_str, self._DATE_FORMAT)
        self.end = datetime.datetime.strptime(self.end_str, self._DATE_FORMAT)
        self.authorship = params['authorship']
        self.state = params['state']
        self.platform = params['platform']

    def reload_realtime(self, before_day):
        """Re-anchor the date window on today; no-op unless ``realtime`` is set.

        ``end`` becomes midnight of the current local day and ``start`` becomes
        ``end`` plus ``before_day`` days.

        Args:
            before_day: Day offset (convertible with ``int()``) that is ADDED
                to ``end`` to obtain ``start``.
                NOTE(review): a negative value is needed for ``start`` to
                precede ``end``; confirm what callers actually pass.
        """
        if not self.realtime:
            return
        # Formatting "now" and parsing it back truncates the time of day, so
        # ``end`` lands exactly on midnight.
        self.end_str = datetime.datetime.now().strftime(self._DATE_FORMAT)
        self.end = datetime.datetime.strptime(self.end_str, self._DATE_FORMAT)
        self.start = self.end + datetime.timedelta(days=int(before_day))
        self.start_str = self.start.strftime(self._DATE_FORMAT)

    def split(self):
        """Split the date window into one-day configs, newest day first.

        Returns:
            A list of deep copies of this config, each covering
            ``[day - 1, day]`` and walking backwards from ``end`` while the
            current day is later than ``start``. The list is empty when
            ``end <= start`` (including a window where both dates are equal).
        """
        one_day = datetime.timedelta(days=1)
        split_list = []
        new_end = self.end
        while new_end > self.start:
            new_config = copy.deepcopy(self)
            new_config.end = new_end
            new_end = new_end - one_day
            new_config.start = new_end
            new_config.start_str = new_config.start.strftime(self._DATE_FORMAT)
            new_config.end_str = new_config.end.strftime(self._DATE_FORMAT)
            split_list.append(new_config)
        return split_list