- requirements.txt 추가 (requests, bs4, pytz)

- 디버그용 print 구문 주석 처리
This commit is contained in:
mjjo
2017-07-27 11:27:27 +09:00
parent 2f324b1710
commit fb7b3949d3
3 changed files with 9 additions and 13 deletions

View File

@@ -0,0 +1,3 @@
requests
bs4
pytz

View File

@@ -43,13 +43,6 @@ class TwitterConfig:
self.state = params['state']
self.platform = params['platform']
# debug
self.platform = 14
# self.start_str = '2017-05-01'
# self.end_str = '2017-05-02'
# self.start = datetime.datetime.strptime(self.start_str, '%Y-%m-%d')
# self.end = datetime.datetime.strptime(self.end_str, '%Y-%m-%d')
def split(self):
split_list = []
new_end = self.end

View File

@@ -99,7 +99,7 @@ class TwitterCrawler():
tweet = TweetParser.parse(tw, config.keyword_id)
if tweet.is_reply is True:
print(' ## {}: {}...'.format(tweet.user_name, tweet.text[:20]))
# print(' ## {}: {}...'.format(tweet.user_name, tweet.text[:20]))
continue
if tweet.reply_cnt > 0:
@@ -107,7 +107,7 @@ class TwitterCrawler():
self.db_helper.insert_tweet(tweet, config.db_num)
print('{} {}: {}...'.format(tweet.created_at, tweet.user_name, tweet.text[:20]))
# print('{} {}: {}...'.format(tweet.created_at, tweet.user_name, tweet.text[:20]))
count = len(tweet_tags)
if count == 0:
@@ -125,7 +125,7 @@ class TwitterCrawler():
@staticmethod
def insert_content_pool(proc_id: int, qu, tweet: Tweet, tweet_top: Tweet):
print(' [{}] pool insert: {} ({})'.format(proc_id, tweet.text[:20] if tweet.text else '', tweet.tweet_link))
# print(' [{}] pool insert: {} ({})'.format(proc_id, tweet.text[:20] if tweet.text else '', tweet.tweet_link))
qu.put((tweet, tweet_top,))
@staticmethod
@@ -153,7 +153,7 @@ class TwitterCrawler():
if not parent_tw:
break
print(' [{}] <<< parent : {} ({})'.format(proc_id, parent_tw.text[:20], parent_tw.tweet_link))
# print(' [{}] <<< parent : {} ({})'.format(proc_id, parent_tw.text[:20], parent_tw.tweet_link))
max_position = ''
@@ -176,7 +176,7 @@ class TwitterCrawler():
tweet_tags = container_tags.select('div.tweet')
if len(tweet_tags) > 0:
tweet = TweetParser.parse(tweet_tags[0], self.default_config.keyword_id, parent_tw.depth+1, top_tw)
print('[{}]>>> {} {}: {} ({}) ({})'.format(proc_id, tweet.created_at, tweet.user_name, tweet.text[:20], tweet.depth, tweet.tweet_link))
# print('[{}]>>> {} {}: {} ({}) ({})'.format(proc_id, tweet.created_at, tweet.user_name, tweet.text[:20], tweet.depth, tweet.tweet_link))
self.insert_content_pool(proc_id, content_queue, tweet, top_tw)
self.db_helper.insert_tweet(tweet, self.default_config.db_num)
tweet_count += 1
@@ -185,7 +185,7 @@ class TwitterCrawler():
if b_continue:
max_position = j['min_position']
result_queue.put(tweet_count)
result_queue.put((proc_id, tweet_count))
print('[{}] content thread finished'.format(proc_id))
return proc_id, tweet_count,