runner exception 추가

This commit is contained in:
mjjo
2017-08-10 11:32:08 +09:00
parent 19cd5bb070
commit 3d829e55b5
2 changed files with 47 additions and 42 deletions

View File

@@ -6,6 +6,7 @@ from twitter.twparser import TweetParser
import base.proxy
import base.proxy2 as proxy2
import base.baseclasses
import base.logger as logger
import requests
import bs4
@@ -122,57 +123,61 @@ class TwitterCrawler:
}
def runner_proc(self, proc_id, content_queue, result_queue, config):
    """Crawl the search timeline for one date range and report the tweet count.

    Repeatedly builds a timeline URL from ``config.keywords[0]``,
    ``config.start_str``, ``config.end_str`` and the current paging cursor,
    fetches the page, and parses every ``div.tweet`` element found in the
    returned ``'items_html'`` fragment. Tweets flagged as replies are skipped;
    the others are stored through ``self.db_helper.insert_tweet`` and, when
    they have replies, handed to ``self.insert_content_pool``.

    Any exception raised while crawling is logged at ERROR level and ends the
    crawl early; the count reached so far is still reported.

    Args:
        proc_id: Identifier of this runner, used in log lines and in the result.
        content_queue: Queue passed through to ``self.insert_content_pool``.
        result_queue: Queue that receives ``{'proc_id': ..., 'count': ...}``
            once the crawl ends.
        config: Object providing ``keywords``, ``start_str``, ``end_str``,
            ``keyword_id`` and ``db_num``.

    Returns:
        A ``(proc_id, tweet_count)`` tuple.
    """
    # Bound before the ``try`` so the summary, the queue message and the
    # return value below are always defined, even if the very first statement
    # inside the ``try`` raises.
    tweet_count = 0

    try:
        print('[{}] {} to {} runner thread start'.format(proc_id, config.start_str, config.end_str))

        b_continue = True
        min_tweet_id = None
        max_tweet_id = None
        max_position = ''

        while b_continue:
            url = self.get_timeline_url(config.keywords[0], config.start_str, config.end_str, max_position)
            j = self.get_page_data(url, True, proc_id)
            soup = bs4.BeautifulSoup(j['items_html'], 'lxml')
            tweet_tags = soup.select("div.tweet")

            tweet_ids = []
            for tw in tweet_tags:
                tweet = TweetParser.parse(tw, config.keyword_id)
                tweet_ids.append(tweet.tweet_id)

                # Replies are not stored by this runner.
                if tweet.is_reply is True:
                    continue

                # NOTE(review): nesting reconstructed from a flattened diff;
                # the DB insert is assumed to run for every non-reply tweet,
                # not only for tweets that have replies -- confirm.
                if tweet.reply_cnt > 0:
                    self.insert_content_pool(proc_id, content_queue, tweet, tweet)
                self.db_helper.insert_tweet(tweet, config.db_num)

                print('[{}] body {} ({}) [{}]'.format(proc_id, tweet.top_link, tweet.created_at, 'ok'))

            count = len(tweet_tags)
            tweet_count += count

            # An empty page ends the crawl; the response's 'has_more_items'
            # field is deliberately not used for this decision.
            b_continue = count > 0
            if b_continue:
                # The first id of the first page stays fixed; the last id
                # follows the most recent page.
                if min_tweet_id is None:
                    min_tweet_id = tweet_ids[0]
                max_tweet_id = tweet_ids[-1]

                # NOTE(review): cursor update assumed to sit inside this
                # branch -- confirm against the real file.
                # Use the cursor supplied by the response when there is one,
                # otherwise build one from the tracked ids.
                if 'min_position' in j:
                    max_position = j['min_position']
                else:
                    max_position = 'TWEET-{}-{}'.format(max_tweet_id, min_tweet_id)

    except Exception as e:
        logger.log(e, logger.LogLevel.ERROR)

    print('[{}] {} to {} runner thread finished {}'.format(proc_id, config.start_str, config.end_str, tweet_count))
    result_queue.put({
        'proc_id': proc_id,
        'count': tweet_count,
    })

    return proc_id, tweet_count
@staticmethod