runner exception 추가 (add exception handling to the runner)
This commit is contained in:
@@ -6,6 +6,7 @@ from twitter.twparser import TweetParser
|
||||
import base.proxy
|
||||
import base.proxy2 as proxy2
|
||||
import base.baseclasses
|
||||
import base.logger as logger
|
||||
|
||||
import requests
|
||||
import bs4
|
||||
@@ -122,57 +123,61 @@ class TwitterCrawler:
|
||||
}
|
||||
|
||||
def runner_proc(self, proc_id, content_queue, result_queue, config):
    """Crawl one search timeline page by page and report how many tweets were seen.

    The timeline URL is built from ``config.keywords[0]``,
    ``config.start_str``, ``config.end_str`` and the current paging
    position. Paging stops as soon as a page contains no ``div.tweet``
    elements.

    Any exception raised while crawling is logged through ``logger.log``
    and ends the crawl early; the summary is still printed and put on
    ``result_queue`` afterwards.

    Args:
        proc_id: Identifier of this runner; used in log output and echoed
            back in the result.
        content_queue: Queue passed through to ``self.insert_content_pool``
            for tweets that have replies.
        result_queue: Queue that receives ``{'proc_id': ..., 'count': ...}``
            once the crawl is over.
        config: Object providing ``keywords``, ``start_str``, ``end_str``,
            ``keyword_id`` and ``db_num``.

    Returns:
        The tuple ``(proc_id, tweet_count)`` where ``tweet_count`` is the
        number of ``div.tweet`` elements seen (replies included).
    """
    # Bound before the try block: the summary below reads it even when the
    # crawl fails on its very first statement.
    tweet_count = 0

    try:
        print('[{}] {} to {} runner thread start'.format(proc_id, config.start_str, config.end_str))

        b_continue = True
        min_tweet_id = None
        max_tweet_id = None
        max_position = ''

        while b_continue:
            url = self.get_timeline_url(config.keywords[0], config.start_str, config.end_str, max_position)
            j = self.get_page_data(url, True, proc_id)
            soup = bs4.BeautifulSoup(j['items_html'], 'lxml')
            tweet_tags = soup.select("div.tweet")

            tweet_ids = []
            for tw in tweet_tags:
                tweet = TweetParser.parse(tw, config.keyword_id)
                # Recorded before the reply check so paging positions also
                # account for tweets that are skipped below.
                tweet_ids.append(tweet.tweet_id)

                if tweet.is_reply is True:
                    continue

                # NOTE(review): nesting was lost in the reviewed source.
                # Assumed: only tweets with replies go to the content pool,
                # while every non-reply tweet is stored and logged - confirm.
                if tweet.reply_cnt > 0:
                    self.insert_content_pool(proc_id, content_queue, tweet, tweet)
                self.db_helper.insert_tweet(tweet, config.db_num)

                print('[{}] body {} ({}) [{}]'.format(proc_id, tweet.top_link, tweet.created_at, 'ok'))

            count = len(tweet_tags)
            tweet_count += count

            # An empty page ends the crawl; the response's 'has_more_items'
            # field is not consulted.
            b_continue = count > 0
            if b_continue:
                # NOTE(review): assumed min_tweet_id is fixed on the first
                # non-empty page and max_tweet_id is refreshed on every
                # page - confirm against the real file's indentation.
                if min_tweet_id is None:
                    min_tweet_id = tweet_ids[0]
                max_tweet_id = tweet_ids[-1]

                # Use the position supplied by the response when present,
                # otherwise build one from the tweet ids collected so far.
                if 'min_position' in j:
                    max_position = j['min_position']
                else:
                    max_position = 'TWEET-{}-{}'.format(max_tweet_id, min_tweet_id)

    except Exception as e:
        # Boundary of the runner: log the failure and fall through so the
        # result is still reported below.
        logger.log(e, logger.LogLevel.ERROR)

    print('[{}] {} to {} runner thread finished {}'.format(proc_id, config.start_str, config.end_str, tweet_count))
    result_queue.put({
        'proc_id': proc_id,
        'count': tweet_count,
    })
    return proc_id, tweet_count,
|
||||
|
||||
@staticmethod
|
||||
|
||||
Reference in New Issue
Block a user