runner exception 추가
This commit is contained in:
@@ -135,7 +135,7 @@ class Proxy2Handler:
|
|||||||
else:
|
else:
|
||||||
proxies = proxy_crawler.crawl_proxies()
|
proxies = proxy_crawler.crawl_proxies()
|
||||||
self.insert_all(proxies)
|
self.insert_all(proxies)
|
||||||
self.unlock()
|
self.lock_leave()
|
||||||
return self.get(platform, proc_id)
|
return self.get(platform, proc_id)
|
||||||
|
|
||||||
def insert(self, ip, port):
|
def insert(self, ip, port):
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ from twitter.twparser import TweetParser
|
|||||||
import base.proxy
|
import base.proxy
|
||||||
import base.proxy2 as proxy2
|
import base.proxy2 as proxy2
|
||||||
import base.baseclasses
|
import base.baseclasses
|
||||||
|
import base.logger as logger
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
import bs4
|
import bs4
|
||||||
@@ -122,57 +123,61 @@ class TwitterCrawler:
|
|||||||
}
|
}
|
||||||
|
|
||||||
def runner_proc(self, proc_id, content_queue, result_queue, config):
|
def runner_proc(self, proc_id, content_queue, result_queue, config):
|
||||||
print('[{}] {} to {} runner thread start'.format(proc_id, config.start_str, config.end_str))
|
try:
|
||||||
|
print('[{}] {} to {} runner thread start'.format(proc_id, config.start_str, config.end_str))
|
||||||
|
|
||||||
b_continue = True
|
b_continue = True
|
||||||
min_tweet_id = None
|
min_tweet_id = None
|
||||||
max_tweet_id = None
|
max_tweet_id = None
|
||||||
max_position = ''
|
max_position = ''
|
||||||
tweet_count = 0
|
tweet_count = 0
|
||||||
|
|
||||||
while b_continue:
|
while b_continue:
|
||||||
url = self.get_timeline_url(config.keywords[0], config.start_str, config.end_str, max_position)
|
url = self.get_timeline_url(config.keywords[0], config.start_str, config.end_str, max_position)
|
||||||
j = self.get_page_data(url, True, proc_id)
|
j = self.get_page_data(url, True, proc_id)
|
||||||
soup = bs4.BeautifulSoup(j['items_html'], 'lxml')
|
soup = bs4.BeautifulSoup(j['items_html'], 'lxml')
|
||||||
tweet_tags = soup.select("div.tweet")
|
tweet_tags = soup.select("div.tweet")
|
||||||
|
|
||||||
tweet_ids = []
|
tweet_ids = []
|
||||||
for tw in tweet_tags:
|
for tw in tweet_tags:
|
||||||
tweet = TweetParser.parse(tw, config.keyword_id)
|
tweet = TweetParser.parse(tw, config.keyword_id)
|
||||||
tweet_ids.append(tweet.tweet_id)
|
tweet_ids.append(tweet.tweet_id)
|
||||||
|
|
||||||
if tweet.is_reply is True:
|
if tweet.is_reply is True:
|
||||||
# print(' ## {}: {}...'.format(tweet.user_name, tweet.text[:20]))
|
# print(' ## {}: {}...'.format(tweet.user_name, tweet.text[:20]))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if tweet.reply_cnt > 0:
|
if tweet.reply_cnt > 0:
|
||||||
self.insert_content_pool(proc_id, content_queue, tweet, tweet)
|
self.insert_content_pool(proc_id, content_queue, tweet, tweet)
|
||||||
self.db_helper.insert_tweet(tweet, config.db_num)
|
self.db_helper.insert_tweet(tweet, config.db_num)
|
||||||
|
|
||||||
# print('{} {}: {}...'.format(tweet.created_at, tweet.user_name, tweet.text[:20]))
|
# print('{} {}: {}...'.format(tweet.created_at, tweet.user_name, tweet.text[:20]))
|
||||||
print('[{}] body {} ({}) [{}]'.format(proc_id, tweet.top_link, tweet.created_at, 'ok'))
|
print('[{}] body {} ({}) [{}]'.format(proc_id, tweet.top_link, tweet.created_at, 'ok'))
|
||||||
|
|
||||||
count = len(tweet_tags)
|
count = len(tweet_tags)
|
||||||
tweet_count += count
|
tweet_count += count
|
||||||
|
|
||||||
b_continue = count > 0
|
b_continue = count > 0
|
||||||
# b_continue = j['has_more_items']
|
# b_continue = j['has_more_items']
|
||||||
if b_continue:
|
if b_continue:
|
||||||
if min_tweet_id is None:
|
if min_tweet_id is None:
|
||||||
min_tweet_id = tweet_ids[0]
|
min_tweet_id = tweet_ids[0]
|
||||||
max_tweet_id = tweet_ids[-1]
|
max_tweet_id = tweet_ids[-1]
|
||||||
|
|
||||||
if 'min_position' in j:
|
if 'min_position' in j:
|
||||||
max_position = j['min_position']
|
max_position = j['min_position']
|
||||||
else:
|
else:
|
||||||
max_position = 'TWEET-{}-{}'.format(max_tweet_id, min_tweet_id)
|
max_position = 'TWEET-{}-{}'.format(max_tweet_id, min_tweet_id)
|
||||||
|
|
||||||
|
print('[{}] {} to {} runner thread finished {}'.format(proc_id, config.start_str, config.end_str, tweet_count))
|
||||||
|
result_queue.put({
|
||||||
|
'proc_id': proc_id,
|
||||||
|
'count': tweet_count,
|
||||||
|
})
|
||||||
|
# self.runner_processing[proc_id].value = False
|
||||||
|
except Exception as e:
|
||||||
|
logger.log(e, logger.LogLevel.ERROR)
|
||||||
|
|
||||||
print('[{}] {} to {} runner thread finished {}'.format(proc_id, config.start_str, config.end_str, tweet_count))
|
|
||||||
result_queue.put({
|
|
||||||
'proc_id': proc_id,
|
|
||||||
'count': tweet_count,
|
|
||||||
})
|
|
||||||
# self.runner_processing[proc_id].value = False
|
|
||||||
return proc_id, tweet_count,
|
return proc_id, tweet_count,
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|||||||
Reference in New Issue
Block a user