트위터 크롤러 상위 작성자 표시

This commit is contained in:
mjjo
2017-08-10 17:00:12 +09:00
parent 787889a201
commit aa2f5b9f71
3 changed files with 5 additions and 4 deletions

View File

@@ -161,7 +161,7 @@ class Proxy2Handler:
if resp.ok:
instance.set_block_at(platform, None)
alive_cnt += 1
- print('proxy {}:{} alive'.format(instance.ip, instance.port))
+ # print('proxy {}:{} alive'.format(instance.ip, instance.port))
else:
instance.set_block_at(platform, datetime.datetime.now())

View File

@@ -227,7 +227,7 @@ class TwitterCrawler:
for container_tags in reply_container_tags:
tweet_tags = container_tags.select('div.tweet')
if len(tweet_tags) > 0:
- tweet = TweetParser.parse(tweet_tags[0], self.default_config.keyword_id, parent_tw.depth+1, top_tw)
+ tweet = TweetParser.parse(tweet_tags[0], self.default_config.keyword_id, parent_tw.depth+1, parent_tw, top_tw)
# print('[{}]>>> {} {}: {} ({}) ({})'.format(proc_id, tweet.created_at, tweet.user_name, tweet.text[:20], tweet.depth, tweet.tweet_link))
print('[{}] reply {} [{}]'.format(proc_id, tweet.top_link, 'ok'))
self.insert_content_pool(proc_id, content_queue, tweet, top_tw)

View File

@@ -5,10 +5,11 @@ import bs4
import datetime
import pytz
class TweetParser:
@staticmethod
- def parse(tag, keyword_id, depth=0, top_tw: Tweet=None):
+ def parse(tag, keyword_id, depth=0, parent_tw: Tweet=None, top_tw: Tweet=None):
tweet = Tweet()
tweet.tweet_id = int(tag.attrs['data-tweet-id'])
@@ -62,7 +63,7 @@ class TweetParser:
tweet.platform_form = 'post'
tweet.platform_title = top_tw.user_id if top_tw else tweet.user_id
tweet.article_form = 'body' if tweet.depth is 0 else 'reply'
- # tweet.article_parent = None
+ tweet.article_parent = parent_tw.user_name if parent_tw else None
tweet.article_id = tweet.user_id
tweet.article_nickname = tweet.user_name
# tweet.article_title = None