트위터 크롤러 상위 작성자 표시
This commit is contained in:
@@ -161,7 +161,7 @@ class Proxy2Handler:
|
|||||||
if resp.ok:
|
if resp.ok:
|
||||||
instance.set_block_at(platform, None)
|
instance.set_block_at(platform, None)
|
||||||
alive_cnt += 1
|
alive_cnt += 1
|
||||||
print('proxy {}:{} alive'.format(instance.ip, instance.port))
|
# print('proxy {}:{} alive'.format(instance.ip, instance.port))
|
||||||
else:
|
else:
|
||||||
instance.set_block_at(platform, datetime.datetime.now())
|
instance.set_block_at(platform, datetime.datetime.now())
|
||||||
|
|
||||||
|
|||||||
@@ -227,7 +227,7 @@ class TwitterCrawler:
|
|||||||
for container_tags in reply_container_tags:
|
for container_tags in reply_container_tags:
|
||||||
tweet_tags = container_tags.select('div.tweet')
|
tweet_tags = container_tags.select('div.tweet')
|
||||||
if len(tweet_tags) > 0:
|
if len(tweet_tags) > 0:
|
||||||
tweet = TweetParser.parse(tweet_tags[0], self.default_config.keyword_id, parent_tw.depth+1, top_tw)
|
tweet = TweetParser.parse(tweet_tags[0], self.default_config.keyword_id, parent_tw.depth+1, parent_tw, top_tw)
|
||||||
# print('[{}]>>> {} {}: {} ({}) ({})'.format(proc_id, tweet.created_at, tweet.user_name, tweet.text[:20], tweet.depth, tweet.tweet_link))
|
# print('[{}]>>> {} {}: {} ({}) ({})'.format(proc_id, tweet.created_at, tweet.user_name, tweet.text[:20], tweet.depth, tweet.tweet_link))
|
||||||
print('[{}] reply {} [{}]'.format(proc_id, tweet.top_link, 'ok'))
|
print('[{}] reply {} [{}]'.format(proc_id, tweet.top_link, 'ok'))
|
||||||
self.insert_content_pool(proc_id, content_queue, tweet, top_tw)
|
self.insert_content_pool(proc_id, content_queue, tweet, top_tw)
|
||||||
|
|||||||
@@ -5,10 +5,11 @@ import bs4
|
|||||||
import datetime
|
import datetime
|
||||||
import pytz
|
import pytz
|
||||||
|
|
||||||
|
|
||||||
class TweetParser:
|
class TweetParser:
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def parse(tag, keyword_id, depth=0, top_tw: Tweet=None):
|
def parse(tag, keyword_id, depth=0, parent_tw: Tweet=None, top_tw: Tweet=None):
|
||||||
tweet = Tweet()
|
tweet = Tweet()
|
||||||
|
|
||||||
tweet.tweet_id = int(tag.attrs['data-tweet-id'])
|
tweet.tweet_id = int(tag.attrs['data-tweet-id'])
|
||||||
@@ -62,7 +63,7 @@ class TweetParser:
|
|||||||
tweet.platform_form = 'post'
|
tweet.platform_form = 'post'
|
||||||
tweet.platform_title = top_tw.user_id if top_tw else tweet.user_id
|
tweet.platform_title = top_tw.user_id if top_tw else tweet.user_id
|
||||||
tweet.article_form = 'body' if tweet.depth is 0 else 'reply'
|
tweet.article_form = 'body' if tweet.depth is 0 else 'reply'
|
||||||
# tweet.article_parent = None
|
tweet.article_parent = parent_tw.user_name if parent_tw else None
|
||||||
tweet.article_id = tweet.user_id
|
tweet.article_id = tweet.user_id
|
||||||
tweet.article_nickname = tweet.user_name
|
tweet.article_nickname = tweet.user_name
|
||||||
# tweet.article_title = None
|
# tweet.article_title = None
|
||||||
|
|||||||
Reference in New Issue
Block a user