4 space -> tab으로 수정
This commit is contained in:
@@ -3,81 +3,81 @@ import datetime
|
|||||||
from numbers import Number
|
from numbers import Number
|
||||||
|
|
||||||
class DataDBRow:
    """One row of a ``data_<db_num>`` table plus helpers that render SQL text.

    The column set is exactly the attributes assigned in ``__init__``.
    Attributes added later (by subclasses or by callers) are never emitted
    into the generated SQL.
    """

    def __init__(self):
        self.platform_name = None
        self.platform_form = None
        self.platform_title = None
        self.article_form = None
        self.article_parent = None
        self.article_id = None
        self.article_nickname = None
        self.article_title = None
        self.article_data = None
        self.article_url = None
        self.article_hit = 0
        self.article_date = None
        self.article_order = 0
        self.article_profile = None
        self.article_profileurl = None
        self.platform_id = None
        self.keyword_id = -1
        self.reply_url = None
        self.etc = None

    @staticmethod
    def _column_names():
        """Return the base column names in declaration order.

        A pristine ``DataDBRow`` is inspected instead of ``self`` so that
        extra attributes on the live object are filtered out.
        """
        template = DataDBRow()
        return tuple(
            name
            for name, default in template.__dict__.items()
            if not (name.startswith('__') or callable(default))
        )

    @staticmethod
    def _sql_value(conn, value):
        """Render a single attribute value as SQL text.

        Numbers are emitted verbatim; everything else goes through
        ``conn.escape``.
        """
        if isinstance(value, Number):
            return str(value)
        if isinstance(value, str):
            # The utf8 round trip leaves valid text unchanged and raises
            # before the query is built for text that cannot be encoded.
            return conn.escape(value.encode('utf8').decode('utf8'))
        return conn.escape(value)

    def get_keys(self):
        """Return the column names as a tuple, in declaration order."""
        return self._column_names()

    def get_values(self, conn, db_num):
        """Return the SQL-rendered values, aligned with ``get_keys()``.

        Args:
            conn: object exposing ``escape(value)``, used for every
                non-numeric value.
            db_num: unused; kept for interface compatibility.

        Raises:
            KeyError: if a base column attribute was deleted from ``self``.
        """
        return tuple(
            self._sql_value(conn, self.__dict__[name])
            for name in self._column_names()
        )

    def get_delete_query(self, db_num, conn=None):
        """Return a ``delete`` statement matching this row's ``article_url``.

        Args:
            db_num: suffix of the target ``data_<db_num>`` table.
            conn: optional object exposing ``escape(value)``. When given it
                is used to quote the URL; otherwise quotes and backslashes
                are backslash-escaped locally.

        The URL is always stringified first, so a ``None`` URL still renders
        as the literal ``'None'``.
        """
        url_text = str(self.article_url)
        if conn is not None:
            url_literal = conn.escape(url_text)
        else:
            # Without a connection, neutralise the two characters that could
            # terminate or alter the quoted literal.
            escaped = url_text.replace('\\', '\\\\').replace("'", "\\'")
            url_literal = "'" + escaped + "'"
        return 'delete from data_{} where article_url={}'.format(db_num, url_literal)

    def get_insert_query(self, conn, db_num):
        """Return an ``insert`` statement carrying every base column.

        Args:
            conn: object exposing ``escape(value)``.
            db_num: suffix of the target ``data_<db_num>`` table.
        """
        names = self._column_names()
        rendered = [self._sql_value(conn, self.__dict__[name]) for name in names]
        return 'insert into data_{} ({}) values ({})'.format(
            db_num, ', '.join(names), ', '.join(rendered))
|
||||||
|
|||||||
@@ -4,10 +4,10 @@ import base.logger as logger
|
|||||||
|
|
||||||
|
|
||||||
def print_exception(obj=None):
    """Log the exception currently being handled, with its source line.

    The file name, line number and source text are taken from the first
    entry of the active traceback and sent to ``logger.log`` at ERROR level.

    Args:
        obj: optional extra context appended to the message when truthy.

    When no exception is being handled there is no traceback to inspect;
    in that case only a truthy ``obj`` is logged, and nothing is raised.
    """
    _exc_type, exc_obj, tb = sys.exc_info()
    if tb is None:
        # Called outside an ``except`` block: avoid dereferencing None.
        if obj:
            logger.log('{}'.format(obj), logger.LogLevel.ERROR)
        return

    frame = tb.tb_frame
    lineno = tb.tb_lineno
    filename = frame.f_code.co_filename
    # Refresh linecache so an edited file does not yield a stale line.
    linecache.checkcache(filename)
    line = linecache.getline(filename, lineno, frame.f_globals)
    message = '({}({}) Exception from "{}"):\n {}, {}'.format(
        filename, lineno, line.strip(), exc_obj, obj if obj else '')
    logger.log(message, logger.LogLevel.ERROR)
|
||||||
|
|||||||
@@ -223,8 +223,8 @@ class Proxy2Handler:
|
|||||||
self.lock.acquire()
|
self.lock.acquire()
|
||||||
for proxy in proxies:
|
for proxy in proxies:
|
||||||
query = r"INSERT INTO proxy2(ip, PORT) " \
|
query = r"INSERT INTO proxy2(ip, PORT) " \
|
||||||
r"SELECT '{}', {} FROM DUAL " \
|
r"SELECT '{}', {} FROM DUAL " \
|
||||||
r"WHERE NOT EXISTS (SELECT * FROM proxy2 WHERE ip='{}' AND PORT={})"\
|
r"WHERE NOT EXISTS (SELECT * FROM proxy2 WHERE ip='{}' AND PORT={})"\
|
||||||
.format(proxy['ip'], proxy['port'], proxy['ip'], proxy['port'])
|
.format(proxy['ip'], proxy['port'], proxy['ip'], proxy['port'])
|
||||||
# 안됨 - 중복으로 들어감, 쓰레드 종료됨
|
# 안됨 - 중복으로 들어감, 쓰레드 종료됨
|
||||||
self.engine.execute(query)
|
self.engine.execute(query)
|
||||||
|
|||||||
@@ -2,69 +2,69 @@ import datetime
|
|||||||
import copy
|
import copy
|
||||||
|
|
||||||
class TwitterConfig:
    """Search settings for one keyword crawl, plus URL constants.

    Instances hold the keyword/database identifiers, the search terms and
    the date window, both as ``datetime`` objects and as ``YYYY-MM-DD``
    strings.
    """

    protocol = 'https'
    top_url = 'twitter.com'
    search_url = '/i/search/timeline'
    conversation_url_form = '/i/{}/conversation/{}'

    def __init__(self):
        self.keyword_id = -1
        self.db_num = -1

        self.id = 0
        self.realtime = False
        self.keywords = []
        self.start_str = None
        self.start = None
        self.end_str = None
        self.end = None
        self.authorship = None
        self.state = None
        self.platform = None

    def set_param(self, keyword_id, db_num, params):
        """Populate the configuration from a parameter mapping.

        Reads the keys ``id``, ``realtime``, ``searches`` (comma separated),
        ``start``, ``end``, ``authorship``, ``state`` and ``platform`` from
        *params*. ``start`` and ``end`` must stringify to ``YYYY-MM-DD``.
        """
        day_format = '%Y-%m-%d'

        self.keyword_id = int(keyword_id)
        self.db_num = int(db_num)

        self.id = int(params['id'])
        self.realtime = params['realtime'] == 1

        # Reset first, then fill, so the list is emptied even if the lookup fails.
        self.keywords = []
        self.keywords.extend(term.strip() for term in params['searches'].split(','))

        self.start_str = str(params['start'])
        self.end_str = str(params['end'])
        self.start = datetime.datetime.strptime(self.start_str, day_format)
        self.end = datetime.datetime.strptime(self.end_str, day_format)

        self.authorship = params['authorship']
        self.state = params['state']
        self.platform = params['platform']

    def reload_realtime(self, before_day):
        """Re-anchor the window on today; a no-op unless ``realtime`` is set.

        ``end`` becomes today at midnight and ``start`` becomes ``end`` plus
        ``before_day`` days.
        """
        if not self.realtime:
            return

        day_format = '%Y-%m-%d'
        # Formatting and re-parsing drops the time-of-day component.
        self.end_str = datetime.datetime.now().strftime(day_format)
        self.end = datetime.datetime.strptime(self.end_str, day_format)
        self.start = self.end + datetime.timedelta(days=int(before_day))
        self.start_str = self.start.strftime(day_format)

    def split(self):
        """Return one deep-copied config per day of the window, newest first."""
        one_day = datetime.timedelta(days=1)
        day_format = '%Y-%m-%d'

        pieces = []
        cursor = self.end
        while cursor > self.start:
            piece = copy.deepcopy(self)

            piece.end = cursor
            cursor = cursor - one_day
            piece.start = cursor

            piece.start_str = piece.start.strftime(day_format)
            piece.end_str = piece.end.strftime(day_format)

            pieces.append(piece)

        return pieces
|
||||||
|
|||||||
@@ -3,81 +3,81 @@ import queue
|
|||||||
|
|
||||||
|
|
||||||
class TwitterDBHelper:
    """Queue tweets in memory and write them to MySQL in batches."""

    pymysql = __import__('pymysql.cursors')
    DB_DUMP_SIZE = 128

    def __init__(self):
        # ``tweets`` and ``buffer`` are not used by any method of this class.
        self.tweets = []
        self.buffer = []
        self.queue = queue.Queue()

    def __del__(self):
        # Write out whatever is still queued when the helper is destroyed.
        self.flush()

    def _connect(self, **extra):
        """Open a connection with the shared settings plus *extra* keywords."""
        # NOTE(review): host and credentials are hard-coded in source; they
        # should come from configuration or the environment.
        return self.pymysql.connect(host='bigbird.iptime.org',
                                    user='admin', passwd='admin123',
                                    db='concepters', charset='utf8',
                                    cursorclass=self.pymysql.cursors.DictCursor,
                                    **extra)

    def get_param(self, keyword_id):
        """Return the ``keyword`` table row for *keyword_id*.

        Returns:
            Whatever ``cursor.fetchone()`` yields for the matching row.

        Raises:
            SystemExit: with status 1 after printing the error, if
                connecting or querying fails.
        """
        params = []
        try:
            conn = self._connect()
            try:
                with conn.cursor() as cursor:
                    # Bound parameter instead of string concatenation.
                    cursor.execute("select * from keyword where id = %s", (keyword_id,))
                    params = cursor.fetchone()
            finally:
                # Close on failure as well as on success.
                conn.close()
        except Exception as e:
            print(e)
            raise SystemExit(1)

        return params

    def _drain_queue(self):
        """Remove and return every item currently in the queue."""
        drained = []
        while True:
            try:
                # Non-blocking: ``empty()`` followed by ``get()`` could block
                # if another consumer took the last item in between.
                drained.append(self.queue.get_nowait())
            except queue.Empty:
                return drained

    def flush(self):
        """Write all queued tweets to the database in one transaction.

        For every tweet that is not a reply, the delete statement is run
        before the insert. Errors while writing are printed and the drained
        batch is not re-queued.
        """
        local_buffer = self._drain_queue()

        print('### db queue dump {}'.format(len(local_buffer)))

        if not local_buffer:
            return

        # Retry until a connection is obtained; the tweets have already been
        # removed from the queue, so giving up here would drop them.
        while True:
            try:
                conn = self._connect(connect_timeout=5)
            except Exception as e:
                print(e)
                continue
            else:
                break

        try:
            with conn.cursor() as cursor:
                for tweet, _db_num in local_buffer:
                    if not tweet.is_reply:
                        cursor.execute(tweet.get_delete_query(_db_num))
                    cursor.execute(tweet.get_insert_query(conn, _db_num))
            conn.commit()

        except Exception as e:
            print(e)

        finally:
            conn.close()

    def insert_tweet(self, tweet: Tweet = None, db_num: int = -1, flush=False):
        """Queue *tweet* for table ``data_<db_num>`` and flush when due.

        Args:
            tweet: row to store. ``None`` is not queued, so the call can be
                used purely to force a flush.
            db_num: suffix of the destination table.
            flush: when true, flush immediately instead of waiting for
                ``DB_DUMP_SIZE`` queued items.
        """
        if tweet is not None:
            self.queue.put((tweet, db_num))
        if flush or self.queue.qsize() >= self.DB_DUMP_SIZE:
            self.flush()
|
||||||
|
|||||||
@@ -3,22 +3,22 @@ from base.dbdata import DataDBRow
|
|||||||
|
|
||||||
class Tweet(DataDBRow):
    """A parsed tweet: the ``DataDBRow`` columns plus tweet-specific fields."""

    def __init__(self):
        # Zero-argument super(): ``super(self.__class__, self)`` would recurse
        # forever as soon as this class is subclassed.
        super().__init__()

        self.tweet_id = None
        self.user_id = None
        self.user_name = None
        self.text = None
        self.created_at = None
        self.retweets = 0
        self.favorites = 0

        self.is_reply = False
        self.reply_cnt = 0
        self.retweet_cnt = 0
        self.favorite_cnt = 0
        self.top_link = None
        self.tweet_link = None

        self.depth = 0
|
||||||
|
|||||||
@@ -7,90 +7,90 @@ import pytz
|
|||||||
|
|
||||||
class TweetParser:
    """Build ``Tweet`` objects from parsed tweet markup."""

    @staticmethod
    def _stat_count(tag, action):
        """Return the ``data-tweet-stat-count`` for *action*, or ``None`` if absent."""
        found = tag.select(
            'span.ProfileTweet-action--{} > span.ProfileTweet-actionCount'.format(action))
        if not found:
            return None
        return int(found[0].attrs['data-tweet-stat-count'])

    @staticmethod
    def parse(tag, keyword_id, depth=0, top_tw: Tweet=None):
        """Convert one tweet element into a populated ``Tweet``.

        Args:
            tag: element exposing ``attrs`` and ``select``, holding one tweet.
            keyword_id: stored on the result as ``keyword_id``.
            depth: stored as ``depth`` and ``article_order``; 0 marks a body
                tweet, anything else a reply.
            top_tw: the thread's top tweet. When given, its link and user id
                are used for ``top_link``, ``platform_title`` and
                ``platform_id``.

        Returns:
            The populated ``Tweet``.

        Raises:
            IndexError / KeyError: if the name, user id, text or timestamp
                elements (or their attributes) are missing from *tag*.
        """
        tweet = Tweet()

        tweet.tweet_id = int(tag.attrs['data-tweet-id'])

        # Join only the direct text children of the name element with spaces.
        nickname_tag = tag.select('strong.fullname')[0]
        tweet.user_name = ''
        for child in nickname_tag.children:
            if isinstance(child, bs4.element.NavigableString):
                if len(tweet.user_name) > 0:
                    tweet.user_name += ' '
                tweet.user_name += child
        # Drop the first character of the username text.
        tweet.user_id = tag.select('span.username')[0].text[1:]
        tweet.text = tag.select('p.tweet-text')[0].text

        # The epoch timestamp is read instead of the displayed time string.
        timestamp = int(tag.select('span._timestamp')[0].attrs['data-time'])
        local_tz = pytz.timezone('Asia/Seoul')
        utc_dt = datetime.datetime.fromtimestamp(timestamp, tz=pytz.utc)
        tweet.created_at = local_tz.normalize(utc_dt.astimezone(local_tz))

        tweet.is_reply = len(tag.select('div.ReplyingToContextBelowAuthor')) > 0

        # Counts keep their defaults when the element is absent.
        reply_cnt = TweetParser._stat_count(tag, 'reply')
        if reply_cnt is not None:
            tweet.reply_cnt = reply_cnt

        retweet_cnt = TweetParser._stat_count(tag, 'retweet')
        if retweet_cnt is not None:
            tweet.retweet_cnt = retweet_cnt

        favorite_cnt = TweetParser._stat_count(tag, 'favorite')
        if favorite_cnt is not None:
            # Must match the ``favorite_cnt`` attribute that Tweet defines.
            tweet.favorite_cnt = favorite_cnt

        link_tag = tag.select('a.js-permalink')
        if len(link_tag) > 0:
            tweet.tweet_link = (TwitterConfig.protocol + '://' + TwitterConfig.top_url
                                + link_tag[0].attrs['href'])
        tweet.top_link = top_tw.tweet_link if top_tw else tweet.tweet_link

        tweet.depth = depth

        # Generic row columns. article_parent, article_title, article_hit,
        # article_profile and etc are left at their defaults.
        tweet.platform_name = 'twitter'
        tweet.platform_form = 'post'
        tweet.platform_title = top_tw.user_id if top_tw else tweet.user_id
        tweet.article_form = 'body' if tweet.depth == 0 else 'reply'
        tweet.article_id = tweet.user_id
        tweet.article_nickname = tweet.user_name
        tweet.article_data = tweet.text
        tweet.article_url = tweet.top_link
        tweet.article_date = tweet.created_at
        tweet.article_order = tweet.depth
        tweet.article_profileurl = (TwitterConfig.protocol + '://' + TwitterConfig.top_url
                                    + '/' + tweet.user_id)
        tweet.platform_id = top_tw.user_id if top_tw else tweet.user_id
        tweet.keyword_id = keyword_id
        tweet.reply_url = tweet.tweet_link

        return tweet

    @staticmethod
    def get_lone_container(soup, parent_tw):
        """Return the lone-tweet containers that follow *parent_tw*.

        The ``div.ThreadedConversation--loneTweet`` elements are scanned from
        last to first, stopping at the one whose ``li.stream-item`` carries
        ``parent_tw.tweet_id``.

        Returns:
            A one-shot ``reversed`` iterator over the collected containers,
            in document order.
        """
        lone_tweets = soup.select('div.ThreadedConversation--loneTweet')
        container_tags = []
        for tag in reversed(lone_tweets):
            li = tag.select('li.stream-item')
            if len(li) > 0 and 'data-item-id' in li[0].attrs:
                tweet_id = int(li[0].attrs['data-item-id'])
                if tweet_id == parent_tw.tweet_id:
                    break

            container_tags.append(tag)

        return reversed(container_tags)
|
||||||
|
|||||||
Reference in New Issue
Block a user