- 중복 제거 후 insert - proxy.txt가 모두 만료되면 db 사용 - proxy db에서 중복 제거해서 가져오기 - 프록시 문제로 페이지 요청 시 0.1초 딜레이 - 크롤러 stop 동작하도록 - realtime 적용
83 lines
2.5 KiB
Python
83 lines
2.5 KiB
Python
from twitter.tweet import Tweet
|
|
import multiprocessing as mp
|
|
|
|
|
|
class TwitterDBHelper:
    """Read keyword rows from, and write tweets to, the ``concepters`` MySQL database.

    Every public call opens its own connection and closes it before
    returning; no connection is kept on the instance.
    """

    # ``__import__`` on a dotted name returns the top-level ``pymysql``
    # package with ``pymysql.cursors`` loaded, so both ``pymysql.connect``
    # and ``pymysql.cursors.DictCursor`` are reachable through this attribute.
    pymysql = __import__('pymysql.cursors')

    # Seconds to wait between failed connection attempts in ``insert_tweet``.
    _RETRY_DELAY_SECONDS = 1

    def __init__(self):
        self.tweets = []
        # ``buffer`` and ``lock`` belong to the batched-insert path, which is
        # currently disabled (see ``insert_tweet``); no method here uses them.
        self.buffer = []
        self.lock = mp.Lock()

    def __del__(self):
        """No-op: connections are closed by the method that opened them."""

    def _connect(self, **overrides):
        """Open a new connection using the shared settings plus ``overrides``."""
        # NOTE(review): credentials are hard-coded in source; consider moving
        # them to configuration.
        return self.pymysql.connect(host='bigbird.iptime.org',
                                    user='admin', passwd='admin123',
                                    db='concepters', charset='utf8',
                                    cursorclass=self.pymysql.cursors.DictCursor,
                                    **overrides)

    def _connect_with_retry(self):
        """Retry connecting until it succeeds, printing every failure."""
        import time  # local import: only this helper needs it

        while True:
            try:
                return self._connect(connect_timeout=5)
            except Exception as e:
                print(e)
                # Pause so an unreachable server does not cause a busy loop.
                time.sleep(self._RETRY_DELAY_SECONDS)

    def get_param(self, keyword_id):
        """Return the ``keyword`` table row whose ``id`` equals ``keyword_id``.

        Args:
            keyword_id: value compared against the ``id`` column.

        Returns:
            The row as a dict (the connection uses ``DictCursor``), or
            ``None`` when no row matches.

        Raises:
            SystemExit: with status 1 after printing the error, when the
                connection or the query fails.
        """
        # The id is passed as a bound parameter instead of being concatenated
        # into the SQL text.
        query = "select * from keyword where id = %s"
        try:
            conn = self._connect()
            try:
                with conn.cursor() as cursor:
                    cursor.execute(query, (keyword_id,))
                    return cursor.fetchone()
            finally:
                # Close on failure as well as on success.
                conn.close()
        except Exception as e:
            print(e)
            raise SystemExit(1)

    def insert_tweet(self, tweet: Tweet = None, db_num: int = -1, flush=False):
        """Write one tweet to the database, replacing an existing copy first.

        For a tweet that is not a reply, the statement from
        ``tweet.get_delete_query`` is executed before the statement from
        ``tweet.get_insert_query``; both are committed together. Errors
        raised while executing or committing are printed, not propagated.

        Args:
            tweet: the tweet to store. ``None`` makes the call a no-op.
            db_num: forwarded unchanged to the tweet's query builders.
            flush: currently ignored. Batched inserts (buffering up to 100
                tweets under ``self.lock`` and writing them at once, or on
                ``flush``) are disabled, so each call writes immediately.
        """
        if tweet is None:
            # Nothing to write; avoid opening a connection just to fail on
            # ``None.is_reply``.
            return

        conn = self._connect_with_retry()
        try:
            with conn.cursor() as cursor:
                # NOTE(review): assumes only the delete is skipped for replies
                # and the insert runs for every tweet -- confirm against the
                # intended nesting.
                if not tweet.is_reply:
                    cursor.execute(tweet.get_delete_query(db_num))
                cursor.execute(tweet.get_insert_query(conn, db_num))
            conn.commit()
        except Exception as e:
            print(e)
        finally:
            conn.close()
|