Files
clients/WebBasedCrawler/twitter/twdbhelper.py
2017-08-10 12:44:03 +09:00

84 lines
1.8 KiB
Python

import queue
import time

from twitter.tweet import Tweet
class TwitterDBHelper:
    """Buffer tweets in an in-memory queue and flush them to MySQL in batches.

    NOTE(review): the connection credentials below are hard-coded; they
    should be moved to configuration or environment variables.
    """

    # __import__('pymysql.cursors') returns the TOP-LEVEL pymysql package;
    # the cursors submodule is loaded as a side effect and is reachable as
    # self.pymysql.cursors.
    pymysql = __import__('pymysql.cursors')

    # Number of queued tweets that triggers an automatic flush().
    DB_DUMP_SIZE = 128

    def __init__(self):
        self.tweets = []   # kept for backward compatibility; unused in this file
        self.buffer = []   # kept for backward compatibility; unused in this file
        self.queue = queue.Queue()  # pending (tweet, db_num) pairs

    def __del__(self):
        # Best-effort flush of anything still queued when the helper is
        # garbage-collected. NOTE(review): __del__ is not guaranteed to run
        # at interpreter shutdown — callers should invoke flush() explicitly.
        self.flush()

    def _connect(self, **extra):
        """Open a MySQL connection with the shared settings.

        extra: additional keyword arguments forwarded to pymysql.connect
            (e.g. connect_timeout).
        Returns a live pymysql connection; raises whatever pymysql.connect
        raises on failure.
        """
        return self.pymysql.connect(host='bigbird.iptime.org',
                                    user='admin', passwd='admin123',
                                    db='concepters', charset='utf8',
                                    cursorclass=self.pymysql.cursors.DictCursor,
                                    **extra)

    def get_param(self, keyword_id):
        """Return the `keyword` row with the given id as a dict.

        keyword_id: primary key of the keyword row.
        Exits the whole process on any database error (preserves the
        original behaviour).
        """
        params = []
        conn = None
        try:
            conn = self._connect()
            with conn.cursor() as cursor:
                # Parameterized query — the original concatenated
                # str(keyword_id) into the SQL, which is injectable.
                cursor.execute("select * from keyword where id = %s",
                               (keyword_id,))
                params = cursor.fetchone()
        except Exception as e:
            print(e)
            exit(1)
        finally:
            # The original only closed the connection on the success path;
            # close it unconditionally whenever it was opened.
            if conn is not None:
                conn.close()
        return params

    def flush(self):
        """Drain the queue and write every buffered tweet to the database.

        Retries the connection forever until it succeeds, with a short
        back-off instead of the original tight busy-wait.
        """
        local_buffer = []
        while not self.queue.empty():
            local_buffer.append(self.queue.get())
        print('### db queue dump {}'.format(len(local_buffer)))
        if not local_buffer:
            return
        # Keep retrying until a connection is obtained (original behaviour),
        # but sleep between attempts so a down server is not hammered.
        while True:
            try:
                conn = self._connect(connect_timeout=5)
            except Exception as e:
                print(e)
                time.sleep(1)
                continue
            else:
                break
        try:
            with conn.cursor() as cursor:
                for tweet, _db_num in local_buffer:
                    if not tweet.is_reply:
                        # Non-reply tweets replace any earlier copy:
                        # delete before re-inserting.
                        cursor.execute(tweet.get_delete_query(_db_num))
                    cursor.execute(tweet.get_insert_query(conn, _db_num))
            conn.commit()
        except Exception as e:
            # Best-effort batch write: log and drop on failure
            # (preserves the original swallow-and-continue behaviour).
            print(e)
        finally:
            conn.close()

    def insert_tweet(self, tweet: Tweet = None, db_num: int = -1, flush=False):
        """Queue a tweet for insertion; auto-flush when the batch is full.

        tweet: Tweet instance to persist.
        db_num: target database/shard number passed through to the Tweet
            query builders.
        flush: accepted for interface compatibility (currently unused).
        """
        self.queue.put((tweet, db_num))
        if self.queue.qsize() >= self.DB_DUMP_SIZE:
            self.flush()