Facebook id 크롤링이 잘못되는 것 수정

git-svn-id: svn://192.168.0.12/source@250 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
admin
2016-02-01 06:55:20 +00:00
parent e663eac55b
commit eeee7d4565
2 changed files with 10 additions and 9 deletions

View File

@@ -237,7 +237,7 @@ class SendtoDB:
self.conn.commit()
except self.pymysql.err.OperationalError as e:
print(e)
if e.args[0] == 2013 or e.args[0] == 2006: # Lost connection to server
if e.args[0] == 2013 or e.args[0] == 2006 or e.args[0] == 10054: # Lost connection to server
print("connection lost. try to reconnection")
self.conn = self.pymysql.connect(host='bigbird.iptime.org',
user='admin', passwd='admin123',
@@ -248,7 +248,7 @@ class SendtoDB:
self.conn.commit()
except self.pymysql.err.MySQLError as e:
print(e)
if e.args[0] == 2013 or e.args[0] == 2006: # Lost connection to server
if e.args[0] == 2013 or e.args[0] == 2006 or e.args[0] == 10054: # Lost connection to server
print("connection lost. try to reconnection")
self.conn = self.pymysql.connect(host='bigbird.iptime.org',
user='admin', passwd='admin123',
@@ -286,7 +286,7 @@ class SendtoDB:
self.conn.commit()
except self.pymysql.err.OperationalError as e:
print(e)
if e.args[0] == 2013 or e.args[0] == 2006: # Lost connection to server
if e.args[0] == 2013 or e.args[0] == 2006 or e.args[0] == 10054: # Lost connection to server
print("connection lost. try to reconnection")
self.conn = self.pymysql.connect(host='bigbird.iptime.org',
user='admin', passwd='admin123',
@@ -297,7 +297,7 @@ class SendtoDB:
self.conn.commit()
except self.pymysql.err.MySQLError as e:
print(e)
if e.args[0] == 2013 or e.args[0] == 2006: # Lost connection to server
if e.args[0] == 2013 or e.args[0] == 2006 or e.args[0] == 10054: # Lost connection to server
print("connection lost. try to reconnection")
self.conn = self.pymysql.connect(host='bigbird.iptime.org',
user='admin', passwd='admin123',

View File

@@ -58,7 +58,8 @@ class FacebookInit(CrawlInit):
# return trimmed_list
def make_url(self):
return [self.urls[self.platform()] + x + "?fref=ts" for x in self.split_searches()]
# return [self.urls[self.platform()] + x + "?fref=ts" for x in self.split_searches()]
return [self.urls[self.platform()] + x for x in self.split_searches()]
# urls = list()
# for x in self.split_searches():
# url = self.urls[self.platform()] + x + "?fref=ts"
@@ -92,9 +93,9 @@ class FacebookBodyCrawler:
self.re_date = re.compile(
"([\\d]{4})[^\\d]+([\\d]{1,2})[^\\d]+([\\d]{1,2})[^\\d]+([\\d]{1,2}):([\\d]{1,2})"
)
self.re_id = re.compile("id=([\\d]+)")
self.re_id = re.compile("[^fb]id=([\\d]+)")
# self.re_ids = re.compile("id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._-]+)\\??", re.UNICODE)
self.re_ids = re.compile("id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._\\-%]+)")
self.re_ids = re.compile("[^fb]id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._\\-%]+)")
#(("id=([\\d]+)|facebook.com/([\\w._]+)\\?"))
def set_driver(self, driver):
@@ -323,9 +324,9 @@ class FacebookReplyCrawler:
self.re_date = re.compile(
"([\\d]{4})[^\\d]+([\\d]{1,2})[^\\d]+([\\d]{1,2})[^\\d]+([\\d]{1,2}):([\\d]{1,2})"
)
self.re_id = re.compile("id=([\\d]+)")
self.re_id = re.compile("[^fb]id=([\\d]+)")
# self.re_ids = re.compile("id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._-]+)\\??", re.UNICODE)
self.re_ids = re.compile("id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._\\-%]+)")
self.re_ids = re.compile("[^fb]id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._\\-%]+)")
def find_init(self):
self.reply_list.clear()