facebook id 크롤링 잘못되는 것 수정
git-svn-id: svn://192.168.0.12/source@250 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
@@ -237,7 +237,7 @@ class SendtoDB:
|
||||
self.conn.commit()
|
||||
except self.pymysql.err.OperationalError as e:
|
||||
print(e)
|
||||
if e.args[0] == 2013 or e.args[0] == 2006: # Lost connection to server
|
||||
if e.args[0] == 2013 or e.args[0] == 2006 or e.args[0] == 10054: # Lost connection to server
|
||||
print("connection lost. try to reconnection")
|
||||
self.conn = self.pymysql.connect(host='bigbird.iptime.org',
|
||||
user='admin', passwd='admin123',
|
||||
@@ -248,7 +248,7 @@ class SendtoDB:
|
||||
self.conn.commit()
|
||||
except self.pymysql.err.MySQLError as e:
|
||||
print(e)
|
||||
if e.args[0] == 2013 or e.args[0] == 2006: # Lost connection to server
|
||||
if e.args[0] == 2013 or e.args[0] == 2006 or e.args[0] == 10054: # Lost connection to server
|
||||
print("connection lost. try to reconnection")
|
||||
self.conn = self.pymysql.connect(host='bigbird.iptime.org',
|
||||
user='admin', passwd='admin123',
|
||||
@@ -286,7 +286,7 @@ class SendtoDB:
|
||||
self.conn.commit()
|
||||
except self.pymysql.err.OperationalError as e:
|
||||
print(e)
|
||||
if e.args[0] == 2013 or e.args[0] == 2006: # Lost connection to server
|
||||
if e.args[0] == 2013 or e.args[0] == 2006 or e.args[0] == 10054: # Lost connection to server
|
||||
print("connection lost. try to reconnection")
|
||||
self.conn = self.pymysql.connect(host='bigbird.iptime.org',
|
||||
user='admin', passwd='admin123',
|
||||
@@ -297,7 +297,7 @@ class SendtoDB:
|
||||
self.conn.commit()
|
||||
except self.pymysql.err.MySQLError as e:
|
||||
print(e)
|
||||
if e.args[0] == 2013 or e.args[0] == 2006: # Lost connection to server
|
||||
if e.args[0] == 2013 or e.args[0] == 2006 or e.args[0] == 10054: # Lost connection to server
|
||||
print("connection lost. try to reconnection")
|
||||
self.conn = self.pymysql.connect(host='bigbird.iptime.org',
|
||||
user='admin', passwd='admin123',
|
||||
|
||||
@@ -58,7 +58,8 @@ class FacebookInit(CrawlInit):
|
||||
# return trimmed_list
|
||||
|
||||
def make_url(self):
|
||||
return [self.urls[self.platform()] + x + "?fref=ts" for x in self.split_searches()]
|
||||
# return [self.urls[self.platform()] + x + "?fref=ts" for x in self.split_searches()]
|
||||
return [self.urls[self.platform()] + x for x in self.split_searches()]
|
||||
# urls = list()
|
||||
# for x in self.split_searches():
|
||||
# url = self.urls[self.platform()] + x + "?fref=ts"
|
||||
@@ -92,9 +93,9 @@ class FacebookBodyCrawler:
|
||||
self.re_date = re.compile(
|
||||
"([\\d]{4})[^\\d]+([\\d]{1,2})[^\\d]+([\\d]{1,2})[^\\d]+([\\d]{1,2}):([\\d]{1,2})"
|
||||
)
|
||||
self.re_id = re.compile("id=([\\d]+)")
|
||||
self.re_id = re.compile("[^fb]id=([\\d]+)")
|
||||
# self.re_ids = re.compile("id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._-]+)\\??", re.UNICODE)
|
||||
self.re_ids = re.compile("id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._\\-%]+)")
|
||||
self.re_ids = re.compile("[^fb]id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._\\-%]+)")
|
||||
#(("id=([\\d]+)|facebook.com/([\\w._]+)\\?"))
|
||||
|
||||
def set_driver(self, driver):
|
||||
@@ -323,9 +324,9 @@ class FacebookReplyCrawler:
|
||||
self.re_date = re.compile(
|
||||
"([\\d]{4})[^\\d]+([\\d]{1,2})[^\\d]+([\\d]{1,2})[^\\d]+([\\d]{1,2}):([\\d]{1,2})"
|
||||
)
|
||||
self.re_id = re.compile("id=([\\d]+)")
|
||||
self.re_id = re.compile("[^fb]id=([\\d]+)")
|
||||
# self.re_ids = re.compile("id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._-]+)\\??", re.UNICODE)
|
||||
self.re_ids = re.compile("id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._\\-%]+)")
|
||||
self.re_ids = re.compile("[^fb]id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._\\-%]+)")
|
||||
|
||||
def find_init(self):
|
||||
self.reply_list.clear()
|
||||
|
||||
Reference in New Issue
Block a user