Facebook ID 크롤링이 잘못되는 것 수정
git-svn-id: svn://192.168.0.12/source@250 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
@@ -58,7 +58,8 @@ class FacebookInit(CrawlInit):
|
||||
# return trimmed_list
|
||||
|
||||
def make_url(self):
|
||||
return [self.urls[self.platform()] + x + "?fref=ts" for x in self.split_searches()]
|
||||
# return [self.urls[self.platform()] + x + "?fref=ts" for x in self.split_searches()]
|
||||
return [self.urls[self.platform()] + x for x in self.split_searches()]
|
||||
# urls = list()
|
||||
# for x in self.split_searches():
|
||||
# url = self.urls[self.platform()] + x + "?fref=ts"
|
||||
@@ -92,9 +93,9 @@ class FacebookBodyCrawler:
|
||||
self.re_date = re.compile(
|
||||
"([\\d]{4})[^\\d]+([\\d]{1,2})[^\\d]+([\\d]{1,2})[^\\d]+([\\d]{1,2}):([\\d]{1,2})"
|
||||
)
|
||||
self.re_id = re.compile("id=([\\d]+)")
|
||||
self.re_id = re.compile("[^fb]id=([\\d]+)")
|
||||
# self.re_ids = re.compile("id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._-]+)\\??", re.UNICODE)
|
||||
self.re_ids = re.compile("id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._\\-%]+)")
|
||||
self.re_ids = re.compile("[^fb]id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._\\-%]+)")
|
||||
#(("id=([\\d]+)|facebook.com/([\\w._]+)\\?"))
|
||||
|
||||
def set_driver(self, driver):
|
||||
@@ -323,9 +324,9 @@ class FacebookReplyCrawler:
|
||||
self.re_date = re.compile(
|
||||
"([\\d]{4})[^\\d]+([\\d]{1,2})[^\\d]+([\\d]{1,2})[^\\d]+([\\d]{1,2}):([\\d]{1,2})"
|
||||
)
|
||||
self.re_id = re.compile("id=([\\d]+)")
|
||||
self.re_id = re.compile("[^fb]id=([\\d]+)")
|
||||
# self.re_ids = re.compile("id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._-]+)\\??", re.UNICODE)
|
||||
self.re_ids = re.compile("id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._\\-%]+)")
|
||||
self.re_ids = re.compile("[^fb]id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._\\-%]+)")
|
||||
|
||||
def find_init(self):
|
||||
self.reply_list.clear()
|
||||
|
||||
Reference in New Issue
Block a user