Facebook ID 크롤링이 잘못되는 것 수정

git-svn-id: svn://192.168.0.12/source@250 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
admin
2016-02-01 06:55:20 +00:00
parent e663eac55b
commit eeee7d4565
2 changed files with 10 additions and 9 deletions

View File

@@ -58,7 +58,8 @@ class FacebookInit(CrawlInit):
# return trimmed_list
def make_url(self):
return [self.urls[self.platform()] + x + "?fref=ts" for x in self.split_searches()]
# return [self.urls[self.platform()] + x + "?fref=ts" for x in self.split_searches()]
return [self.urls[self.platform()] + x for x in self.split_searches()]
# urls = list()
# for x in self.split_searches():
# url = self.urls[self.platform()] + x + "?fref=ts"
@@ -92,9 +93,9 @@ class FacebookBodyCrawler:
self.re_date = re.compile(
"([\\d]{4})[^\\d]+([\\d]{1,2})[^\\d]+([\\d]{1,2})[^\\d]+([\\d]{1,2}):([\\d]{1,2})"
)
self.re_id = re.compile("id=([\\d]+)")
self.re_id = re.compile("[^fb]id=([\\d]+)")
# self.re_ids = re.compile("id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._-]+)\\??", re.UNICODE)
self.re_ids = re.compile("id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._\\-%]+)")
self.re_ids = re.compile("[^fb]id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._\\-%]+)")
#(("id=([\\d]+)|facebook.com/([\\w._]+)\\?"))
def set_driver(self, driver):
@@ -323,9 +324,9 @@ class FacebookReplyCrawler:
self.re_date = re.compile(
"([\\d]{4})[^\\d]+([\\d]{1,2})[^\\d]+([\\d]{1,2})[^\\d]+([\\d]{1,2}):([\\d]{1,2})"
)
self.re_id = re.compile("id=([\\d]+)")
self.re_id = re.compile("[^fb]id=([\\d]+)")
# self.re_ids = re.compile("id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._-]+)\\??", re.UNICODE)
self.re_ids = re.compile("id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._\\-%]+)")
self.re_ids = re.compile("[^fb]id=([\\d]+)|facebook.com/(?!p[a-zA-Z_.-]+\\.php)([\\w._\\-%]+)")
def find_init(self):
self.reply_list.clear()