body에서 id와 nickname을 잘못 가져오는 문제 디버깅

git-svn-id: svn://192.168.0.12/source@252 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
admin
2016-02-02 03:01:33 +00:00
parent 519e40c9b8
commit 2d47116b1b

View File

@@ -58,7 +58,7 @@ class FacebookInit(CrawlInit):
# return trimmed_list
def make_url(self):
return [self.urls[self.platform()] + 'profile.php?id=' + x if x.isnumeric() else x + "?fref=ts"
return [self.urls[self.platform()] + ('profile.php?id=' + x if x.isnumeric() else x) + "?fref=ts"
for x in self.split_searches()]
# return [self.urls[self.platform()] + x for x in self.split_searches()]
# urls = list()
@@ -107,14 +107,22 @@ class FacebookBodyCrawler:
if element:
href = element.get('href')
else:
href = self.find_article_url(soup)
span = soup.select_one('span.fcg span.fwb')
if span:
href = span.a.get('href')
else:
span = soup.find('span', class_='fwb fcg')
if span:
href = span.a.get('href')
else:
href = self.find_article_url(soup)
m = self.re_ids.search(href)
return m.group(1) if m.group(2) is None else m.group(2)
def find_article_nickname(self, soup):
nickname = soup.find('div', class_='fbPhotoContributorName')
if not nickname or not nickname.get_text():
temp_nickname = soup.select_one('span.fwb > a')
temp_nickname = soup.select_one('span.fwb > a')
if temp_nickname.has_attr('href') and temp_nickname.get('href').find(self.find_article_id(soup)) != -1:
nickname = temp_nickname
if not nickname: