body에서 id/nickname을 잘못 가져오는 문제 디버깅
git-svn-id: svn://192.168.0.12/source@252 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
@@ -58,7 +58,7 @@ class FacebookInit(CrawlInit):
|
||||
# return trimmed_list
|
||||
|
||||
def make_url(self):
|
||||
return [self.urls[self.platform()] + 'profile.php?id=' + x if x.isnumeric() else x + "?fref=ts"
|
||||
return [self.urls[self.platform()] + ('profile.php?id=' + x if x.isnumeric() else x) + "?fref=ts"
|
||||
for x in self.split_searches()]
|
||||
# return [self.urls[self.platform()] + x for x in self.split_searches()]
|
||||
# urls = list()
|
||||
@@ -107,14 +107,22 @@ class FacebookBodyCrawler:
|
||||
if element:
|
||||
href = element.get('href')
|
||||
else:
|
||||
href = self.find_article_url(soup)
|
||||
span = soup.select_one('span.fcg span.fwb')
|
||||
if span:
|
||||
href = span.a.get('href')
|
||||
else:
|
||||
span = soup.find('span', class_='fwb fcg')
|
||||
if span:
|
||||
href = span.a.get('href')
|
||||
else:
|
||||
href = self.find_article_url(soup)
|
||||
m = self.re_ids.search(href)
|
||||
return m.group(1) if m.group(2) is None else m.group(2)
|
||||
|
||||
def find_article_nickname(self, soup):
|
||||
nickname = soup.find('div', class_='fbPhotoContributorName')
|
||||
if not nickname or not nickname.get_text():
|
||||
temp_nickname = soup.select_one('span.fwb > a')
|
||||
temp_nickname = soup.select_one('span.fwb > a')
|
||||
if temp_nickname.has_attr('href') and temp_nickname.get('href').find(self.find_article_id(soup)) != -1:
|
||||
nickname = temp_nickname
|
||||
if not nickname:
|
||||
|
||||
Reference in New Issue
Block a user