Files
clients/WebBasedCrawler/rankcheck.py

45 lines
1.9 KiB
Python

import base.baseclasses
import time
from selenium.webdriver.common.keys import Keys
base_url = 'https://search.naver.com/search.naver?where=post&sm=tab_jum&ie=utf8&query='
keywords = ['vsl', '유산균']

# Upper bound on the number of result pages walked for a single keyword.
MAX_PAGES = 100
# Seconds slept after every navigation (presumably to let the page finish
# loading before it is queried).
PAGE_LOAD_WAIT = 10


def canonical_post_url(href):
    """Return *href* with the ``?Redirect=Log&logNo=`` segment replaced by ``/``."""
    return href.replace("?Redirect=Log&logNo=", "/")


def find_post_link(item):
    """Return the anchor element of one result ``<li>``.

    The anchor is looked up at ``div/a`` first and at ``dl/dt/a`` as a
    fallback; a failure of the fallback lookup propagates to the caller.
    """
    try:
        return item.find_element_by_xpath("div/a")
    except Exception:
        return item.find_element_by_xpath("dl/dt/a")


def next_page_link(pages):
    """Return the first ``<a>`` element that follows a ``<strong>`` element.

    *pages* is an iterable of elements exposing ``tag_name``.  ``None`` is
    returned when there is no ``<strong>`` marker or no ``<a>`` after it,
    which the caller treats as "no further page".
    """
    past_current = False
    for element in pages:
        tag = element.tag_name
        if tag == "strong":
            past_current = True
        elif past_current and tag == "a":
            return element
    return None


def record_page(driver, out, rank):
    """Write one ``rank: url`` line per result on the current page.

    Each line is written to *out*, echoed to stdout, and flushed
    immediately.  Returns the rank to use for the next result.
    """
    result_list = driver.find_element_by_css_selector("ul[class^='type']")
    items = result_list.find_elements_by_css_selector("li[class='sh_blog_top']")
    for item in items:
        href = canonical_post_url(find_post_link(item).get_attribute('href'))
        line = "{0}: {1}".format(rank, href)
        out.write(line + "\n")
        print(line)
        out.flush()
        rank += 1
    return rank


def crawl_keyword(driver, keyword):
    """Search for *keyword* and dump the ranked result links to a text file.

    The file is named ``<keyword><YYYYmmdd_HHMMSS>.txt``.  At most
    ``MAX_PAGES`` result pages are visited; the walk ends early when the
    paging bar offers no link after the current page.  Errors raised while
    scraping end this keyword's crawl (best effort) but are reported
    instead of being silently discarded.
    """
    driver.get(base_url + keyword)
    time.sleep(PAGE_LOAD_WAIT)
    rank = 1
    filename = keyword + time.strftime("%Y%m%d_%H%M%S") + ".txt"
    with open(filename, 'w', encoding='utf-8') as out:
        try:
            for _ in range(MAX_PAGES):
                rank = record_page(driver, out, rank)
                paging = driver.find_element_by_css_selector("div[class='paging']")
                link = next_page_link(paging.find_elements_by_css_selector("*"))
                if link is None:
                    # Last page reached: without this stop the same page
                    # would be scraped again on every remaining iteration.
                    break
                # Key sequence kept from the original script: NULL first,
                # then ENTER to activate the link.
                link.send_keys(Keys.NULL)
                link.send_keys(Keys.ENTER)
                time.sleep(PAGE_LOAD_WAIT)
        except Exception as exc:
            # Keep going with the next keyword, but say why this one ended.
            print("crawl stopped after {0} result(s): {1!r}".format(rank - 1, exc))


if '__main__' == __name__:
    browser = base.baseclasses.Browser()
    driver = browser.new_firefox_browser()
    try:
        for keyword in keywords:
            crawl_keyword(driver, keyword)
    finally:
        # Always release the browser, even when a crawl raises.
        driver.quit()