# 45 lines | 1.9 KiB | Python
import time

from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.common.keys import Keys

import base.baseclasses


# Search URL prefix; the keyword is appended verbatim as the `query` value.
base_url = 'https://search.naver.com/search.naver?where=post&sm=tab_jum&ie=utf8&query='
# Keywords to search for, one output file per keyword.
keywords = ['vsl', '유산균']

# Upper bound on the number of result pages visited per keyword.
MAX_PAGES = 100
# Seconds to sleep after each navigation before reading the page.
PAGE_LOAD_WAIT = 10

# NOTE(review): the find_element_by_* / find_elements_by_* helpers used below
# were removed in Selenium 4.3. They are left unchanged because the driver
# object is created by base.baseclasses, which is not visible from this file;
# migrate to find_element(By.<KIND>, ...) when upgrading Selenium.


def normalize_href(href):
    """Return *href* with the blog redirect query rewritten as a path segment.

    The substring ``?Redirect=Log&logNo=`` is replaced by ``/``. A missing
    attribute (``None``) yields an empty string so that a single link-less
    entry does not abort the whole scrape.
    """
    if not href:
        return ""
    return href.replace("?Redirect=Log&logNo=", "/")


def _find_result_link(item):
    """Return the anchor element of one result ``<li>``.

    Tries the ``div/a`` path first and falls back to ``dl/dt/a`` when the
    first path does not exist. Only a failed lookup triggers the fallback;
    any other driver error propagates to the caller.
    """
    try:
        return item.find_element_by_xpath("div/a")
    except NoSuchElementException:
        return item.find_element_by_xpath("dl/dt/a")


def _go_to_next_page(driver):
    """Activate the first ``<a>`` that follows the ``<strong>`` pager element.

    Returns True when a link was activated (after sleeping for the page
    load), False when no link follows ``<strong>``.
    """
    paging = driver.find_element_by_css_selector("div[class='paging']")
    past_current = False
    for node in paging.find_elements_by_css_selector("*"):
        if node.tag_name == "strong":
            # Presumably <strong> marks the current page number -- confirm
            # against the live markup.
            past_current = True
        elif past_current and node.tag_name == "a":
            # Kept from the original: an empty key press followed by ENTER,
            # presumably to focus the link before activating it.
            node.send_keys(Keys.NULL)
            node.send_keys(Keys.ENTER)
            time.sleep(PAGE_LOAD_WAIT)
            return True
    return False


def _scrape_keyword(driver, keyword):
    """Write the ranked result links for *keyword* to a timestamped file.

    Each line has the form ``<rank>: <url>`` and is also echoed to stdout.
    Scraping stops after MAX_PAGES pages, when no further page link exists,
    or when the driver reports an error (best effort: the error is printed
    and the results collected so far are kept).
    """
    driver.get(base_url + keyword)
    time.sleep(PAGE_LOAD_WAIT)

    rank = 1
    filename = keyword + time.strftime("%Y%m%d_%H%M%S") + ".txt"
    # Explicit UTF-8 so the output does not depend on the platform default.
    with open(filename, 'w', encoding='utf-8') as f:
        try:
            for _ in range(MAX_PAGES):
                ul = driver.find_element_by_css_selector("ul[class^='type']")
                for li in ul.find_elements_by_css_selector("li[class='sh_blog_top']"):
                    link = _find_result_link(li)
                    href = normalize_href(link.get_attribute('href'))
                    f.write("{0}: {1}\n".format(rank, href))
                    print("{0}: {1}".format(rank, href))
                    # Flush per entry so partial results survive a crash.
                    f.flush()
                    rank += 1
                # Without this check the same page would be scraped again on
                # every remaining iteration, producing duplicate entries.
                if not _go_to_next_page(driver):
                    break
        except WebDriverException as exc:
            # Best effort: report why the scrape ended instead of hiding it,
            # then let the caller continue with the next keyword.
            print("{0!r}: stopped after {1} result(s): {2}".format(
                keyword, rank - 1, exc))


def main():
    """Open a Firefox session and scrape every keyword in ``keywords``."""
    browser = base.baseclasses.Browser()
    driver = browser.new_firefox_browser()
    try:
        for keyword in keywords:
            _scrape_keyword(driver, keyword)
    finally:
        # Always release the browser, even on Ctrl-C or an unexpected error.
        driver.quit()


if '__main__' == __name__:
    main()