"""Scroll an Instagram tag page in a browser and collect the post URLs it shows.

The script opens the tag page, activates the first ``div._pupj3 > a`` link,
then loops forever: scroll, harvest the ``href`` of every ``div._myci9 > a``
anchor into a set, prune old ``div._myci9`` nodes from the DOM, and append
the running URL count to a log file.  Stop it with Ctrl-C (or any error);
the browser driver is shut down on the way out.
"""
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

from base.baseclasses import SendtoDB
from base.baseclasses import print_and_flush
from base.baseclasses import CrawlInit
from base.baseclasses import wait
from base.baseclasses import Browser
from base.baseclasses import enter_element


def pageup_and_pagedown(_driver):
    """Scroll the page up a little and then further down.

    Performs three ``window.scrollBy(0, -300)`` calls followed by five
    ``window.scrollBy(0, 800)`` calls, pausing ``wait(0.4)`` after each one.

    Args:
        _driver: Browser driver exposing ``execute_script``.
    """
    # Scrolling is done through JavaScript.  An earlier variant sent
    # Keys.PAGE_UP / Keys.PAGE_DOWN to the <body> element with wait(3)
    # between key presses; it was disabled in favour of scrollBy().
    for _ in range(3):
        _driver.execute_script("window.scrollBy(0, -300)")
        wait(0.4)
    for _ in range(5):
        _driver.execute_script("window.scrollBy(0, 800)")
        wait(0.4)


def first_load(_driver):
    """Locate the first ``div._pupj3 > a`` element and pass it to ``enter_element``.

    Args:
        _driver: Browser driver exposing ``find_element``.
    """
    # find_element(By.CSS_SELECTOR, ...) replaces find_element_by_css_selector,
    # which newer Selenium releases no longer provide.
    element = _driver.find_element(By.CSS_SELECTOR, "div._pupj3 > a")
    # NOTE(review): enter_element is a project helper; presumably it
    # activates the link (e.g. a "load more" control) -- confirm.
    enter_element(element)


def get_urls(_driver, url_set):
    """Add the ``href`` of every ``div._myci9>a`` anchor to ``url_set``.

    Args:
        _driver: Browser driver exposing ``find_elements``.
        url_set: Mutable set that receives the ``href`` attribute values;
            it is updated in place.
    """
    anchors = _driver.find_elements(By.CSS_SELECTOR, "div._myci9>a")
    url_set.update(anchor.get_attribute('href') for anchor in anchors)


def remove_myci9(_driver, keep=4):
    """Remove leading ``div._myci9`` nodes from the page, leaving ``keep`` of them.

    Counts the ``div._myci9`` elements currently in the DOM and, for each
    one beyond ``keep``, runs a script that deletes the first match of
    ``document.querySelector("div._myci9")``.

    Args:
        _driver: Browser driver exposing ``find_elements`` and
            ``execute_script``.
        keep: Number of ``div._myci9`` elements to leave in place
            (default 4, the value that used to be hard-coded).
    """
    elements = _driver.find_elements(By.CSS_SELECTOR, "div._myci9")
    surplus = max(len(elements) - keep, 0)
    for _ in range(surplus):
        # Each call removes whichever node is currently first, so repeated
        # calls walk forward through the oldest nodes.
        _driver.execute_script("""
            var element = document.querySelector("div._myci9");
            if (element)
                element.parentNode.removeChild(element);
        """)


browser = Browser()
driver = browser.get_new_driver('ie')
url_sets = set()

url = "https://www.instagram.com/explore/tags/%EC%A4%8C%EB%A7%88%EA%B7%B8%EB%9E%A8/"
# Alternative tag page:
# url = 'https://www.instagram.com/explore/tags/%EB%A7%9B%EC%8A%A4%ED%83%80%EA%B7%B8%EB%9E%A8/'

try:
    wait(5)
    driver.get(url)
    wait(5)
    first_load(driver)
    wait(3)
    # print(driver.get_cookies())
    with open("c:\\data\\instajummaie.txt", 'w') as f:
        try:
            # Runs until interrupted or until a driver call raises.
            while True:
                # NOTE(review): the statement nesting was reconstructed from
                # a flattened source; harvesting and pruning are assumed to
                # happen on every scroll round, with one progress line per
                # batch of 10 rounds -- confirm against the original layout.
                for _ in range(10):
                    pageup_and_pagedown(driver)
                    get_urls(driver, url_sets)
                    remove_myci9(driver)
                print("url count = {0}\n".format(len(url_sets)), flush=True, file=f)
        finally:
            print("finished")
finally:
    # Always release the browser/driver process, including on Ctrl-C or a
    # failure before the crawl loop starts.
    # NOTE(review): assumes get_new_driver returns a Selenium WebDriver
    # (it is used via get/execute_script/find_element above).
    driver.quit()