- 불필요한 print 삭제 (remove unnecessary print statements)

- thread 16개로 (increase worker threads to 16)
This commit is contained in:
mjjo
2017-08-10 14:29:33 +09:00
parent cba76dbe59
commit df5ca87d8a
3 changed files with 10 additions and 7 deletions

View File

@@ -132,7 +132,7 @@ class Proxy2Handler:
         return instance

    def check_all_proxies(self, platform):
-        print('check all start')
+        # print('check all start')
         url_map = {
             Platform.NAVER: 'https://www.naver.com',
@@ -165,7 +165,7 @@ class Proxy2Handler:
             else:
                 instance.set_block_at(platform, datetime.datetime.now())
-        print('check all end')
+        # print('check all end')
         return alive_cnt

    def get(self, platform, proc_id=-1):
@@ -192,7 +192,10 @@ class Proxy2Handler:
             return None
-        instance = instances[random.randint(0, len(instances)-1)] if len(instances) > 0 else None
+        instance = None
+        if len(instances) > 0:
+            instance = instances[random.randint(0, len(instances)-1)] if len(instances) > 0 else None
         if instance:
             self.lock_leave()
             return instance.get_instance_for_http()

View File

@@ -86,12 +86,12 @@ def check_proxy(qu, proxy, url):
def crawl_proxies(check_url=None):
-    print('proxy crawling start')
+    # print('proxy crawling start')
    proxies = get_proxies_free_proxy()
    proxies += get_proxies_proxy_searcher()
    # proxies += get_proxies_nntime()
    # proxies = list(set(proxies))
-    print('proxy crawled {}'.format(len(proxies)))
+    # print('proxy crawled {}'.format(len(proxies)))
    if check_url:
        qu = queue.Queue()
@@ -111,7 +111,7 @@ def crawl_proxies(check_url=None):
    else:
        proxies_alive = proxies
-    print('proxy crawling end')
+    # print('proxy crawling end')
    return proxies_alive
    # proxies_alive.sort()

View File

@@ -303,7 +303,7 @@ class TwitterCrawler:
        start_time = time.time()
        # run
-        worker_count = 1
+        worker_count = 16
        split_config = self.default_config.split()
        content_qu = queue.Queue()