import os
import pickle
import requests
import bs4
import concurrent.futures
import time

from .Logger import Logger

class ProxyHandler:
    """Fetch free proxy lists from public sites, health-check them, and cache
    the live ones on disk (pickled) for reuse across runs.

    Proxy entries are dicts of the form
    ``{'alive': bool, 'http': 'ip:port', 'https': 'ip:port'}`` — the same dict
    doubles as the ``proxies=`` mapping handed to ``requests``.
    """

    # Pickle cache holding the last known-alive proxy list.
    PROXY_FILE_NAME = 'temp/proxy.bin'

    # Proxy-list sites; all three render the same `table.table` HTML layout
    # (first column IP, second column port, first row is the header).
    _PROXY_SOURCES = (
        'https://www.us-proxy.org',
        'https://www.socks-proxy.net',
        'https://www.sslproxies.org',
    )

    def __init__(self):
        # Ensure the cache directory exists before any pickle I/O.
        # exist_ok avoids the exists()/makedirs() race of the original.
        os.makedirs('temp', exist_ok=True)

        self.proxies = []    # in-memory proxy dicts (see class docstring)
        self.check_url = ''  # URL used to probe proxies; set by the caller

    def check_proxy(self, proxy, top_url):
        """Probe *top_url* through *proxy*; set ``proxy['alive'] = False`` on
        any request failure or non-200 response (mutates *proxy* in place).

        The proxy dict itself is passed as requests' ``proxies=`` mapping; the
        extra ``'alive'`` key is harmless because no URL scheme matches it.
        """
        try:
            resp = requests.get(top_url, proxies=proxy, timeout=2)
        except requests.RequestException:
            # Narrowed from a bare ``except:`` so Ctrl-C / SystemExit still
            # propagate instead of silently marking the proxy dead.
            proxy['alive'] = False
        else:
            if resp.status_code != 200:
                proxy['alive'] = False

    def check_proxy_all(self, proxies, check_url):
        """Health-check every proxy in *proxies* concurrently (in place)."""
        Logger.log('checking proxies for {}'.format(check_url))

        worker_cnt = 16
        # The context manager shuts the pool down and waits for all checks.
        with concurrent.futures.ThreadPoolExecutor(worker_cnt) as pool:
            for proxy in proxies:
                pool.submit(self.check_proxy, proxy, check_url)

    def has_file(self):
        """Return True if a pickled proxy cache exists on disk."""
        return os.path.exists(self.PROXY_FILE_NAME)

    def load_proxy(self):
        """Load and return the pickled proxy list.

        NOTE(review): ``pickle.load`` is only safe because the cache file is
        produced locally by this class — never point it at untrusted data.
        """
        with open(self.PROXY_FILE_NAME, 'rb') as f:
            return pickle.load(f)

    def _scrape_site(self, url):
        """Scrape one proxy-list site and return its entries as proxy dicts.

        Replaces the three identical copy-pasted loops of the original
        ``crawl_proxy`` — all sources share the same table layout.
        """
        resp = requests.get(url)
        soup = bs4.BeautifulSoup(resp.text, 'lxml')
        table = soup.select('table.table')
        trs = table[0].select('tr')

        proxies = []
        for tr in trs[1:]:  # skip the header row
            tds = tr.select('td')
            if len(tds) < 2:
                continue

            ip, port = tds[0].text, tds[1].text
            proxies.append(
                {
                    'alive': True,
                    'http': '{}:{}'.format(ip, port),
                    'https': '{}:{}'.format(ip, port),
                }
            )
        return proxies

    def crawl_proxy(self):
        """Scrape all source sites, keep proxies that pass the health check
        against ``self.check_url``, persist them to the pickle cache, and
        return the alive list (possibly empty)."""
        proxies = []
        for url in self._PROXY_SOURCES:
            proxies.extend(self._scrape_site(url))

        self.check_proxy_all(proxies, self.check_url)
        alive_proxies = [proxy for proxy in proxies if proxy['alive']]
        Logger.log('proxies checking end: available : {}'.format(len(alive_proxies)))

        with open(self.PROXY_FILE_NAME, 'wb') as f:
            pickle.dump(alive_proxies, f)

        return alive_proxies

    def get_proxy(self):
        """Return the first alive proxy, loading the cache or crawling
        (with retries) when none is held in memory; None only if the in-memory
        list somehow contains no alive entry."""
        if not self.proxies:
            if self.has_file():
                self.proxies = self.load_proxy()
                self.proxies = [proxy for proxy in self.proxies if proxy['alive']]

        if not self.proxies:
            # Keep crawling until at least one proxy survives the health check.
            while True:
                self.proxies = self.crawl_proxy()
                if self.proxies:
                    break
                Logger.log('there is no available proxy. sleep 10secs..')
                time.sleep(10)

        for proxy in self.proxies:
            if proxy['alive']:
                return proxy

        return None

    def set_proxy_dead(self, proxy):
        """Mark *proxy* dead and re-sync the on-disk cache.

        If any proxy is still alive, rewrite the cache file; otherwise remove
        the cache and clear the in-memory list so the next ``get_proxy()``
        triggers a fresh crawl.
        """
        proxy['alive'] = False
        # The original's loop variable shadowed the ``proxy`` parameter;
        # using any() avoids the shadowing with identical behavior.
        if any(p['alive'] for p in self.proxies):
            with open(self.PROXY_FILE_NAME, 'wb') as f:
                pickle.dump(self.proxies, f)
            return

        os.remove(self.PROXY_FILE_NAME)
        self.proxies = []