181 lines
5.1 KiB
Python
181 lines
5.1 KiB
Python
import base.proxy_crawler as proxy_crawler
|
|
import base.logger as logger
|
|
|
|
import sqlalchemy
|
|
import sqlalchemy.ext
|
|
import sqlalchemy.ext.declarative
|
|
import sqlalchemy.orm
|
|
|
|
import enum
|
|
import datetime
|
|
import threading
|
|
import random
|
|
|
|
|
|
Base = sqlalchemy.ext.declarative.declarative_base()
|
|
|
|
|
|
class Proxy2Model(Base):
    """ORM mapping for a row of the ``proxy2`` table.

    A row describes one proxy endpoint (``ip`` and ``port``) plus one
    ``*_block_at`` timestamp column per supported platform.
    """

    __tablename__ = 'proxy2'

    # id, ip and port are all declared with primary_key=True, so the mapped
    # primary key is the composite of the three columns.
    id = sqlalchemy.Column(
        sqlalchemy.Integer,
        primary_key=True,
        nullable=False,
        autoincrement=True,
    )
    ip = sqlalchemy.Column(sqlalchemy.String(15), primary_key=True)
    # NOTE(review): if the backend stores SmallInteger as a signed 16-bit
    # value, ports above 32767 would not fit -- confirm against the schema.
    port = sqlalchemy.Column(sqlalchemy.SmallInteger, primary_key=True)

    # Defaults to datetime.datetime.now (called at insert time) when unset.
    create_at = sqlalchemy.Column(sqlalchemy.DateTime, default=datetime.datetime.now)

    # One timestamp column per platform; no default is configured for them.
    naver_block_at = sqlalchemy.Column(sqlalchemy.DateTime)
    daum_block_at = sqlalchemy.Column(sqlalchemy.DateTime)
    facebook_block_at = sqlalchemy.Column(sqlalchemy.DateTime)
    kakao_block_at = sqlalchemy.Column(sqlalchemy.DateTime)
    insta_block_at = sqlalchemy.Column(sqlalchemy.DateTime)
    twitter_block_at = sqlalchemy.Column(sqlalchemy.DateTime)
    youtube_block_at = sqlalchemy.Column(sqlalchemy.DateTime)

    def __init__(self, ip, port):
        """Create a proxy row object for the given ``ip`` and ``port``."""
        self.ip, self.port = ip, port

    def __repr__(self):
        """Return the endpoint formatted as ``ip:port``."""
        endpoint = '{}:{}'.format(self.ip, self.port)
        return endpoint

    def get_instance_for_http(self):
        """Return a dict describing this proxy.

        The ``'http'`` and ``'https'`` keys both hold the ``ip:port`` string;
        ``'ip'`` and ``'port'`` hold the raw column values.
        """
        endpoint = '{}:{}'.format(self.ip, self.port)
        description = {
            'http': endpoint,
            'https': endpoint,
            'ip': self.ip,
            'port': self.port,
        }
        return description
|
|
|
|
|
|
class Platform(enum.Enum):
    """Platforms for which a proxy's block state is tracked.

    Each member's value is the lowercase platform name as a string.
    """

    # Portals
    NAVER = 'naver'
    DAUM = 'daum'

    # Social / messaging / video services
    FACEBOOK = 'facebook'
    KAKAO = 'kakao'
    INSTA = 'insta'
    TWITTER = 'twitter'
    YOUTUBE = 'youtube'
|
|
|
|
|
|
class Proxy2Handler:
    """Database-backed pool of proxies with per-platform block tracking.

    Wraps a SQLAlchemy scoped session over the ``proxy2`` table and provides
    helpers to pick a proxy that is not marked blocked for a platform, to
    store newly crawled proxies, and to mark a proxy as blocked.
    """

    # NOTE(review): credentials are embedded in source. Prefer supplying
    # ``db_url`` from configuration; this default is kept so that existing
    # ``Proxy2Handler()`` callers behave as before.
    DEFAULT_DB_URL = 'mysql+pymysql://admin:admin123@bigbird.iptime.org/concepters?charset=utf8'

    # Maps each Platform member to the column holding its block timestamp.
    block_field_map = {
        Platform.NAVER: Proxy2Model.naver_block_at,
        Platform.DAUM: Proxy2Model.daum_block_at,
        Platform.FACEBOOK: Proxy2Model.facebook_block_at,
        Platform.KAKAO: Proxy2Model.kakao_block_at,
        Platform.INSTA: Proxy2Model.insta_block_at,
        Platform.TWITTER: Proxy2Model.twitter_block_at,
        Platform.YOUTUBE: Proxy2Model.youtube_block_at,
    }

    def __init__(self, db_url=None):
        """Create the engine and a scoped session.

        Args:
            db_url: Optional SQLAlchemy database URL. When omitted (or
                falsy), ``DEFAULT_DB_URL`` is used.
        """
        self.lock = threading.Lock()
        self.engine = sqlalchemy.create_engine(db_url or self.DEFAULT_DB_URL)
        self.session = self._new_session()

    def _new_session(self):
        """Build a fresh scoped session bound to ``self.engine``."""
        session_factory = sqlalchemy.orm.sessionmaker(bind=self.engine)
        return sqlalchemy.orm.scoped_session(session_factory)

    def lock_enter(self):
        """Hook called before touching the session.

        Currently a no-op: acquiring ``self.lock`` here is disabled.
        """
        pass

    def lock_leave(self):
        """Hook called after touching the session.

        Currently a no-op: releasing ``self.lock`` here is disabled.
        """
        pass

    def commit(self):
        """Commit the current session transaction."""
        self.lock_enter()
        try:
            self.session.commit()
        finally:
            self.lock_leave()

    def get_oldest(self, platform):
        """Return the first proxy row ordered by the platform's block column.

        NOTE(review): the ordering is DESCENDING, i.e. the row with the
        latest block timestamp comes first, which does not match the method
        name. Left unchanged; confirm the intended order with callers.

        Args:
            platform: A ``Platform`` member (key of ``block_field_map``).

        Returns:
            A ``Proxy2Model`` instance, or ``None`` if the table is empty.

        Raises:
            KeyError: If ``platform`` is not in ``block_field_map``.
        """
        block_column = self.block_field_map[platform]
        self.lock_enter()
        try:
            return self.session.query(Proxy2Model).order_by(block_column.desc()).first()
        finally:
            self.lock_leave()

    def get_query(self, ip, port):
        """Return a query selecting rows that match ``ip`` and ``port``."""
        return self.session.query(Proxy2Model).filter_by(ip=ip).filter_by(port=port)

    def get_instance(self, ip, port):
        """Return the first row matching ``ip``/``port``, or ``None``."""
        self.lock_enter()
        try:
            return self.get_query(ip, port).first()
        finally:
            self.lock_leave()

    def get(self, platform, proc_id=-1):
        """Return a randomly chosen proxy not marked blocked for ``platform``.

        When no such proxy exists, new proxies are crawled via
        ``proxy_crawler.crawl_proxies()``, stored, and the lookup is retried.

        Args:
            platform: A ``Platform`` member (key of ``block_field_map``).
            proc_id: Identifier included in log messages only.

        Returns:
            The dict produced by ``Proxy2Model.get_instance_for_http()``, or
            ``None`` if the database query failed (the session is replaced
            with a fresh one in that case so a later call can succeed).

        Raises:
            KeyError: If ``platform`` is not in ``block_field_map``.
        """
        block_column = self.block_field_map[platform]

        self.lock_enter()
        try:
            # ``.is_(None)`` builds the same IS NULL test as ``== None``.
            instances = self.session.query(Proxy2Model).filter(block_column.is_(None)).all()
        except Exception as e:
            # Best-effort recovery: report the failure, swap in a new
            # session and let the caller decide whether to retry.
            logger.log('{} proxy query failed: {!r}'.format(proc_id, e))
            try:
                self.session = self._new_session()
            except Exception:
                logger.log('{} session recreate'.format(proc_id))
            return None
        finally:
            self.lock_leave()

        if instances:
            return random.choice(instances).get_instance_for_http()

        # Nothing usable: crawl and store new proxies, then look again.
        # (Previously this path called the non-existent ``self.unlock()``
        # and raised AttributeError.)
        # NOTE(review): the retry recurses without a bound; if crawling never
        # yields a usable proxy this ends in RecursionError.
        proxies = proxy_crawler.crawl_proxies()
        self.insert_all(proxies)
        return self.get(platform, proc_id)

    def insert(self, ip, port):
        """Add a proxy to the session unless a matching row already exists.

        The caller is responsible for calling ``commit()`` afterwards.
        """
        if self.get_instance(ip, port):
            return

        proxy = Proxy2Model(ip, port)
        self.lock_enter()
        try:
            self.session.add(proxy)
        finally:
            self.lock_leave()

    def insert_all(self, proxies):
        """Store every proxy from ``proxies`` that is not yet in the table.

        Intended to behave like, per proxy:
            INSERT INTO proxy2(ip, port)
            SELECT <ip>, <port> FROM DUAL
            WHERE NOT EXISTS (SELECT * FROM proxy2 WHERE ip=<ip> AND port=<port>)

        Args:
            proxies: Iterable of mappings with ``'ip'`` and ``'port'`` keys.
        """
        instances_add = []
        # The existence check only sees rows already in the database, so an
        # ip/port repeated within this batch must be filtered here to avoid
        # inserting it twice.
        seen = set()
        for proxy in proxies:
            key = (proxy['ip'], proxy['port'])
            if key in seen:
                continue
            seen.add(key)
            if not self.get_instance(*key):
                instances_add.append(Proxy2Model(*key))

        self.session.bulk_save_objects(instances_add)
        self.commit()

    def set_proxy_blocked(self, ip, port, platform):
        """Stamp the proxy as blocked for ``platform`` with the current time.

        Only rows whose block column is still NULL are updated, so an
        existing block timestamp is not overwritten.

        Raises:
            KeyError: If ``platform`` is not in ``block_field_map``.
        """
        block_column = self.block_field_map[platform]
        query = self.get_query(ip, port).filter(block_column.is_(None))
        query.update({block_column: datetime.datetime.now()})
        self.commit()
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test: connect to the database and request one proxy that
    # is not marked blocked for Twitter.
    proxy_handler = Proxy2Handler()

    # Other calls that have been tried here by hand:
    #   proxy_handler.insert('127.0.0.5', 80) followed by proxy_handler.commit()
    #   proxy_handler.set_proxy_blocked('127.0.0.3', 80, Platform.TWITTER)
    #   print(proxy_handler.get_oldest(Platform.TWITTER))

    proxy = proxy_handler.get(Platform.TWITTER)
|