# Torrent crawler: scrapes board list pages, collects attachment links,
# logs and downloads the torrent files.
import io
import re
import sys
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Re-wrap the standard streams in UTF-8 writers so Korean titles print
# correctly even when the console's default encoding cannot represent them.
sys.stdout = io.TextIOWrapper(sys.stdout.detach(), encoding='utf-8')
sys.stderr = io.TextIOWrapper(sys.stderr.detach(), encoding='utf-8')
class torrent_file:
    """A single downloadable torrent attachment: display name plus URL.

    Attributes:
        file_name: name to save the download under.
        link:      download URL of the attachment.
    """

    def __init__(self, name="", link=""):
        # NOTE: the original defined two __init__ methods; Python keeps only
        # the last definition, so the zero-argument form was dead code.
        # Default arguments restore it while remaining fully compatible with
        # the two-argument call torrent_file(name, link).
        self.file_name = name
        self.link = link

    def __str__(self):
        return "{{ {}: {} }}".format(self.file_name, self.link)

    def __repr__(self):
        return str(self)
class Options:
    """Crawler configuration: board URLs, crawl depth, title filters, and
    episodes that have already been fetched."""

    def __init__(self):
        # Board list pages; the page number gets appended to each URL.
        self.urls = [
            'https://todaum.com/bbs/board.php?bo_table=torrent_kortv_ent&device=pc&page=',
        ]

        # How many list pages to walk per board URL.
        self.max_page = 1

        # Regex patterns a post title must match to be considered.
        self.keywords = [
            '무한도전.*720p.*NEXT',
            '한끼줍쇼.*720p.*NEXT',
            '라디오스타.*720p.*NEXT',
        ]

        # Episode numbers already downloaded, keyed by show name.
        self.downloaded_ep = {
            '무한도전': [533],
            '라디오스타': [531],
        }

    def save(self):
        # Persistence is not implemented yet.
        pass

    def load(self):
        # Persistence is not implemented yet.
        pass
def crawl_list(url, option=None):
    """Fetch one board list page and return absolute URLs of its posts.

    Args:
        url:    full list-page URL (board URL with page number appended).
        option: crawler Options; currently unused (reserved for keyword
                filtering), so it defaults to None for convenience — the
                sibling crawl() called this with a single argument.

    Returns:
        list[str]: absolute post URLs found on the page.  The original
        left the append commented out and always returned [].
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'lxml')

    selector = ('#fboardlist > div.tbl_head01.tbl_wrap > table > tbody > '
                'tr > td.td_subject > a')
    links = []
    for anchor in soup.select(selector):
        href = anchor.get('href')
        if href:
            # urljoin resolves relative hrefs against the page URL; the
            # original's plain string concatenation (url + href) produced
            # malformed URLs because url already ends in "...page=N".
            links.append(urljoin(url, href))

    return links
def crawl_downlink(url):
    """Scrape a post page and return its attachments as torrent_file objects.

    Args:
        url: URL of a single board post.

    Returns:
        list[torrent_file]: one entry per download anchor in the post's
        attachment box (#bo_v_file).
    """
    page = requests.get(url)
    soup = BeautifulSoup(page.text, 'lxml')

    attachment_box = soup.find(id='bo_v_file')
    anchors = attachment_box.find_all('a', {'class': 'view_file_download'})

    return [
        torrent_file(str(anchor.find('strong').text), anchor.get('href'))
        for anchor in anchors
    ]
def download_files(files):
    """Download each torrent_file's link and save it under its file_name.

    Args:
        files: iterable of torrent_file objects.  Existing files with the
               same name are overwritten.
    """
    for item in files:
        response = requests.get(item.link)
        # 'with' guarantees the handle is closed even if the write raises;
        # the original leaked the handle on error.
        with open(item.file_name, 'wb') as out:
            out.write(response.content)
def print_log(files):
    """Append each file's name to log.txt, one per line.

    Args:
        files: iterable of objects with a .file_name attribute.
    """
    # Explicit utf-8 so Korean titles are logged correctly regardless of the
    # platform's default encoding; 'at' preserves the running history.
    # 'with' closes the handle even if a write raises (the original did not).
    with open('log.txt', 'at', encoding='utf-8') as log:
        for item in files:
            log.write(item.file_name + '\n')
def crawl(url, option=None):
    """Crawl one list page end-to-end: find posts, scrape their download
    links, log the file names, then download everything.

    Args:
        url:    board list-page URL to start from.
        option: Options forwarded to crawl_list (may be None).  The
                original called crawl_list(url) with one argument even
                though crawl_list requires two — a guaranteed TypeError.
    """
    page_links = crawl_list(url, option)

    files = []
    for link in page_links:
        files += crawl_downlink(link)

    print_log(files)
    download_files(files)
def do_it():
    """Entry point: walk every configured board URL across all pages.

    Currently only lists the posts on each page; the per-post scrape and
    download steps are not wired up yet.
    """
    option = Options()
    files = []

    for base_url in option.urls:
        for page_no in range(1, option.max_page + 1):
            # The board expects the page number appended to the base URL.
            page_links = crawl_list(base_url + str(page_no), option)


# crawl(urls[0])
|
if __name__ == '__main__':
    # Only run the crawler when executed as a script, not when imported.
    do_it()
|
# test = [
#     '무한도전.E533 170610 720p NEXT',
#     '무한도전 E533 170610.720p NEXT',
#     '무한도전.E533.170610.720p NEXT.mp4',
#     '무한도전.E533.170610.720p NEXT',
#     '무한도전.E533.170610.720p NEXT',
#     '무한도전 E532 170603 720p NEXT',
# ]
#
# a = []
# # for k in test:
# #     a += re.findall(r'[ .,]E([0-9]+)[ .,]', k)
#
# for k in test:
#     m = re.search(r"(무한도전)(720p)(.NEXT)", k)
#     print(k + ': ')
#     res = m.groups()
#     print(len(res))
#     print(res)
#     print('')
#
# print(a)