78 lines
2.2 KiB
Python
78 lines
2.2 KiB
Python
import requests
|
|
import urllib
|
|
import bs4
|
|
import re
|
|
import os
|
|
|
|
from .Setting import Setting
|
|
from .ProxyHandler import ProxyHandler
|
|
from .Logger import Logger
|
|
from .Util import Util
|
|
from .DataType import PageLink, TorrentFile
|
|
|
|
|
|
class WorkerTfreeca:
    """Crawler worker for the tfreeca torrent board.

    ``crawl_list`` scans a board list page for new, not-yet-downloaded
    episodes; ``crawl_downlink`` extracts torrent/subtitle download links
    from a single content page.
    """

    def crawl_list(self, top_url, soup, re_title, re_episode, setting):
        """Scan a board list page and collect content-page links.

        Args:
            top_url: Base URL of the board, used to resolve relative hrefs.
            soup: Parsed (bs4) document of the list page.
            re_title: Compiled pattern whose *named* groups identify which
                configured video a post title belongs to.
            re_episode: Compiled pattern whose group(1) is the episode number.
            setting: Setting object providing ``settings['video']`` and the
                per-title ``downloaded`` episode sets.

        Returns:
            list[PageLink]: One entry per new, matching episode.
        """
        links = []

        tables = soup.select('table.b_list')
        rows = tables[0].select('tr')

        for row in rows[1:]:  # rows[0] is the table header
            subject_cells = row.select('td.subject')
            if not subject_cells:
                continue  # spacer/notice row without a subject cell

            title = subject_cells[0].text.strip()
            href = subject_cells[0].select('a')[1].attrs['href']

            title_match = re_title.search(title)
            if not title_match:
                continue

            ep_match = re_episode.search(title)
            if not ep_match:
                continue

            # The matched group name encodes the video's index with a
            # 3-char prefix (e.g. 're_7' -> 7), indexing settings['video'].
            # NOTE(review): assumes all group names use a 3-char prefix —
            # confirm against where re_title is built.
            title_idx = int(title_match.lastgroup[3:])
            video = setting.settings['video'][title_idx]
            ep = int(ep_match.group(1))

            if ep <= video['ignore_ep_under']:
                Logger.log(' {}({}) is ignored (previous episode than ep{})'.format(video['title'], ep, video['ignore_ep_under']))
                continue
            if ep in setting.downloaded[video['title']]:
                Logger.log(' {}({}) is ignored (already downloaded)'.format(video['title'], ep))
                continue

            page_url = urllib.parse.urljoin(top_url, href)
            links.append(PageLink(video['title'], ep, page_url))

            Logger.log(' found content page : {}({}), {}'.format(video['title'], ep, page_url))

        return links

    def crawl_downlink(self, page_link, soup):
        """Extract download links from a single content page.

        Args:
            page_link: PageLink for the content page (provides title,
                episode and the page URL used to resolve relative hrefs).
            soup: Parsed (bs4) document of the content page.

        Returns:
            list[TorrentFile]: Torrent entries first, then subtitle (.smi)
            entries, mirroring the page's file table.
        """
        links = []

        file_table = soup.select('table#file_table')
        anchors = file_table[0].select('a')
        torrent_anchors = [a for a in anchors if '.torrent' in a.text]
        smi_anchors = [a for a in anchors if '.smi' in a.text]

        # BUG FIX: the original .smi loop resolved against ``link.url``,
        # but ``link`` is a bs4 Tag (no .url attribute) -> AttributeError
        # on any page with a subtitle file. Both kinds must resolve
        # against the content page's URL, so one merged loop suffices.
        for anchor in torrent_anchors + smi_anchors:
            file_name = anchor.text.strip()
            sub_url = anchor.attrs['href']
            url = urllib.parse.urljoin(page_link.url, sub_url)
            links.append(TorrentFile(page_link.title, page_link.episode, file_name, url))

            Logger.log(' found download link : {}({})'.format(file_name, url))

        return links