// 926 lines, 28 KiB, C++
#include "scrawler.h"
|
|
#include <iostream>
|
|
#include <QSqlQuery>
|
|
#include <QSqlError>
|
|
#include <QByteArray>
|
|
#include <QDebug>
|
|
#include <QRegExp>
|
|
#include <QThread>
|
|
using namespace std;
|
|
|
|
struct SProxyList
|
|
{
|
|
QString m_strAddress;
|
|
int m_nPort;
|
|
};
|
|
|
|
// Builds the crawler and the QWebPage it drives.
//
// The page is created with this object as its QObject parent, so Qt's
// parent/child ownership destroys it together with the crawler instead of
// leaking it. m_bError gets a defined starting value here; in the visible
// code it is otherwise only ever assigned in UpdateError().
//
// Every completed page load is routed to saveResult(bool).
SCrawler::SCrawler():QObject(), m_bDone(false), m_bCrawled(false)
{
    m_bError = false;

    m_page = new QWebPage(this);
    connect(m_page, SIGNAL(loadFinished(bool)), this, SLOT(saveResult(bool)));

    // p_timer = new QTimer(this);
    // connect(p_timer,SIGNAL(timeout()), this, SLOT(killProcess()));
}
|
|
|
|
// Destructor. Performs no explicit cleanup of its own.
SCrawler::~SCrawler()
{
    // nothing to do here
}
|
|
|
|
// Starts loading the page named by the first argument and arms a watchdog.
//
// _strlistArgv layout:
//   [0] URL to crawl
//   [1] optional folder; stored in m_strFolder with a trailing backslash
//   [2] optional location tag; m_strLocation falls back to "local"
//
// The watchdog fires killProcess() after 10 minutes when the URL contains
// "gatherproxy.com" and after 1 minute for every other URL.
//
// The URL is read with QList::value(), which returns an empty string for a
// missing index, so an empty argument list no longer indexes past the end of
// the list. The (then empty) URL is handed to the frame like any other and
// the watchdog is still armed.
void SCrawler::load(QStringList _strlistArgv)
{
    m_strUrl = _strlistArgv.value(0);
    const QUrl url(m_strUrl);
    //QNetworkRequest *request = new QNetworkRequest;

    if (m_strUrl.contains("gatherproxy.com"))
        QTimer::singleShot(600000, this, SLOT(killProcess()));
    else
        QTimer::singleShot(60000, this, SLOT(killProcess()));

    m_page->settings()->setAttribute(QWebSettings::AutoLoadImages, true);
    m_page->settings()->setAttribute(QWebSettings::JavaEnabled, true);

    if (_strlistArgv.length() > 1)
        m_strFolder = _strlistArgv.at(1).trimmed() + "\\";

    if (_strlistArgv.length() > 2)
        m_strLocation = _strlistArgv.at(2).trimmed();
    else
        m_strLocation = "local";

    m_page->mainFrame()->load(url);
}
|
|
|
|
// Flags that an error occurred by setting m_bError.
// The message text passed in is not used.
void SCrawler::UpdateError(QString _strError)
{
    Q_UNUSED(_strError);
    m_bError = true;
}
|
|
|
|
void SCrawler::saveResult(bool ok)
|
|
{
|
|
if (!ok)
|
|
{
|
|
cout << "Failed loading";
|
|
emit finished();
|
|
return;
|
|
}
|
|
|
|
QString strIpList;
|
|
if(m_strUrl.contains("hidemyass"))
|
|
{
|
|
QWebElement p_parse = Find(m_page->currentFrame()->documentElement(),"div","class","row inner");
|
|
if(!p_parse.isNull())
|
|
{
|
|
strIpList = getIpListFromAss(p_parse);
|
|
if(strIpList.trimmed().size() > 0 )
|
|
strIpList = addSource(strIpList, "hidemyass.com");
|
|
}
|
|
}
|
|
else if(m_strUrl.contains("nordvpn"))
|
|
{
|
|
|
|
QWebElement p_parse = Find(m_page->currentFrame()->documentElement(),"table","class","list");
|
|
if(!p_parse.isNull())
|
|
{
|
|
strIpList = getIpListFromNordVpn(p_parse);
|
|
if(strIpList.trimmed().size() > 0 )
|
|
strIpList = addSource(strIpList, "nordvpn.com");
|
|
}
|
|
}
|
|
else if(m_strUrl.contains("cyber"))
|
|
{
|
|
QWebElement p_parse = FindLeft(m_page->currentFrame()->documentElement(),"ol","style","list");
|
|
if(!p_parse.isNull())
|
|
{
|
|
strIpList = getIpListFromCyberSyndrom(p_parse);
|
|
if(strIpList.trimmed().size() > 0 )
|
|
strIpList = addSource(strIpList, "cybersyndrome.net");
|
|
}
|
|
}
|
|
else if(m_strUrl.contains("proxylists.net"))
|
|
{
|
|
|
|
QWebElement p_parse = m_page->mainFrame()->findFirstElement("table");
|
|
if(!p_parse.isNull())
|
|
{
|
|
strIpList = getIpListFromProxylists(p_parse);
|
|
if(strIpList.trimmed().size() > 0 )
|
|
strIpList = addSource(strIpList, "proxylists.net");
|
|
}
|
|
}
|
|
else if(m_strUrl.contains("txt.proxyspy.net"))
|
|
{
|
|
QString p_parse = m_page->mainFrame()->toPlainText();
|
|
if(!p_parse.isEmpty())
|
|
{
|
|
strIpList = getIpListFromProxySpy(p_parse);
|
|
if(strIpList.trimmed().size() > 0 )
|
|
strIpList = addSource(strIpList, "proxyspy.net");
|
|
}
|
|
/*
|
|
if(!p_parse.isNull())
|
|
{
|
|
strIpList = getIpListFromProxylists(p_parse);
|
|
if(strIpList.trimmed().size() > 0 )
|
|
strIpList = addSource(strIpList, "proxylists.net");
|
|
}
|
|
*/
|
|
}
|
|
else if(m_strUrl.contains("proxysearcher.sourceforge.net"))
|
|
{
|
|
QWebElement p_parse = m_page->mainFrame()->findFirstElement("body");
|
|
if(!p_parse.isNull())
|
|
{
|
|
strIpList = getIpListFromProxySearcher(p_parse);
|
|
if(strIpList.trimmed().size() > 0 )
|
|
strIpList = addSource(strIpList, "proxysearcher.sourceforge.net");
|
|
}
|
|
}
|
|
else if(m_strUrl.contains("proxylist.ro"))
|
|
{
|
|
QWebElement p_parse = m_page->mainFrame()->findFirstElement("body");
|
|
if(!p_parse.isNull())
|
|
{
|
|
strIpList = getIpListFromProxyListro(p_parse);
|
|
if(strIpList.trimmed().size() > 0 )
|
|
strIpList = addSource(strIpList, "proxylist.ro");
|
|
}
|
|
}
|
|
|
|
else if(m_strUrl.contains("samair.ru"))
|
|
{
|
|
QWebElement p_parse = Find(m_page->currentFrame()->documentElement(), "div", "id", "content");
|
|
if(!p_parse.isNull())
|
|
{
|
|
strIpList = getIpListFromSamuir(p_parse);
|
|
if(strIpList.trimmed().size() > 0 )
|
|
strIpList = addSource(strIpList, "samair.ru");
|
|
}
|
|
}
|
|
|
|
else if(m_strUrl.contains("nntime.com"))
|
|
{
|
|
QWebElement p_parse = Find(m_page->currentFrame()->documentElement(), "table", "id", "proxylist");
|
|
if(!p_parse.isNull())
|
|
{
|
|
strIpList = getIpListFromNntime(p_parse);
|
|
if(strIpList.trimmed().size() > 0 )
|
|
strIpList = addSource(strIpList, "nntime.com");
|
|
}
|
|
}
|
|
else if(m_strUrl.contains("free-proxy.cz"))
|
|
{
|
|
QWebElement p_parse = Find(m_page->currentFrame()->documentElement(), "table", "id", "proxy_list");
|
|
if(!p_parse.isNull())
|
|
{
|
|
strIpList = getIpListFromFreeproxy(p_parse);
|
|
if(strIpList.trimmed().size() > 0 )
|
|
strIpList = addSource(strIpList, "free-proxy.cz");
|
|
}
|
|
}
|
|
else if(m_strUrl.contains("xroxy.com"))
|
|
{
|
|
QWebElement p_parse = m_page->currentFrame()->documentElement().findFirst("body");
|
|
if(!p_parse.isNull())
|
|
{
|
|
strIpList = getIpListFromXroxy(p_parse);
|
|
if(strIpList.trimmed().size() > 0 )
|
|
strIpList = addSource(strIpList, "xroxy.com");
|
|
}
|
|
}
|
|
else if(m_strUrl.contains("cool-proxy.net"))
|
|
{
|
|
QWebElement p_parse = Find(m_page->currentFrame()->documentElement(), "div", "id", "main");
|
|
if(!p_parse.isNull())
|
|
{
|
|
strIpList = getIpListFromCoolProxy(p_parse);
|
|
if(strIpList.trimmed().size() > 0 )
|
|
strIpList = addSource(strIpList, "cool-proxy.net");
|
|
}
|
|
}
|
|
else if(m_strUrl.contains("gatherproxy.com"))
|
|
{
|
|
if (!m_bDone)
|
|
{
|
|
if (!m_bCrawled)
|
|
{
|
|
m_bCrawled = true;
|
|
getIpListFromGatherProxy();
|
|
m_bCrawled = false;
|
|
}
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
strIpList = m_strIpList.trimmed();
|
|
if(strIpList.trimmed().size() > 0 )
|
|
strIpList = addSource(strIpList, "gatherproxy.com");
|
|
}
|
|
}
|
|
|
|
else if(m_strUrl.contains("wait3"))
|
|
{
|
|
QThread::sleep(3);
|
|
}
|
|
else if(m_strUrl.contains("wait5"))
|
|
{
|
|
QThread::sleep(5);
|
|
}
|
|
|
|
|
|
QThread::sleep(5);
|
|
//qDebug() << strIpList;
|
|
//Debug("c:/data/test3.html", m_page->mainFrame()->toHtml());
|
|
|
|
if(strIpList.trimmed().size() > 0)
|
|
cout << strIpList.trimmed().toStdString();
|
|
|
|
|
|
if(strIpList.size() > 8)
|
|
{
|
|
cout << endl << "ok";
|
|
}
|
|
else
|
|
{
|
|
cout << "sitedown";
|
|
}
|
|
emit finished();
|
|
}
|
|
|
|
// Gathers every character of _str for which QChar::isNumber() holds, in
// order, and returns QString::toInt() of the gathered text.
int SCrawler::GetNumber(QString _str)
{
    QString strDigits;
    for (int nPos = 0; nPos < _str.size(); ++nPos)
    {
        const QChar ch = _str.at(nPos);
        if (!ch.isNumber())
            continue;
        strDigits += ch;
    }
    return strDigits.toInt();
}
|
|
|
|
// Writes _strData to the file _strFilename, replacing earlier content.
//
// Any existing file is removed first, then the file is opened write-only in
// text/append mode and the data is streamed into it.
// Returns true on success. If the file cannot be opened, "savefail" is
// printed, finished() is emitted and false is returned.
bool SCrawler::Debug(QString _strFilename,QString _strData)
{
    QFile target(_strFilename);
    QFile::remove(_strFilename);

    const bool bOpened = target.open(QIODevice::WriteOnly | QIODevice::Text | QIODevice::Append);
    if (bOpened)
    {
        QTextStream writer(&target);
        writer << _strData;
        target.close();
        return true;
    }

    cout << "savefail" << endl;
    emit finished();
    return false;
}
|
|
|
|
// Escapes _str for embedding in a backslash-escaped SQL string literal.
//
// Backslashes are doubled first, then single and double quotes are prefixed
// with a backslash. The backslash pass has to come first: without it an input
// such as  \'  would turn into  \\'  and the quote would terminate the
// literal again.
//
// NOTE(review): bound parameters (QSqlQuery::prepare / bindValue) are the
// safer way to pass values; this helper assumes the server treats backslash
// as an escape character.
QString SCrawler::SqlString(QString _str)
{
    _str = _str.replace("\\","\\\\");
    _str = _str.replace("'","\\'");
    _str = _str.replace("\"","\\\"");
    return _str;
}
|
|
|
|
|
|
// Returns a filtered copy of _strData.
//
// A character is copied when its code unit lies in 12593..12622
// (U+3131..U+314E) or 44032..55203 (U+AC00..U+D7A3), or when QChar classifies
// it as digit, number, space, lower-case, upper-case or symbol. The three
// tests are applied independently, one after another, for each character.
// Everything else (punctuation, for example) is left out.
QString SCrawler::GetSafeUtf(QString _strData)
{
    QString strKept;
    const int nCount = _strData.length();

    for (int nIdx = 0; nIdx < nCount; ++nIdx)
    {
        const QChar ch = _strData.at(nIdx);
        const int nCode = ch.unicode();

        if (nCode >= 12593 && nCode <= 12622)
            strKept += ch;

        if (nCode >= 44032 && nCode <= 55203)
            strKept += ch;

        const bool bPlain = ch.isDigit() || ch.isNumber() || ch.isSpace()
                         || ch.isLower() || ch.isUpper() || ch.isSymbol();
        if (bPlain)
            strKept += ch;
    }
    return strKept;
}
|
|
|
|
// Returns the first descendant of _FindElement selected by _strElement whose
// attribute _strAttrib equals _strFind exactly (case-sensitive).
// Returns a null QWebElement when nothing matches.
QWebElement SCrawler::Find(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind)
{
    const QWebElementCollection candidates = _FindElement.findAll(_strElement);
    const int nCount = candidates.count();
    for (int n = 0; n < nCount; ++n)
    {
        const QWebElement candidate = candidates.at(n);
        if (candidate.attribute(_strAttrib) == _strFind)
            return candidate;
    }
    return QWebElement();
}
|
|
|
|
// Returns the first descendant of _FindElement selected by _strElement for
// which the slice [_strStart, _strStart + _strLength) of the trimmed
// attribute _strAttrib equals _strFind, ignoring case.
// Returns a null QWebElement when nothing matches.
QWebElement SCrawler::FindMid(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind, const int _strStart, const int _strLength)
{
    const QWebElementCollection candidates = _FindElement.findAll(_strElement);
    const int nCount = candidates.count();
    for (int n = 0; n < nCount; ++n)
    {
        const QWebElement candidate = candidates.at(n);
        const QString strSlice = candidate.attribute(_strAttrib).trimmed().mid(_strStart, _strLength);
        if (QString::compare(strSlice, _strFind, Qt::CaseInsensitive) != 0)
            continue;
        return candidate;
    }
    return QWebElement();
}
|
|
|
|
// Convenience overload: same as the six-argument FindMid() with the slice
// length taken from the length of _strFind.
QWebElement SCrawler::FindMid(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind, const int _strStart)
{
    return FindMid(_FindElement, _strElement, _strAttrib, _strFind, _strStart, _strFind.length());
}
|
|
|
|
// Returns the first descendant of _FindElement selected by _strElement whose
// trimmed attribute _strAttrib ends with _strFind, ignoring case.
// Returns a null QWebElement when nothing matches.
QWebElement SCrawler::FindRight(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind)
{
    const QWebElementCollection candidates = _FindElement.findAll(_strElement);
    const int nTail = _strFind.length();
    const int nCount = candidates.count();
    for (int n = 0; n < nCount; ++n)
    {
        const QWebElement candidate = candidates.at(n);
        const QString strTail = candidate.attribute(_strAttrib).trimmed().right(nTail);
        if (QString::compare(strTail, _strFind, Qt::CaseInsensitive) != 0)
            continue;
        return candidate;
    }
    return QWebElement();
}
|
|
|
|
// Prefix variant of FindMid(): delegates to the five-argument overload with a
// start offset of 0.
QWebElement SCrawler::FindLeft(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind)
{
    const int nFromStart = 0;
    return FindMid(_FindElement, _strElement, _strAttrib, _strFind, nFromStart);
}
|
|
|
|
|
|
// Extracts "ip,port" lines from the hidemyass table below _FindElement.
//
// The address cell mixes visible and hidden fragments: each row carries a
// <style> element whose rules declare per-row class names as either
// {display:inline} or {display:none}. For every row (row 0 is skipped) this
// function
//   1. reads the class names from the row's first <style> element,
//   2. replaces each class name inside the second cell's markup with the
//      display value it stands for,
//   3. splits the markup on '<' / '>' and keeps only text fragments that are
//      not tags and do not directly follow a tag ending in  none" ,
//   4. appends "," plus the trimmed content of the third cell.
//
// Rows are joined with "\n"; the result has no trailing newline.
//
// Fixes over the earlier version: rows with fewer than three cells are
// skipped (the sibling extractors check the cell count as well), empty
// fragments are skipped before their first character is inspected, and empty
// class names are ignored because replacing an empty string would insert the
// replacement between every character of the markup.
QString SCrawler::getIpListFromAss(const QWebElement _FindElement)
{
    const QString DISPLAYINLINE = "{display:inline}";
    const QString DISPLAYNONE = "{display:none}";
    const QRegExp tagDelimiters("[<>]");
    QStringList rows;

    QWebElementCollection trs = _FindElement.findAll("tr");

    for (int i = 1; i < trs.count(); i++)
    {
        QWebElementCollection tds = trs.at(i).findAll("td");
        if (tds.count() < 3)
            continue;

        QWebElement firstStyle = trs.at(i).findFirst("style");
        QString strFirstStyle = firstStyle.toInnerXml().trimmed();

        // Sort the row's class names into "shown" and "hidden".
        QStringList disinline;
        QStringList disnone;
        QStringList strlist = strFirstStyle.split(".", QString::SkipEmptyParts);
        foreach (QString str, strlist)
        {
            const QString rule = str.trimmed();
            if (rule.isEmpty())
                continue;

            if (rule.right(DISPLAYINLINE.length()).compare(DISPLAYINLINE) == 0)
                disinline.append(rule.left(rule.length() - DISPLAYINLINE.length()));
            else
                disnone.append(rule.left(rule.length() - DISPLAYNONE.length()));
        }

        // Substitute every class name with the display value it maps to.
        QString strIpContents = tds.at(1).toInnerXml();
        foreach (QString str, disinline)
        {
            const QString className = str.trimmed();
            if (!className.isEmpty())
                strIpContents.replace(className, "display: inline");
        }
        foreach (QString str, disnone)
        {
            const QString className = str.trimmed();
            if (!className.isEmpty())
                strIpContents.replace(className, "display:none");
        }

        // Parse the cell markup and collect the visible text fragments.
        QStringList strIpContentsList = strIpContents.split(tagDelimiters, QString::SkipEmptyParts);
        QString result;
        for (int j = 0; j < strIpContentsList.count(); j++)
        {
            QString str1 = strIpContentsList.at(j).trimmed();

            if (str1.isEmpty())
                continue;
            if (str1.at(0) == '/')
                continue;
            if (str1.left(4).compare("span") == 0)
                continue;
            if (str1.left(3).compare("div") == 0)
                continue;
            if (str1.left(5).compare("style") == 0)
                continue;
            if (str1.mid(1,3).compare("dis") == 0)
                continue;

            if (j > 0)
            {
                // Text that follows a tag ending in  none"  is hidden.
                QString str2 = strIpContentsList.at(j-1).trimmed();
                if (str2.right(5).compare("none\"") == 0)
                    continue;
            }

            result += str1;
        }

        QString strport = tds.at(2).toInnerXml().trimmed();
        rows << (result + "," + strport);
    }

    return rows.join("\n");
}
|
|
|
|
|
|
// Extracts "ip,port" lines from the first <tbody> below _FindElement.
//
// For each <tr> with at least four <th> cells, the third cell supplies the
// address and the fourth the port. Rows with fewer cells are skipped.
// NOTE(review): the cells are looked up as <th>, not <td> - confirm against
// the site markup.
//
// Rows are joined with "\n" and the result never ends in a newline. The
// earlier version appended the separator based on the row index, so skipped
// rows at the end left a dangling "\n", which addSource() then turned into a
// bogus line holding only the source name.
QString SCrawler::getIpListFromNordVpn(const QWebElement _FindElement)
{
    QWebElement tbody = _FindElement.findFirst("tbody");
    QWebElementCollection trs = tbody.findAll("tr");
    QStringList rows;

    for (int i = 0; i < trs.count(); i++)
    {
        QWebElementCollection ths = trs.at(i).findAll("th");
        if (ths.count() < 4)
            continue;

        const QString strip = ths.at(2).toPlainText().trimmed();
        const QString strport = ths.at(3).toPlainText().trimmed();
        rows << (strip + "," + strport);
    }

    return rows.join("\n");
}
|
|
|
|
// Turns every <li> below _FindElement into one output line: the item's
// trimmed text with each ':' replaced by ','. Lines are joined with "\n".
QString SCrawler::getIpListFromCyberSyndrom(const QWebElement _FindElement)
{
    QStringList entries;
    const QWebElementCollection items = _FindElement.findAll("li");
    const int nItems = items.count();
    for (int n = 0; n < nItems; ++n)
        entries.append(items.at(n).toPlainText().trimmed().replace(":", ","));

    return entries.join("\n");
}
|
|
|
|
// Extracts "ip,port" lines from the first <table> nested below _FindElement.
//
// The last <tr> is never examined. A row is used when it has at least four
// <td> cells and the text of the third cell equals "anonymous" (ignoring
// case); the first cell supplies the address and the second the port.
// The result is newline-separated and trimmed.
QString SCrawler::getIpListFromProxylists(const QWebElement _FindElement)
{
    QString strCollected;
    const QWebElementCollection rows = _FindElement.findFirst("table").findAll("tr");
    const int nStop = rows.count() - 1;   // excludes the final row

    for (int n = 0; n < nStop; ++n)
    {
        const QWebElementCollection cells = rows.at(n).findAll("td");
        if (cells.count() >= 4
            && cells.at(2).toPlainText().compare("anonymous", Qt::CaseInsensitive) == 0)
        {
            const QString strAddress = cells.at(0).toPlainText();
            const QString strPort = cells.at(1).toPlainText();
            strCollected += strAddress.trimmed() + "," + strPort.trimmed() + "\n";
        }
    }

    return strCollected.trimmed();
}
|
|
|
|
// Extracts "ip,port" lines from the plain-text list in _txt.
//
// Each non-empty line is scanned for entries of the form
// "a.b.c.d:port <letters>-<N|A|H>...". Only entries whose flag letter is
// "A" or "H" are kept. The result is newline-separated and trimmed.
QString SCrawler::getIpListFromProxySpy(const QString _txt)
{
    const QStringList lines = _txt.split("\n", QString::SkipEmptyParts);
    QRegExp rxEntry("(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}):(\\d{2,5})\\s*[a-zA-Z]+-(N|A|H)\\S*");
    QString strCollected;

    for (int n = 0; n < lines.size(); ++n)
    {
        const QString line = lines.at(n);
        // Walk over every match in the line; pos is advanced past each match
        // before the next search starts.
        for (int pos = rxEntry.indexIn(line, 0); pos != -1; pos = rxEntry.indexIn(line, pos))
        {
            const QString strFlag = rxEntry.cap(3);
            if (strFlag == "A" || strFlag == "H")
                strCollected += (rxEntry.cap(1) + "," + rxEntry.cap(2) + "\n");
            pos += rxEntry.matchedLength();
        }
    }

    return strCollected.trimmed();
}
|
|
|
|
// Scans the plain text of _FindElement for every "a.b.c.d:port" occurrence
// and returns them as newline-separated, trimmed "ip,port" lines.
QString SCrawler::getIpListFromProxySearcher(const QWebElement _FindElement)
{
    QRegExp rxEndpoint("(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}):(\\d{2,5})");
    const QString strText = _FindElement.toPlainText();
    QString strCollected;

    for (int pos = rxEndpoint.indexIn(strText, 0); pos != -1; pos = rxEndpoint.indexIn(strText, pos))
    {
        strCollected += (rxEndpoint.cap(1) + "," + rxEndpoint.cap(2) + "\n");
        pos += rxEndpoint.matchedLength();
    }

    return strCollected.trimmed();
}
|
|
|
|
// Extracts "ip,port" lines from the <tr> rows below _FindElement.
//
// A row is used when its class attribute is "speed1" or "speed2", it has at
// least four <td> cells and the fourth cell reads "Y" or "y". The second cell
// supplies the address and the third the port, both with double quotes
// removed. The result is newline-separated and trimmed.
QString SCrawler::getIpListFromProxyListro(const QWebElement _FindElement)
{
    QString strCollected;
    const QWebElementCollection rows = _FindElement.findAll("tr");
    const int nRows = rows.count();

    for (int n = 0; n < nRows; ++n)
    {
        const QWebElement row = rows.at(n);

        const QString strClass = row.attribute("class").trimmed();
        if (strClass.compare("speed1") != 0 && strClass.compare("speed2") != 0)
            continue;

        const QWebElementCollection cells = row.findAll("td");
        if (cells.count() < 4)
            continue;

        const QString strFlag = cells.at(3).toPlainText().trimmed();
        if (strFlag == "Y" || strFlag == "y")
        {
            const QString strAddress = cells.at(1).toPlainText().trimmed().replace("\"","").trimmed();
            const QString strPort = cells.at(2).toPlainText().trimmed().replace("\"","").trimmed();
            strCollected += strAddress + "," + strPort + "\n";
        }
    }

    return strCollected.trimmed();
}
|
|
|
|
// Extracts "ip,port" lines from the table with id "proxylist" below
// _FindElement.
//
// A row is used when it has at least three <td> cells and the second cell's
// text contains "anony". The address is the first cell's text with double
// quotes and colons removed. The port is taken from the computed ':after'
// content of the first <span> in that cell (evaluated through JavaScript),
// with single quotes removed. The result is newline-separated and trimmed.
QString SCrawler::getIpListFromSamuir(const QWebElement _FindElement)
{
    QString strCollected;
    const QWebElement table = Find(_FindElement, "table", "id", "proxylist");
    const QWebElementCollection rows = table.findAll("tr");
    const int nRows = rows.count();

    for (int n = 0; n < nRows; ++n)
    {
        const QWebElementCollection cells = rows.at(n).findAll("td");
        if (cells.count() < 3)
            continue;
        if (!cells.at(1).toPlainText().contains("anony"))
            continue;

        QString strAddress = cells.at(0).toPlainText().replace("\"","").trimmed();
        strAddress = strAddress.replace(":","").trimmed();

        QWebElement portSpan = cells.at(0).findFirst("span");
        const QVariant portContent = portSpan.evaluateJavaScript("window.getComputedStyle(this, ':after').getPropertyValue('content')");

        strCollected += (strAddress + "," + portContent.toString().replace("'","") + "\n");
    }

    return strCollected.trimmed();
}
|
|
|
|
// Extracts "ip,port" lines from the <tr> rows below _FindElement.
//
// A row is used when it has at least four <td> cells and the third cell's
// text contains "anony". Every "a.b.c.d:port" occurrence in the second cell
// (double quotes removed) yields one line. The result is newline-separated
// and trimmed.
QString SCrawler::getIpListFromNntime(const QWebElement _FindElement)
{
    QString strCollected;
    QRegExp rxEndpoint("(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}):(\\d{2,5})");
    const QWebElementCollection rows = _FindElement.findAll("tr");
    const int nRows = rows.count();

    for (int n = 0; n < nRows; ++n)
    {
        const QWebElementCollection cells = rows.at(n).findAll("td");
        if (cells.count() < 4)
            continue;
        if (!cells.at(2).toPlainText().contains("anony"))
            continue;

        const QString strCell = cells.at(1).toPlainText().replace("\"","");
        for (int pos = rxEndpoint.indexIn(strCell, 0); pos != -1; pos = rxEndpoint.indexIn(strCell, pos))
        {
            strCollected += (rxEndpoint.cap(1) + "," + rxEndpoint.cap(2) + "\n");
            pos += rxEndpoint.matchedLength();
        }
    }

    return strCollected.trimmed();
}
|
|
|
|
// Extracts "ip,port" lines from the <tr> rows below _FindElement.
// Rows with fewer than seven <td> cells are skipped; the first cell supplies
// the address and the second the port. The result is newline-separated and
// trimmed.
QString SCrawler::getIpListFromFreeproxy(const QWebElement _FindElement)
{
    QString strCollected;
    const QWebElementCollection rows = _FindElement.findAll("tr");
    const int nRows = rows.count();

    for (int n = 0; n < nRows; ++n)
    {
        const QWebElementCollection cells = rows.at(n).findAll("td");
        if (cells.count() >= 7)
        {
            const QString strAddress = cells.at(0).toPlainText().trimmed();
            const QString strPort = cells.at(1).toPlainText().trimmed();
            strCollected += (strAddress + "," + strPort + "\n");
        }
    }

    return strCollected.trimmed();
}
|
|
|
|
// Extracts "ip,port" lines from the <tr> rows below _FindElement.
//
// Rows with fewer than seven <td> cells are skipped. The address is the last
// dotted-quad found in the second cell, the port the last run of 2-5 digits
// found in the third cell (double quotes removed from both). A row only
// contributes when both were found. The result is newline-separated and
// trimmed.
QString SCrawler::getIpListFromXroxy(const QWebElement _FindElement)
{
    QString strCollected;
    QRegExp rxAddress("(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})");
    QRegExp rxPort("(\\d{2,5})");
    const QWebElementCollection rows = _FindElement.findAll("tr");
    const int nRows = rows.count();

    for (int n = 0; n < nRows; ++n)
    {
        const QWebElementCollection cells = rows.at(n).findAll("td");
        if (cells.count() < 7)
            continue;

        // Keep the last match of each pattern.
        QString strAddress;
        const QString strAddressCell = cells.at(1).toPlainText().replace("\"","").trimmed();
        for (int pos = rxAddress.indexIn(strAddressCell, 0); pos != -1; pos = rxAddress.indexIn(strAddressCell, pos))
        {
            strAddress = rxAddress.cap(1);
            pos += rxAddress.matchedLength();
        }

        QString strPort;
        const QString strPortCell = cells.at(2).toPlainText().replace("\"","").trimmed();
        for (int pos = rxPort.indexIn(strPortCell, 0); pos != -1; pos = rxPort.indexIn(strPortCell, pos))
        {
            strPort = rxPort.cap(1);
            pos += rxPort.matchedLength();
        }

        if (strAddress.isEmpty() || strPort.isEmpty())
            continue;

        strCollected += (strAddress + "," + strPort + "\n");
    }

    return strCollected.trimmed();
}
|
|
|
|
|
|
// Extracts "ip,port" lines from the <tr> rows below _FindElement.
//
// Rows with fewer than seven <td> cells are skipped. The address is the last
// dotted-quad found in the first cell, the port the last run of 2-5 digits
// found in the second cell (double quotes removed from both). A row only
// contributes when both were found. The result is newline-separated and
// trimmed.
QString SCrawler::getIpListFromCoolProxy(const QWebElement _FindElement)
{
    QString strCollected;
    QRegExp rxAddress("(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})");
    QRegExp rxPort("(\\d{2,5})");
    const QWebElementCollection rows = _FindElement.findAll("tr");
    const int nRows = rows.count();

    for (int n = 0; n < nRows; ++n)
    {
        const QWebElementCollection cells = rows.at(n).findAll("td");
        if (cells.count() < 7)
            continue;

        // Keep the last match of each pattern.
        QString strAddress;
        const QString strAddressCell = cells.at(0).toPlainText().replace("\"","").trimmed();
        for (int pos = rxAddress.indexIn(strAddressCell, 0); pos != -1; pos = rxAddress.indexIn(strAddressCell, pos))
        {
            strAddress = rxAddress.cap(1);
            pos += rxAddress.matchedLength();
        }

        QString strPort;
        const QString strPortCell = cells.at(1).toPlainText().replace("\"","").trimmed();
        for (int pos = rxPort.indexIn(strPortCell, 0); pos != -1; pos = rxPort.indexIn(strPortCell, pos))
        {
            strPort = rxPort.cap(1);
            pos += rxPort.matchedLength();
        }

        if (strAddress.isEmpty() || strPort.isEmpty())
            continue;

        strCollected += (strAddress + "," + strPort + "\n");
    }

    return strCollected.trimmed();
}
|
|
|
|
|
|
|
|
void SCrawler::getIpListFromGatherProxy()
|
|
{
|
|
static bool b_first = true;
|
|
//cout << "getIpListFromGatherProxy";
|
|
QString totalResult;
|
|
if (b_first)
|
|
{
|
|
b_first = false;
|
|
QWebElement button = Find(m_page->mainFrame()->documentElement(), "input", "class", "button");
|
|
button.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
|
|
//cout << "show full list clicked";
|
|
return;
|
|
}
|
|
|
|
QWebElement webTable = Find(m_page->mainFrame()->documentElement(), "table", "id", "tblproxy");
|
|
if (webTable.isNull())
|
|
{
|
|
m_bDone = true;
|
|
QTimer::singleShot(1000, this, SLOT(saveResultManual()));
|
|
return;
|
|
}
|
|
|
|
QWebElementCollection trs = webTable.findAll("tr");
|
|
if (trs.count() > 2)
|
|
{
|
|
for (int i = 2; i < trs.count(); i++)
|
|
{
|
|
QWebElementCollection tds = trs.at(i).findAll("td");
|
|
if (tds.count() > 2)
|
|
{
|
|
QString ip = tds.at(1).toPlainText();
|
|
QString port = tds.at(2).toPlainText();
|
|
totalResult += (ip + "," + port + "\n");
|
|
}
|
|
}
|
|
}
|
|
|
|
m_strIpList += totalResult;
|
|
QWebElement webPageNavi = Find(m_page->mainFrame()->documentElement(), "div", "class", "pagenavi");
|
|
if (webPageNavi.isNull())
|
|
{
|
|
m_bDone = true;
|
|
QTimer::singleShot(1000, this, SLOT(saveResultManual()));
|
|
//cout << "no webpageNavi";
|
|
return;
|
|
}
|
|
QWebElement span = webPageNavi.findFirst("span");
|
|
QWebElement webA = span.nextSibling();
|
|
if (webA.isNull())
|
|
{
|
|
m_bDone = true;
|
|
//cout << "next page is none";
|
|
QTimer::singleShot(1000, this, SLOT(saveResultManual()));
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
webA.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
|
|
return;
|
|
}
|
|
}
|
|
void SCrawler::saveResultManual()
|
|
{
|
|
saveResult(true);
|
|
}
|
|
|
|
// Replaces the content of the Proxy table with the entries in _strIpList.
//
// _strIpList holds one entry per line in the form "address,port[,...]"; only
// the first two fields are used.
//
// Returns true when every usable line was inserted. Returns false when
//   - the database cannot be opened ("dbcfail" is printed, finished() emitted),
//   - the list contains no lines ("sitedown" is printed),
//   - the insert statement cannot be prepared or executed ("dbufail").
// A failing "delete from Proxy" is reported ("dbdfail") but does not stop the
// inserts, matching the earlier behaviour.
//
// Values are passed as bound parameters instead of being concatenated into
// the SQL text, because the lines come from scraped web pages. Lines without
// a comma or with a non-numeric port are skipped with a warning instead of
// indexing past the end of the split result.
//
// NOTE(review): host, user and password are hard-coded below; they should be
// moved to configuration.
bool SCrawler::SendIpList(QString _strIpList)
{
    QSqlDatabase db = QSqlDatabase::addDatabase("QMYSQL");
    db.setHostName("bigbird.iptime.org");
    db.setUserName("admin");
    db.setPassword("admin123");
    db.setDatabaseName("concepters");

    if (!db.open())
    {
        cout << "error : db open fail..." << endl;
        cout << "dbcfail";
        emit finished();
        return false;
    }

    const QStringList strListIpList = _strIpList.split('\n', QString::SkipEmptyParts);
    if (strListIpList.size() < 1)
    {
        cout << "sitedown";
        return false;
    }

    QSqlQuery sql(db);

    //QString strQuery = "truncate table Proxy";
    if (!sql.exec("delete from Proxy"))
    {
        cout << "error : " << sql.lastError().text().toStdString();
        cout << endl << "dbdfail";
        // best effort: carry on with the inserts
    }

    if (!sql.prepare("insert into Proxy set Proxy=:proxy, Port=:port"))
    {
        cout << "error : " << sql.lastError().text().toStdString();
        cout << endl << "dbufail";
        return false;
    }

    foreach (QString str, strListIpList)
    {
        const QStringList fields = str.split(',');
        if (fields.size() < 2)
        {
            qWarning() << "SendIpList: skipping line without a port field:" << str;
            continue;
        }

        bool bPortOk = false;
        const int nPort = fields.at(1).trimmed().toInt(&bPortOk);
        if (!bPortOk)
        {
            qWarning() << "SendIpList: skipping line with a non-numeric port:" << str;
            continue;
        }

        sql.bindValue(":proxy", fields.at(0).trimmed());
        sql.bindValue(":port", nPort);
        if (!sql.exec())
        {
            cout << "error : " << sql.lastError().text().toStdString();
            cout << endl << "dbufail";
            return false;
        }
    }
    return true;
}
|
|
|
|
// Returns every descendant of _FindElement selected by _strElement for which
// the slice [_strStart, _strStart + _strLength) of the trimmed attribute
// _strAttrib equals _strFind, ignoring case. Matches keep document order; the
// list is empty when nothing matches.
QList<QWebElement> SCrawler::FindAllMid(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind, const int _strStart, const int _strLength)
{
    QList<QWebElement> matches;
    const QWebElementCollection candidates = _FindElement.findAll(_strElement);
    const int nCount = candidates.count();

    for (int n = 0; n < nCount; ++n)
    {
        const QWebElement candidate = candidates.at(n);
        const QString strSlice = candidate.attribute(_strAttrib).trimmed().mid(_strStart, _strLength);
        if (QString::compare(strSlice, _strFind, Qt::CaseInsensitive) == 0)
            matches.append(candidate);
    }
    return matches;
}
|
|
|
|
void SCrawler::killProcess()
|
|
{
|
|
if (m_strIpList.isEmpty())
|
|
cout << endl << "timeout";
|
|
else
|
|
cout << m_strIpList.toStdString() << "ok";
|
|
emit finished();
|
|
}
|
|
|
|
// Debug aid: dumps the HTML of frame to c:/data/nordvpnloop.html through
// Debug() and then recurses into every child frame. Debug() replaces the file
// on each call, so the file ends up holding the frame that was written last.
void SCrawler::SearchChildFrame(QWebFrame *frame)
{
    Debug("c:/data/nordvpnloop.html", frame->toHtml());

    const QList<QWebFrame*> children = frame->childFrames();
    for (int n = 0; n < children.size(); ++n)
        SearchChildFrame(children.at(n));
}
|
|
|
|
// Appends "," + _strSource to every "\n"-separated line of _strIpList (empty
// lines included) and returns the re-joined, trimmed text.
QString SCrawler::addSource(QString _strIpList, QString _strSource)
{
    const QString strSuffix = "," + _strSource;

    QStringList lines = _strIpList.split("\n");
    for (int n = 0; n < lines.size(); ++n)
        lines[n] += strSuffix;

    return lines.join("\n").trimmed();
}
|
|
|
|
// Interprets _strport as a hexadecimal number and returns its value.
//
// Both upper- and lower-case hex digits are accepted. Any other character
// still occupies a digit position but contributes 0, as before.
//
// The value is accumulated in an unsigned integer, one nibble at a time. The
// earlier version shifted each digit left by 4 * (remaining characters) on a
// signed int, which is undefined once the string is longer than the int has
// nibbles. Results for strings that fit are identical; for longer strings the
// excess high digits are discarded.
int SCrawler::getPort(QString _strport)
{
    unsigned int value = 0;
    for (int i = 0; i < _strport.length(); i++)
    {
        const char ch = _strport.at(i).toLatin1();

        unsigned int digit = 0;
        if ('a' <= ch && ch <= 'f')
            digit = static_cast<unsigned int>(ch - 'a' + 10);
        else if ('A' <= ch && ch <= 'F')
            digit = static_cast<unsigned int>(ch - 'A' + 10);
        else if ('0' <= ch && ch <= '9')
            digit = static_cast<unsigned int>(ch - '0');

        value = (value << 4) | digit;
    }
    return static_cast<int>(value);
}
|
|
|