diff --git a/ProxyProcess/ProxyProcess.pro.user b/ProxyProcess/ProxyProcess.pro.user index eeba9c5..44547e9 100644 --- a/ProxyProcess/ProxyProcess.pro.user +++ b/ProxyProcess/ProxyProcess.pro.user @@ -1,6 +1,6 @@ - + EnvironmentId @@ -61,7 +61,7 @@ Desktop Qt 5.4.0 MinGW 32bit Desktop Qt 5.4.0 MinGW 32bit qt.54.win32_mingw491_kit - 1 + 0 0 0 diff --git a/ProxyProcess/main.cpp b/ProxyProcess/main.cpp index d60bc04..53a6eb7 100644 --- a/ProxyProcess/main.cpp +++ b/ProxyProcess/main.cpp @@ -14,8 +14,8 @@ int main(int argc, char *argv[]) QApplication a(argc, argv); //QDateTime start = QDateTime::currentDateTime(); - a.setApplicationName(QString("Chrome")); - a.setApplicationVersion(QString("39.0.2171.95")); + a.setApplicationName(QString("Google Chrome")); + a.setApplicationVersion(QString("43.0.2357.81")); SCrawler *process = new SCrawler; QObject::connect(process, SIGNAL(finished()), QApplication::instance(), SLOT(quit())); diff --git a/ProxyProcess/scrawler.cpp b/ProxyProcess/scrawler.cpp index deb051f..5c8e2bb 100644 --- a/ProxyProcess/scrawler.cpp +++ b/ProxyProcess/scrawler.cpp @@ -20,6 +20,7 @@ SCrawler::SCrawler():QObject() // p_timer = new QTimer(this); // connect(p_timer,SIGNAL(timeout()), this, SLOT(killProcess())); QTimer::singleShot(90000, this, SLOT(killProcess())); + m_bCrawled = false; } SCrawler::~SCrawler() @@ -29,10 +30,11 @@ SCrawler::~SCrawler() void SCrawler::load(QStringList _strlistArgv) { QUrl url(_strlistArgv.at(0)); + QNetworkRequest *request = new QNetworkRequest; - m_page->settings()->setAttribute(QWebSettings::AutoLoadImages, false); - m_page->settings()->setAttribute(QWebSettings::JavaEnabled, false); - + m_strUrl = _strlistArgv.at(0); + m_page->settings()->setAttribute(QWebSettings::AutoLoadImages, true); + m_page->settings()->setAttribute(QWebSettings::JavaEnabled, true); if(_strlistArgv.length() > 1) m_strFolder = _strlistArgv.at(1).trimmed() + "\\"; @@ -42,13 +44,11 @@ void SCrawler::load(QStringList _strlistArgv) else m_strLocation = "local"; - m_page->currentFrame()->load(url); - + m_page->mainFrame()->load(url); } void SCrawler::UpdateError(QString _strError) { - m_bError = true; } @@ -56,14 +56,15 @@ void SCrawler::saveResult(bool ok) { if (!ok) { - std::cerr << "Failed loading " << qPrintable(m_page->mainFrame()->url().toString()) << std::endl; + cout << "Failed loading " << std::endl; emit finished(); return; } + QString strIpList; QWebElement p_parse = Find(m_page->currentFrame()->documentElement(),"div","class","row inner"); + strIpList = getIpListFromAss(p_parse); - QString strIpList = getIpList(p_parse); //Debug("test.html",m_page->currentFrame()->toHtml()); //Debug("filtered.html", p_parse.toInnerXml()); @@ -194,7 +195,7 @@ QWebElement SCrawler::FindLeft(const QWebElement _FindElement,const QString _str } -QString SCrawler::getIpList(const QWebElement _FindElement) +QString SCrawler::getIpListFromAss(const QWebElement _FindElement) { QString DISPLAYINLINE = "{display:inline}"; @@ -289,6 +290,29 @@ QString SCrawler::getIpList(const QWebElement _FindElement) } +QString SCrawler::getIpListFromFreeProxy(const QWebElement _FindElement) +{ + QWebElementCollection trs = _FindElement.findAll("tr"); + QString totalResult; + for(int i = 1; i < trs.count(); i++) + { + QWebElementCollection tds = trs.at(i).findAll("td"); + + if(tds.count() < 2) + continue; + + QString strip = tds.at(0).findFirst("a").toPlainText().trimmed(); + QString strport = tds.at(1).toPlainText().trimmed(); + totalResult += strip; + totalResult += ","; + totalResult += strport; + if(i < trs.count() - 1) + totalResult += "\n"; + } + + return totalResult; +} + bool SCrawler::SendIpList(QString _strIpList) { QSqlDatabase db = QSqlDatabase::addDatabase("QMYSQL"); @@ -305,6 +329,15 @@ bool SCrawler::SendIpList(QString _strIpList) return false; } + QStringList strListIpList = _strIpList.split('\n', QString::SkipEmptyParts); + + if(strListIpList.size() < 1) + { + cout << "sitedown"; + return false; + } + + QSqlQuery sql; //QString strQuery = "truncate table Proxy"; QString strQuery = "delete from Proxy"; @@ -318,7 +351,6 @@ bool SCrawler::SendIpList(QString _strIpList) //return; } - QStringList strListIpList = _strIpList.split('\n', QString::SkipEmptyParts); foreach(QString str, strListIpList) { @@ -358,3 +390,4 @@ void SCrawler::killProcess() cout << endl << "timeout"; emit finished(); } + diff --git a/ProxyProcess/scrawler.h b/ProxyProcess/scrawler.h index 0366602..fc911fb 100644 --- a/ProxyProcess/scrawler.h +++ b/ProxyProcess/scrawler.h @@ -35,15 +35,16 @@ private: bool m_bUse; bool m_bLast; bool m_bError; - + bool m_bCrawled; public: QString SqlString(QString _str); QString GetSafeUtf(QString _strData); int GetNumber(QString _str); bool SendIpList(QString _str); - - QString getIpList(const QWebElement _FindElement); + void SearchChildFrame(QWebFrame *frame); + QString getIpListFromAss(const QWebElement _FindElement); + QString getIpListFromFreeProxy(const QWebElement _FindElement); QWebElement Find(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind); QWebElement FindMid(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind, const int _strStart, const int _strLength); QWebElement FindMid(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind, const int _strStart);