diff --git a/ProxyProcess/ProxyProcess.pro.user b/ProxyProcess/ProxyProcess.pro.user index e5df2ec..999d578 100644 --- a/ProxyProcess/ProxyProcess.pro.user +++ b/ProxyProcess/ProxyProcess.pro.user @@ -1,6 +1,6 @@ - + EnvironmentId @@ -227,7 +227,7 @@ ProxyProcess Qt4ProjectManager.Qt4RunConfiguration:C:/source/ProxyProcess/ProxyProcess.pro - + "http://www.gatherproxy.com/proxylist/anonymity/?t=Elite" "c:\data\proxytest.txt" ProxyProcess.pro false true diff --git a/ProxyProcess/scrawler.cpp b/ProxyProcess/scrawler.cpp index d7d4cf7..4056ce3 100644 --- a/ProxyProcess/scrawler.cpp +++ b/ProxyProcess/scrawler.cpp @@ -14,14 +14,12 @@ struct SProxyList int m_nPort; }; -SCrawler::SCrawler():QObject() +SCrawler::SCrawler():QObject(), m_bDone(false), m_bCrawled(false) { m_page = new QWebPage; connect(m_page, SIGNAL(loadFinished(bool)), this, SLOT(saveResult(bool))); // p_timer = new QTimer(this); // connect(p_timer,SIGNAL(timeout()), this, SLOT(killProcess())); - QTimer::singleShot(60000, this, SLOT(killProcess())); - m_bCrawled = false; } @@ -32,9 +30,16 @@ SCrawler::~SCrawler() void SCrawler::load(QStringList _strlistArgv) { QUrl url(_strlistArgv.at(0)); - QNetworkRequest *request = new QNetworkRequest; + //QNetworkRequest *request = new QNetworkRequest; m_strUrl = _strlistArgv.at(0); + + if (m_strUrl.contains("gatherproxy.com")) + QTimer::singleShot(600000, this, SLOT(killProcess())); + else + QTimer::singleShot(60000, this, SLOT(killProcess())); + + m_page->settings()->setAttribute(QWebSettings::AutoLoadImages, true); m_page->settings()->setAttribute(QWebSettings::JavaEnabled, true); @@ -198,9 +203,22 @@ void SCrawler::saveResult(bool ok) } else if(m_strUrl.contains("gatherproxy.com")) { - strIpList = getIpListFromGatherProxy(); - if(strIpList.trimmed().size() > 0 ) - strIpList = addSource(strIpList, "gatherproxy.com"); + if (!m_bDone) + { + if (!m_bCrawled) + { + m_bCrawled = true; + getIpListFromGatherProxy(); + m_bCrawled = false; + } + return; + } + else + { + strIpList = m_strIpList.trimmed(); + if(strIpList.trimmed().size() > 0 ) + strIpList = addSource(strIpList, "gatherproxy.com"); + } } else if(m_strUrl.contains("wait3")) @@ -714,67 +732,67 @@ QString SCrawler::getIpListFromCoolProxy(const QWebElement _FindElement) -QString SCrawler::getIpListFromGatherProxy() +void SCrawler::getIpListFromGatherProxy() { + static bool b_first = true; QString totalResult; - QTcpSocket socket; - socket.connectToHost("65.50.243.103",80); - if(!socket.waitForConnected()) + if (b_first) { - qDebug() << "Error: " << socket.errorString(); + b_first = false; + QWebElement button = Find(m_page->mainFrame()->documentElement(), "input", "class", "button"); + button.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);"); + return; } - QString index = m_strUrl.right(2); - QRegExp re("(\\d+)"); - int pos = 0; - QString num; - while((pos = re.indexIn(index, pos)) != -1) + QWebElement webTable = Find(m_page->mainFrame()->documentElement(), "table", "id", "tblproxy"); + if (webTable.isNull()) { - num = re.cap(1); - pos += re.matchedLength(); + m_bDone = true; + QTimer::singleShot(1000, this, SLOT(saveResultManual())); + return; } - QString strheader = "POST /proxylist/anonymity/?t=Elite HTTP/1.1\r\n" - "Host: www.gatherproxy.com\r\n" - "Connection: keep-alive\r\n" - "Content-Length: " + QString::number(28+num.length()) + "\r\n" - "Cache-Control: max-age=0\r\n" - "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8\r\n" - "Origin: http://www.gatherproxy.com\r\n" - "User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.132 Safari/537.36\r\n" - "Content-Type: application/x-www-form-urlencoded\r\n" - "Referer: http://www.gatherproxy.com/proxylist/anonymity/?t=Elite\r\n" - "Accept-Encoding: deflate\r\n" - "Accept-Language: ko-KR,ko;q=0.8,en-US;q=0.6,en;q=0.4\r\n\r\n" - "Type=elite&PageIdx=" + num + "&Uptime=0"; - socket.write(strheader.toUtf8()); - - QString strPacket; - while (socket.waitForReadyRead()) + QWebElementCollection trs = webTable.findAll("tr"); + if (trs.count() > 2) { - strPacket += QString::fromUtf8(socket.readAll()); - } - //Debug("c:/data/asdf.html", strPacket); - { - int pos = 0; - QRegExp re("\\s*"); - while((pos = re.indexIn(strPacket, pos)) != -1) + for (int i = 2; i < trs.count(); i++) { - QString ip = re.cap(1); - QString port = QString::number(getPort(re.cap(2))); - - if(!ip.isNull() && !port.isNull()) + QWebElementCollection tds = trs.at(i).findAll("td"); + if (tds.count() > 2) { + QString ip = tds.at(1).toPlainText(); + QString port = tds.at(2).toPlainText(); totalResult += (ip + "," + port + "\n"); } - - pos += re.matchedLength(); } - } - return totalResult.trimmed(); -} + m_strIpList += totalResult; + QWebElement webPageNavi = Find(m_page->mainFrame()->documentElement(), "div", "class", "pagenavi"); + if (webPageNavi.isNull()) + { + m_bDone = true; + QTimer::singleShot(1000, this, SLOT(saveResultManual())); + return; + } + QWebElement span = webPageNavi.findFirst("span"); + QWebElement webA = span.nextSibling(); + if (webA.isNull()) + { + m_bDone = true; + QTimer::singleShot(1000, this, SLOT(saveResultManual())); + return; + } + else + { + webA.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);"); + return; + } +} +void SCrawler::saveResultManual() +{ + saveResult(true); +} bool SCrawler::SendIpList(QString _strIpList) { @@ -850,7 +868,10 @@ QList SCrawler::FindAllMid(const QWebElement _FindElement,const QSt void SCrawler::killProcess() { - cout << endl << "timeout"; + if (m_strIpList.isEmpty()) + cout << endl << "timeout"; + else + cout << m_strIpList.toStdString() << "ok"; emit finished(); } diff --git a/ProxyProcess/scrawler.h b/ProxyProcess/scrawler.h index d575f66..79bc62f 100644 --- a/ProxyProcess/scrawler.h +++ b/ProxyProcess/scrawler.h @@ -20,6 +20,7 @@ signals: void finished(); private slots: void saveResult(bool ok); + void saveResultManual(); void killProcess(); private: int m_nSelect; @@ -34,6 +35,9 @@ private: QString m_strFolder; QString m_strLocation; + QString m_strIpList; + + bool m_bDone; bool m_bUse; bool m_bLast; bool m_bError; @@ -58,7 +62,7 @@ public: QString getIpListFromFreeproxy(const QWebElement _FindElement); QString getIpListFromXroxy(const QWebElement _FindElement); QString getIpListFromCoolProxy(const QWebElement _FindElement); - QString getIpListFromGatherProxy(); + void getIpListFromGatherProxy(); QString addSource(QString _strIpList, QString _strSource);