gatherproxy.com 사이트 크롤링 수정
git-svn-id: svn://192.168.0.12/source@269 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE QtCreatorProject>
|
||||
<!-- Written by QtCreator 3.3.0, 2015-10-15T15:42:58. -->
|
||||
<!-- Written by QtCreator 3.3.0, 2016-05-27T16:51:29. -->
|
||||
<qtcreator>
|
||||
<data>
|
||||
<variable>EnvironmentId</variable>
|
||||
@@ -227,7 +227,7 @@
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">ProxyProcess</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Qt4RunConfiguration:C:/source/ProxyProcess/ProxyProcess.pro</value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments">"http://www.gatherproxy.com/proxylist/anonymity/?t=Elite" "c:\data\proxytest.txt"</value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.ProFile">ProxyProcess.pro</value>
|
||||
<value type="bool" key="Qt4ProjectManager.Qt4RunConfiguration.UseDyldImageSuffix">false</value>
|
||||
<value type="bool" key="Qt4ProjectManager.Qt4RunConfiguration.UseTerminal">true</value>
|
||||
|
||||
@@ -14,14 +14,12 @@ struct SProxyList
|
||||
int m_nPort;
|
||||
};
|
||||
|
||||
SCrawler::SCrawler():QObject()
|
||||
SCrawler::SCrawler():QObject(), m_bDone(false), m_bCrawled(false)
|
||||
{
|
||||
m_page = new QWebPage;
|
||||
connect(m_page, SIGNAL(loadFinished(bool)), this, SLOT(saveResult(bool)));
|
||||
// p_timer = new QTimer(this);
|
||||
// connect(p_timer,SIGNAL(timeout()), this, SLOT(killProcess()));
|
||||
QTimer::singleShot(60000, this, SLOT(killProcess()));
|
||||
m_bCrawled = false;
|
||||
|
||||
}
|
||||
|
||||
@@ -32,9 +30,16 @@ SCrawler::~SCrawler()
|
||||
void SCrawler::load(QStringList _strlistArgv)
|
||||
{
|
||||
QUrl url(_strlistArgv.at(0));
|
||||
QNetworkRequest *request = new QNetworkRequest;
|
||||
//QNetworkRequest *request = new QNetworkRequest;
|
||||
|
||||
m_strUrl = _strlistArgv.at(0);
|
||||
|
||||
if (m_strUrl.contains("gatherproxy.com"))
|
||||
QTimer::singleShot(600000, this, SLOT(killProcess()));
|
||||
else
|
||||
QTimer::singleShot(60000, this, SLOT(killProcess()));
|
||||
|
||||
|
||||
m_page->settings()->setAttribute(QWebSettings::AutoLoadImages, true);
|
||||
m_page->settings()->setAttribute(QWebSettings::JavaEnabled, true);
|
||||
|
||||
@@ -198,9 +203,22 @@ void SCrawler::saveResult(bool ok)
|
||||
}
|
||||
else if(m_strUrl.contains("gatherproxy.com"))
|
||||
{
|
||||
strIpList = getIpListFromGatherProxy();
|
||||
if(strIpList.trimmed().size() > 0 )
|
||||
strIpList = addSource(strIpList, "gatherproxy.com");
|
||||
if (!m_bDone)
|
||||
{
|
||||
if (!m_bCrawled)
|
||||
{
|
||||
m_bCrawled = true;
|
||||
getIpListFromGatherProxy();
|
||||
m_bCrawled = false;
|
||||
}
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
strIpList = m_strIpList.trimmed();
|
||||
if(strIpList.trimmed().size() > 0 )
|
||||
strIpList = addSource(strIpList, "gatherproxy.com");
|
||||
}
|
||||
}
|
||||
|
||||
else if(m_strUrl.contains("wait3"))
|
||||
@@ -714,67 +732,67 @@ QString SCrawler::getIpListFromCoolProxy(const QWebElement _FindElement)
|
||||
|
||||
|
||||
|
||||
QString SCrawler::getIpListFromGatherProxy()
|
||||
void SCrawler::getIpListFromGatherProxy()
|
||||
{
|
||||
static bool b_first = true;
|
||||
QString totalResult;
|
||||
QTcpSocket socket;
|
||||
socket.connectToHost("65.50.243.103",80);
|
||||
if(!socket.waitForConnected())
|
||||
if (b_first)
|
||||
{
|
||||
qDebug() << "Error: " << socket.errorString();
|
||||
b_first = false;
|
||||
QWebElement button = Find(m_page->mainFrame()->documentElement(), "input", "class", "button");
|
||||
button.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
|
||||
return;
|
||||
}
|
||||
|
||||
QString index = m_strUrl.right(2);
|
||||
QRegExp re("(\\d+)");
|
||||
int pos = 0;
|
||||
QString num;
|
||||
while((pos = re.indexIn(index, pos)) != -1)
|
||||
QWebElement webTable = Find(m_page->mainFrame()->documentElement(), "table", "id", "tblproxy");
|
||||
if (webTable.isNull())
|
||||
{
|
||||
num = re.cap(1);
|
||||
pos += re.matchedLength();
|
||||
m_bDone = true;
|
||||
QTimer::singleShot(1000, this, SLOT(saveResultManual()));
|
||||
return;
|
||||
}
|
||||
|
||||
QString strheader = "POST /proxylist/anonymity/?t=Elite HTTP/1.1\r\n"
|
||||
"Host: www.gatherproxy.com\r\n"
|
||||
"Connection: keep-alive\r\n"
|
||||
"Content-Length: " + QString::number(28+num.length()) + "\r\n"
|
||||
"Cache-Control: max-age=0\r\n"
|
||||
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8\r\n"
|
||||
"Origin: http://www.gatherproxy.com\r\n"
|
||||
"User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.132 Safari/537.36\r\n"
|
||||
"Content-Type: application/x-www-form-urlencoded\r\n"
|
||||
"Referer: http://www.gatherproxy.com/proxylist/anonymity/?t=Elite\r\n"
|
||||
"Accept-Encoding: deflate\r\n"
|
||||
"Accept-Language: ko-KR,ko;q=0.8,en-US;q=0.6,en;q=0.4\r\n\r\n"
|
||||
"Type=elite&PageIdx=" + num + "&Uptime=0";
|
||||
socket.write(strheader.toUtf8());
|
||||
|
||||
QString strPacket;
|
||||
while (socket.waitForReadyRead())
|
||||
QWebElementCollection trs = webTable.findAll("tr");
|
||||
if (trs.count() > 2)
|
||||
{
|
||||
strPacket += QString::fromUtf8(socket.readAll());
|
||||
}
|
||||
//Debug("c:/data/asdf.html", strPacket);
|
||||
{
|
||||
int pos = 0;
|
||||
QRegExp re("<td><script>document\\.write\\('(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})'\\)</script></td>\\s*<td><script>document\\.write\\(gp\\.dep\\('([A-Fa-f0-9]{2,4})'\\)\\)</script>");
|
||||
while((pos = re.indexIn(strPacket, pos)) != -1)
|
||||
for (int i = 2; i < trs.count(); i++)
|
||||
{
|
||||
QString ip = re.cap(1);
|
||||
QString port = QString::number(getPort(re.cap(2)));
|
||||
|
||||
if(!ip.isNull() && !port.isNull())
|
||||
QWebElementCollection tds = trs.at(i).findAll("td");
|
||||
if (tds.count() > 2)
|
||||
{
|
||||
QString ip = tds.at(1).toPlainText();
|
||||
QString port = tds.at(2).toPlainText();
|
||||
totalResult += (ip + "," + port + "\n");
|
||||
}
|
||||
|
||||
pos += re.matchedLength();
|
||||
}
|
||||
|
||||
}
|
||||
return totalResult.trimmed();
|
||||
}
|
||||
|
||||
m_strIpList += totalResult;
|
||||
QWebElement webPageNavi = Find(m_page->mainFrame()->documentElement(), "div", "class", "pagenavi");
|
||||
if (webPageNavi.isNull())
|
||||
{
|
||||
m_bDone = true;
|
||||
QTimer::singleShot(1000, this, SLOT(saveResultManual()));
|
||||
return;
|
||||
}
|
||||
QWebElement span = webPageNavi.findFirst("span");
|
||||
QWebElement webA = span.nextSibling();
|
||||
if (webA.isNull())
|
||||
{
|
||||
m_bDone = true;
|
||||
QTimer::singleShot(1000, this, SLOT(saveResultManual()));
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
webA.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
|
||||
return;
|
||||
}
|
||||
}
|
||||
void SCrawler::saveResultManual()
|
||||
{
|
||||
saveResult(true);
|
||||
}
|
||||
|
||||
bool SCrawler::SendIpList(QString _strIpList)
|
||||
{
|
||||
@@ -850,7 +868,10 @@ QList<QWebElement> SCrawler::FindAllMid(const QWebElement _FindElement,const QSt
|
||||
|
||||
void SCrawler::killProcess()
|
||||
{
|
||||
cout << endl << "timeout";
|
||||
if (m_strIpList.isEmpty())
|
||||
cout << endl << "timeout";
|
||||
else
|
||||
cout << m_strIpList.toStdString() << "ok";
|
||||
emit finished();
|
||||
}
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ signals:
|
||||
void finished();
|
||||
private slots:
|
||||
void saveResult(bool ok);
|
||||
void saveResultManual();
|
||||
void killProcess();
|
||||
private:
|
||||
int m_nSelect;
|
||||
@@ -34,6 +35,9 @@ private:
|
||||
QString m_strFolder;
|
||||
|
||||
QString m_strLocation;
|
||||
QString m_strIpList;
|
||||
|
||||
bool m_bDone;
|
||||
bool m_bUse;
|
||||
bool m_bLast;
|
||||
bool m_bError;
|
||||
@@ -58,7 +62,7 @@ public:
|
||||
QString getIpListFromFreeproxy(const QWebElement _FindElement);
|
||||
QString getIpListFromXroxy(const QWebElement _FindElement);
|
||||
QString getIpListFromCoolProxy(const QWebElement _FindElement);
|
||||
QString getIpListFromGatherProxy();
|
||||
void getIpListFromGatherProxy();
|
||||
|
||||
|
||||
QString addSource(QString _strIpList, QString _strSource);
|
||||
|
||||
Reference in New Issue
Block a user