gatherproxy.com 사이트 크롤링 수정
git-svn-id: svn://192.168.0.12/source@269 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<!DOCTYPE QtCreatorProject>
|
<!DOCTYPE QtCreatorProject>
|
||||||
<!-- Written by QtCreator 3.3.0, 2015-10-15T15:42:58. -->
|
<!-- Written by QtCreator 3.3.0, 2016-05-27T16:51:29. -->
|
||||||
<qtcreator>
|
<qtcreator>
|
||||||
<data>
|
<data>
|
||||||
<variable>EnvironmentId</variable>
|
<variable>EnvironmentId</variable>
|
||||||
@@ -227,7 +227,7 @@
|
|||||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">ProxyProcess</value>
|
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">ProxyProcess</value>
|
||||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Qt4RunConfiguration:C:/source/ProxyProcess/ProxyProcess.pro</value>
|
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Qt4RunConfiguration:C:/source/ProxyProcess/ProxyProcess.pro</value>
|
||||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments"></value>
|
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments">"http://www.gatherproxy.com/proxylist/anonymity/?t=Elite" "c:\data\proxytest.txt"</value>
|
||||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.ProFile">ProxyProcess.pro</value>
|
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.ProFile">ProxyProcess.pro</value>
|
||||||
<value type="bool" key="Qt4ProjectManager.Qt4RunConfiguration.UseDyldImageSuffix">false</value>
|
<value type="bool" key="Qt4ProjectManager.Qt4RunConfiguration.UseDyldImageSuffix">false</value>
|
||||||
<value type="bool" key="Qt4ProjectManager.Qt4RunConfiguration.UseTerminal">true</value>
|
<value type="bool" key="Qt4ProjectManager.Qt4RunConfiguration.UseTerminal">true</value>
|
||||||
|
|||||||
@@ -14,14 +14,12 @@ struct SProxyList
|
|||||||
int m_nPort;
|
int m_nPort;
|
||||||
};
|
};
|
||||||
|
|
||||||
SCrawler::SCrawler():QObject()
|
SCrawler::SCrawler():QObject(), m_bDone(false), m_bCrawled(false)
|
||||||
{
|
{
|
||||||
m_page = new QWebPage;
|
m_page = new QWebPage;
|
||||||
connect(m_page, SIGNAL(loadFinished(bool)), this, SLOT(saveResult(bool)));
|
connect(m_page, SIGNAL(loadFinished(bool)), this, SLOT(saveResult(bool)));
|
||||||
// p_timer = new QTimer(this);
|
// p_timer = new QTimer(this);
|
||||||
// connect(p_timer,SIGNAL(timeout()), this, SLOT(killProcess()));
|
// connect(p_timer,SIGNAL(timeout()), this, SLOT(killProcess()));
|
||||||
QTimer::singleShot(60000, this, SLOT(killProcess()));
|
|
||||||
m_bCrawled = false;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -32,9 +30,16 @@ SCrawler::~SCrawler()
|
|||||||
void SCrawler::load(QStringList _strlistArgv)
|
void SCrawler::load(QStringList _strlistArgv)
|
||||||
{
|
{
|
||||||
QUrl url(_strlistArgv.at(0));
|
QUrl url(_strlistArgv.at(0));
|
||||||
QNetworkRequest *request = new QNetworkRequest;
|
//QNetworkRequest *request = new QNetworkRequest;
|
||||||
|
|
||||||
m_strUrl = _strlistArgv.at(0);
|
m_strUrl = _strlistArgv.at(0);
|
||||||
|
|
||||||
|
if (m_strUrl.contains("gatherproxy.com"))
|
||||||
|
QTimer::singleShot(600000, this, SLOT(killProcess()));
|
||||||
|
else
|
||||||
|
QTimer::singleShot(60000, this, SLOT(killProcess()));
|
||||||
|
|
||||||
|
|
||||||
m_page->settings()->setAttribute(QWebSettings::AutoLoadImages, true);
|
m_page->settings()->setAttribute(QWebSettings::AutoLoadImages, true);
|
||||||
m_page->settings()->setAttribute(QWebSettings::JavaEnabled, true);
|
m_page->settings()->setAttribute(QWebSettings::JavaEnabled, true);
|
||||||
|
|
||||||
@@ -198,9 +203,22 @@ void SCrawler::saveResult(bool ok)
|
|||||||
}
|
}
|
||||||
else if(m_strUrl.contains("gatherproxy.com"))
|
else if(m_strUrl.contains("gatherproxy.com"))
|
||||||
{
|
{
|
||||||
strIpList = getIpListFromGatherProxy();
|
if (!m_bDone)
|
||||||
if(strIpList.trimmed().size() > 0 )
|
{
|
||||||
strIpList = addSource(strIpList, "gatherproxy.com");
|
if (!m_bCrawled)
|
||||||
|
{
|
||||||
|
m_bCrawled = true;
|
||||||
|
getIpListFromGatherProxy();
|
||||||
|
m_bCrawled = false;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
strIpList = m_strIpList.trimmed();
|
||||||
|
if(strIpList.trimmed().size() > 0 )
|
||||||
|
strIpList = addSource(strIpList, "gatherproxy.com");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
else if(m_strUrl.contains("wait3"))
|
else if(m_strUrl.contains("wait3"))
|
||||||
@@ -714,67 +732,67 @@ QString SCrawler::getIpListFromCoolProxy(const QWebElement _FindElement)
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
QString SCrawler::getIpListFromGatherProxy()
|
void SCrawler::getIpListFromGatherProxy()
|
||||||
{
|
{
|
||||||
|
static bool b_first = true;
|
||||||
QString totalResult;
|
QString totalResult;
|
||||||
QTcpSocket socket;
|
if (b_first)
|
||||||
socket.connectToHost("65.50.243.103",80);
|
|
||||||
if(!socket.waitForConnected())
|
|
||||||
{
|
{
|
||||||
qDebug() << "Error: " << socket.errorString();
|
b_first = false;
|
||||||
|
QWebElement button = Find(m_page->mainFrame()->documentElement(), "input", "class", "button");
|
||||||
|
button.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
QString index = m_strUrl.right(2);
|
QWebElement webTable = Find(m_page->mainFrame()->documentElement(), "table", "id", "tblproxy");
|
||||||
QRegExp re("(\\d+)");
|
if (webTable.isNull())
|
||||||
int pos = 0;
|
|
||||||
QString num;
|
|
||||||
while((pos = re.indexIn(index, pos)) != -1)
|
|
||||||
{
|
{
|
||||||
num = re.cap(1);
|
m_bDone = true;
|
||||||
pos += re.matchedLength();
|
QTimer::singleShot(1000, this, SLOT(saveResultManual()));
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
QString strheader = "POST /proxylist/anonymity/?t=Elite HTTP/1.1\r\n"
|
QWebElementCollection trs = webTable.findAll("tr");
|
||||||
"Host: www.gatherproxy.com\r\n"
|
if (trs.count() > 2)
|
||||||
"Connection: keep-alive\r\n"
|
|
||||||
"Content-Length: " + QString::number(28+num.length()) + "\r\n"
|
|
||||||
"Cache-Control: max-age=0\r\n"
|
|
||||||
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8\r\n"
|
|
||||||
"Origin: http://www.gatherproxy.com\r\n"
|
|
||||||
"User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.132 Safari/537.36\r\n"
|
|
||||||
"Content-Type: application/x-www-form-urlencoded\r\n"
|
|
||||||
"Referer: http://www.gatherproxy.com/proxylist/anonymity/?t=Elite\r\n"
|
|
||||||
"Accept-Encoding: deflate\r\n"
|
|
||||||
"Accept-Language: ko-KR,ko;q=0.8,en-US;q=0.6,en;q=0.4\r\n\r\n"
|
|
||||||
"Type=elite&PageIdx=" + num + "&Uptime=0";
|
|
||||||
socket.write(strheader.toUtf8());
|
|
||||||
|
|
||||||
QString strPacket;
|
|
||||||
while (socket.waitForReadyRead())
|
|
||||||
{
|
{
|
||||||
strPacket += QString::fromUtf8(socket.readAll());
|
for (int i = 2; i < trs.count(); i++)
|
||||||
}
|
|
||||||
//Debug("c:/data/asdf.html", strPacket);
|
|
||||||
{
|
|
||||||
int pos = 0;
|
|
||||||
QRegExp re("<td><script>document\\.write\\('(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})'\\)</script></td>\\s*<td><script>document\\.write\\(gp\\.dep\\('([A-Fa-f0-9]{2,4})'\\)\\)</script>");
|
|
||||||
while((pos = re.indexIn(strPacket, pos)) != -1)
|
|
||||||
{
|
{
|
||||||
QString ip = re.cap(1);
|
QWebElementCollection tds = trs.at(i).findAll("td");
|
||||||
QString port = QString::number(getPort(re.cap(2)));
|
if (tds.count() > 2)
|
||||||
|
|
||||||
if(!ip.isNull() && !port.isNull())
|
|
||||||
{
|
{
|
||||||
|
QString ip = tds.at(1).toPlainText();
|
||||||
|
QString port = tds.at(2).toPlainText();
|
||||||
totalResult += (ip + "," + port + "\n");
|
totalResult += (ip + "," + port + "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
pos += re.matchedLength();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
return totalResult.trimmed();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
m_strIpList += totalResult;
|
||||||
|
QWebElement webPageNavi = Find(m_page->mainFrame()->documentElement(), "div", "class", "pagenavi");
|
||||||
|
if (webPageNavi.isNull())
|
||||||
|
{
|
||||||
|
m_bDone = true;
|
||||||
|
QTimer::singleShot(1000, this, SLOT(saveResultManual()));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
QWebElement span = webPageNavi.findFirst("span");
|
||||||
|
QWebElement webA = span.nextSibling();
|
||||||
|
if (webA.isNull())
|
||||||
|
{
|
||||||
|
m_bDone = true;
|
||||||
|
QTimer::singleShot(1000, this, SLOT(saveResultManual()));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
webA.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void SCrawler::saveResultManual()
|
||||||
|
{
|
||||||
|
saveResult(true);
|
||||||
|
}
|
||||||
|
|
||||||
bool SCrawler::SendIpList(QString _strIpList)
|
bool SCrawler::SendIpList(QString _strIpList)
|
||||||
{
|
{
|
||||||
@@ -850,7 +868,10 @@ QList<QWebElement> SCrawler::FindAllMid(const QWebElement _FindElement,const QSt
|
|||||||
|
|
||||||
void SCrawler::killProcess()
|
void SCrawler::killProcess()
|
||||||
{
|
{
|
||||||
cout << endl << "timeout";
|
if (m_strIpList.isEmpty())
|
||||||
|
cout << endl << "timeout";
|
||||||
|
else
|
||||||
|
cout << m_strIpList.toStdString() << "ok";
|
||||||
emit finished();
|
emit finished();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ signals:
|
|||||||
void finished();
|
void finished();
|
||||||
private slots:
|
private slots:
|
||||||
void saveResult(bool ok);
|
void saveResult(bool ok);
|
||||||
|
void saveResultManual();
|
||||||
void killProcess();
|
void killProcess();
|
||||||
private:
|
private:
|
||||||
int m_nSelect;
|
int m_nSelect;
|
||||||
@@ -34,6 +35,9 @@ private:
|
|||||||
QString m_strFolder;
|
QString m_strFolder;
|
||||||
|
|
||||||
QString m_strLocation;
|
QString m_strLocation;
|
||||||
|
QString m_strIpList;
|
||||||
|
|
||||||
|
bool m_bDone;
|
||||||
bool m_bUse;
|
bool m_bUse;
|
||||||
bool m_bLast;
|
bool m_bLast;
|
||||||
bool m_bError;
|
bool m_bError;
|
||||||
@@ -58,7 +62,7 @@ public:
|
|||||||
QString getIpListFromFreeproxy(const QWebElement _FindElement);
|
QString getIpListFromFreeproxy(const QWebElement _FindElement);
|
||||||
QString getIpListFromXroxy(const QWebElement _FindElement);
|
QString getIpListFromXroxy(const QWebElement _FindElement);
|
||||||
QString getIpListFromCoolProxy(const QWebElement _FindElement);
|
QString getIpListFromCoolProxy(const QWebElement _FindElement);
|
||||||
QString getIpListFromGatherProxy();
|
void getIpListFromGatherProxy();
|
||||||
|
|
||||||
|
|
||||||
QString addSource(QString _strIpList, QString _strSource);
|
QString addSource(QString _strIpList, QString _strSource);
|
||||||
|
|||||||
Reference in New Issue
Block a user