프록시 사이트 추가

git-svn-id: svn://192.168.0.12/source@165 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
admin
2015-07-09 09:30:40 +00:00
parent 86f013b167
commit 17eb8b75ca
4 changed files with 432 additions and 90 deletions

View File

@@ -12,7 +12,7 @@ CONFIG -= app_bundle
TEMPLATE = app
SOURCES += main.cpp \
scrawler.cpp \
scrawler.cpp
HEADERS += \

View File

@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE QtCreatorProject>
<!-- Written by QtCreator 3.3.0, 2015-07-06T17:29:37. -->
<!-- Written by QtCreator 3.3.0, 2015-07-09T16:06:42. -->
<qtcreator>
<data>
<variable>EnvironmentId</variable>
@@ -227,7 +227,7 @@
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">ProxyProcess</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Qt4RunConfiguration:C:/source/ProxyProcess/ProxyProcess.pro</value>
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments">&quot;http://free-proxy.cz/en/proxylist/country/all/http/speed/level2&quot;</value>
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments">&quot;http://www.gatherproxy.com/proxylist/anonymity/?t=Elite3&quot;</value>
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.ProFile">ProxyProcess.pro</value>
<value type="bool" key="Qt4ProjectManager.Qt4RunConfiguration.UseDyldImageSuffix">false</value>
<value type="bool" key="Qt4ProjectManager.Qt4RunConfiguration.UseTerminal">true</value>

View File

@@ -4,7 +4,8 @@
#include <QSqlError>
#include <QByteArray>
#include <qDebug>
#include <QRegExp>
#include <QThread>
using namespace std;
struct SProxyList
@@ -19,8 +20,9 @@ SCrawler::SCrawler():QObject()
connect(m_page, SIGNAL(loadFinished(bool)), this, SLOT(saveResult(bool)));
// p_timer = new QTimer(this);
// connect(p_timer,SIGNAL(timeout()), this, SLOT(killProcess()));
QTimer::singleShot(90000, this, SLOT(killProcess()));
QTimer::singleShot(60000, this, SLOT(killProcess()));
m_bCrawled = false;
}
SCrawler::~SCrawler()
@@ -61,9 +63,6 @@ void SCrawler::saveResult(bool ok)
return;
}
Debug("c:/data/test3.html", m_page->currentFrame()->toHtml());
QString strIpList;
if(m_strUrl.contains("hidemyass"))
{
@@ -96,7 +95,7 @@ void SCrawler::saveResult(bool ok)
strIpList = addSource(strIpList, "cybersyndrome.net");
}
}
else if(m_strUrl.contains("proxylists"))
else if(m_strUrl.contains("proxylists.net"))
{
QWebElement p_parse = m_page->mainFrame()->findFirstElement("table");
@@ -107,34 +106,125 @@ void SCrawler::saveResult(bool ok)
strIpList = addSource(strIpList, "proxylists.net");
}
}
else if(m_strUrl.contains("txt.proxyspy.net"))
{
QString p_parse = m_page->mainFrame()->toPlainText();
if(!p_parse.isEmpty())
{
strIpList = getIpListFromProxySpy(p_parse);
if(strIpList.trimmed().size() > 0 )
strIpList = addSource(strIpList, "proxyspy.net");
}
/*
if(!p_parse.isNull())
{
strIpList = getIpListFromProxylists(p_parse);
if(strIpList.trimmed().size() > 0 )
strIpList = addSource(strIpList, "proxylists.net");
}
*/
}
else if(m_strUrl.contains("proxysearcher.sourceforge.net"))
{
QWebElement p_parse = m_page->mainFrame()->findFirstElement("body");
if(!p_parse.isNull())
{
strIpList = getIpListFromProxySearcher(p_parse);
if(strIpList.trimmed().size() > 0 )
strIpList = addSource(strIpList, "proxysearcher.sourceforge.net");
}
}
else if(m_strUrl.contains("proxylist.ro"))
{
QWebElement p_parse = m_page->mainFrame()->findFirstElement("body");
if(!p_parse.isNull())
{
strIpList = getIpListFromProxyListro(p_parse);
if(strIpList.trimmed().size() > 0 )
strIpList = addSource(strIpList, "proxylist.ro");
}
}
else if(m_strUrl.contains("samair.ru"))
{
QWebElement p_parse = Find(m_page->currentFrame()->documentElement(), "div", "id", "content");
if(!p_parse.isNull())
{
strIpList = getIpListFromSamuir(p_parse);
if(strIpList.trimmed().size() > 0 )
strIpList = addSource(strIpList, "samair.ru");
}
}
else if(m_strUrl.contains("nntime.com"))
{
QWebElement p_parse = Find(m_page->currentFrame()->documentElement(), "table", "id", "proxylist");
if(!p_parse.isNull())
{
strIpList = getIpListFromNntime(p_parse);
if(strIpList.trimmed().size() > 0 )
strIpList = addSource(strIpList, "nntime.com");
}
}
else if(m_strUrl.contains("free-proxy.cz"))
{
QWebElement p_parse = Find(m_page->currentFrame()->documentElement(), "table", "id", "proxy_list");
if(!p_parse.isNull())
{
strIpList = getIpListFromFreeproxy(p_parse);
if(strIpList.trimmed().size() > 0 )
strIpList = addSource(strIpList, "free-proxy.cz");
}
}
else if(m_strUrl.contains("xroxy.com"))
{
QWebElement p_parse = m_page->currentFrame()->documentElement().findFirst("body");
if(!p_parse.isNull())
{
strIpList = getIpListFromXroxy(p_parse);
if(strIpList.trimmed().size() > 0 )
strIpList = addSource(strIpList, "xroxy.com");
}
}
else if(m_strUrl.contains("cool-proxy.net"))
{
QWebElement p_parse = Find(m_page->currentFrame()->documentElement(), "div", "id", "main");
if(!p_parse.isNull())
{
strIpList = getIpListFromCoolProxy(p_parse);
if(strIpList.trimmed().size() > 0 )
strIpList = addSource(strIpList, "cool-proxy.net");
}
}
else if(m_strUrl.contains("gatherproxy.com"))
{
strIpList = getIpListFromGatherProxy();
if(strIpList.trimmed().size() > 0 )
strIpList = addSource(strIpList, "gatherproxy.com");
}
else if(m_strUrl.contains("wait3"))
{
QThread::sleep(3);
}
else if(m_strUrl.contains("wait5"))
{
QThread::sleep(5);
}
QThread::sleep(5);
//qDebug() << strIpList;
//Debug("c:/data/test3.html", m_page->mainFrame()->toHtml());
if(strIpList.trimmed().size() > 0)
cout << strIpList.trimmed().toStdString();
// success to crawling
if(strIpList.size() > 8)
{
// in case sending iplist to db
if(m_strLocation.compare("local") != 0)
{
// success to send ip list to db
if(SendIpList(strIpList))
{
cout << endl << "uok";
}
// fail to sen ip list to db
else
{
cout << endl << "fok";
}
}
// in case not sending iplist to db
else
{
cout << endl << "ok";
}
cout << endl << "ok";
}
// fail to crawling
else
{
cout << "sitedown";
@@ -408,7 +498,284 @@ QString SCrawler::getIpListFromProxylists(const QWebElement _FindElement)
return totalResult.trimmed();
}
/*
/// Parses the plain-text proxy list served by txt.proxyspy.net.
///
/// Every non-empty line of @p _txt is scanned for entries of the form
/// "<ip>:<port> <letters>-<N|A|H>...". Only entries whose flag letter is
/// "A" or "H" are kept (presumably the anonymity level -- confirm against
/// the site's format description).
///
/// @param _txt  Plain text of the downloaded page.
/// @return Newline-separated "ip,port" pairs with surrounding whitespace
///         trimmed; empty when nothing matched.
QString SCrawler::getIpListFromProxySpy(const QString _txt)
{
    QRegExp entryPattern("(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}):(\\d{2,5})\\s*[a-zA-Z]+-(N|A|H)\\S*");
    QString collected;

    const QStringList lines = _txt.split("\n", QString::SkipEmptyParts);
    for (int lineNo = 0; lineNo < lines.size(); ++lineNo)
    {
        const QString &line = lines.at(lineNo);

        // Walk over every match on the line, resuming right after the previous one.
        for (int offset = entryPattern.indexIn(line, 0);
             offset != -1;
             offset = entryPattern.indexIn(line, offset + entryPattern.matchedLength()))
        {
            const QString flag = entryPattern.cap(3);
            if (flag == "A" || flag == "H")
            {
                collected += entryPattern.cap(1) + "," + entryPattern.cap(2) + "\n";
            }
        }
    }

    return collected.trimmed();
}
/// Collects every "ip:port" occurrence found in the visible text of
/// @p _FindElement.
///
/// @param _FindElement  Element whose plain text is scanned.
/// @return Newline-separated "ip,port" pairs, trimmed; empty when the text
///         contains no address.
QString SCrawler::getIpListFromProxySearcher(const QWebElement _FindElement)
{
    const QString pageText = _FindElement.toPlainText();
    QRegExp addressPattern("(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}):(\\d{2,5})");

    QString collected;
    int offset = addressPattern.indexIn(pageText, 0);
    while (offset != -1)
    {
        collected += addressPattern.cap(1) + "," + addressPattern.cap(2) + "\n";
        // Continue searching immediately after the address just consumed.
        offset = addressPattern.indexIn(pageText, offset + addressPattern.matchedLength());
    }

    return collected.trimmed();
}
/// Extracts proxies from the table rows found under @p _FindElement.
///
/// A row is accepted when its "class" attribute is exactly "speed1" or
/// "speed2", it has at least four cells, and the fourth cell reads "Y" or
/// "y". The second cell is emitted as the address and the third as the
/// port, both with double quotes removed.
///
/// @param _FindElement  Element whose descendant <tr> rows are scanned.
/// @return Newline-separated "ip,port" pairs, trimmed.
QString SCrawler::getIpListFromProxyListro(const QWebElement _FindElement)
{
    QString collected;

    const QWebElementCollection rows = _FindElement.findAll("tr");
    for (int r = 0; r < rows.count(); ++r)
    {
        const QWebElement row = rows.at(r);

        const QString rowClass = row.attribute("class").trimmed();
        if (rowClass != "speed1" && rowClass != "speed2")
            continue;

        const QWebElementCollection cells = row.findAll("td");
        if (cells.count() < 4)
            continue;

        const QString marker = cells.at(3).toPlainText().trimmed();
        if (marker != "Y" && marker != "y")
            continue;

        const QString address = cells.at(1).toPlainText().trimmed().replace("\"", "").trimmed();
        const QString port = cells.at(2).toPlainText().trimmed().replace("\"", "").trimmed();
        collected += address + "," + port + "\n";
    }

    return collected.trimmed();
}
/// Extracts proxies from the table with id "proxylist" located beneath
/// @p _FindElement.
///
/// Rows with fewer than three cells are ignored. A row is kept when its
/// second cell contains the substring "anony"; its first cell (expected to
/// hold "ip:port") is emitted with double quotes stripped and every ':'
/// replaced by ','.
///
/// @param _FindElement  Container element that is searched for the table.
/// @return Newline-separated "ip,port" entries, trimmed.
QString SCrawler::getIpListFromSamuir(const QWebElement _FindElement)
{
    const QWebElement proxyTable = Find(_FindElement, "table", "id", "proxylist");
    const QWebElementCollection rows = proxyTable.findAll("tr");

    QString collected;
    for (int r = 0; r < rows.count(); ++r)
    {
        const QWebElementCollection cells = rows.at(r).findAll("td");
        if (cells.count() >= 3 && cells.at(1).toPlainText().contains("anony"))
        {
            QString address = cells.at(0).toPlainText().replace("\"", "").trimmed();
            address.replace(":", ",");
            collected += address + "\n";
        }
    }

    return collected.trimmed();
}
/// Extracts proxies from the rows found under @p _FindElement.
///
/// Rows with fewer than four cells are ignored. A row is kept when its
/// third cell contains the substring "anony"; every "ip:port" occurrence in
/// its second cell (double quotes removed) is then emitted.
///
/// @param _FindElement  Element whose descendant <tr> rows are scanned.
/// @return Newline-separated "ip,port" pairs, trimmed.
QString SCrawler::getIpListFromNntime(const QWebElement _FindElement)
{
    QString totalResult;
    QRegExp re("(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}):(\\d{2,5})");

    const QWebElementCollection trs = _FindElement.findAll("tr");
    for (int r = 0; r < trs.count(); ++r)
    {
        const QWebElementCollection tds = trs.at(r).findAll("td");
        if (tds.count() < 4)
            continue;
        if (!tds.at(2).toPlainText().contains("anony"))
            continue;

        // The cell text does not change while it is being scanned, so it is
        // extracted from the DOM once instead of on every regex iteration.
        const QString cellText = tds.at(1).toPlainText().replace("\"", "");

        int pos = 0;
        while ((pos = re.indexIn(cellText, pos)) != -1)
        {
            totalResult += (re.cap(1) + "," + re.cap(2) + "\n");
            pos += re.matchedLength();
        }
    }

    return totalResult.trimmed();
}
/// Extracts proxies from the rows found under @p _FindElement.
///
/// Only rows with at least seven cells are used; the first cell is emitted
/// as the address and the second as the port, each trimmed.
///
/// @param _FindElement  Element whose descendant <tr> rows are scanned.
/// @return Newline-separated "ip,port" pairs, trimmed.
QString SCrawler::getIpListFromFreeproxy(const QWebElement _FindElement)
{
    const QWebElementCollection rows = _FindElement.findAll("tr");

    QString collected;
    for (int r = 0; r < rows.count(); ++r)
    {
        const QWebElementCollection cells = rows.at(r).findAll("td");
        if (cells.count() >= 7)
        {
            const QString address = cells.at(0).toPlainText().trimmed();
            const QString port = cells.at(1).toPlainText().trimmed();
            collected += address + "," + port + "\n";
        }
    }

    return collected.trimmed();
}
// Returns the text captured by group 1 of the LAST match of _pattern in
// _text, or an empty string when _pattern does not match at all.
static QString lastCaptureIn(const QRegExp &_pattern, const QString &_text)
{
    QString found;
    int pos = 0;
    while ((pos = _pattern.indexIn(_text, pos)) != -1)
    {
        found = _pattern.cap(1);
        pos += _pattern.matchedLength();
    }
    return found;
}

// Scans every <tr> under _root that has at least seven cells and emits one
// "ip,port" line per row, taking the address from cell _ipColumn and the
// port from cell _portColumn. Rows where either value is missing are skipped.
static QString collectIpPortRows(const QWebElement &_root, int _ipColumn, int _portColumn)
{
    const QRegExp reip("(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})");
    const QRegExp repo("(\\d{2,5})");

    QString totalResult;
    const QWebElementCollection trs = _root.findAll("tr");
    for (int r = 0; r < trs.count(); ++r)
    {
        const QWebElementCollection tds = trs.at(r).findAll("td");
        if (tds.count() < 7)
            continue;

        // Each cell's text is read once; it is invariant during the regex scan.
        const QString ipText = tds.at(_ipColumn).toPlainText().replace("\"", "").trimmed();
        const QString portText = tds.at(_portColumn).toPlainText().replace("\"", "").trimmed();

        const QString ip = lastCaptureIn(reip, ipText);
        const QString port = lastCaptureIn(repo, portText);
        if (!ip.isEmpty() && !port.isEmpty())
        {
            totalResult += (ip + "," + port + "\n");
        }
    }
    return totalResult.trimmed();
}

/// Extracts proxies from the rows found under @p _FindElement.
///
/// Rows need at least seven cells. The address is the last dotted-quad found
/// in the second cell and the port is the last 2-5 digit run found in the
/// third cell (double quotes are removed before matching).
///
/// @param _FindElement  Element whose descendant <tr> rows are scanned.
/// @return Newline-separated "ip,port" pairs, trimmed.
QString SCrawler::getIpListFromXroxy(const QWebElement _FindElement)
{
    return collectIpPortRows(_FindElement, 1, 2);
}

/// Extracts proxies from the rows found under @p _FindElement.
///
/// Same extraction as getIpListFromXroxy(), except that the address is read
/// from the first cell and the port from the second cell.
///
/// @param _FindElement  Element whose descendant <tr> rows are scanned.
/// @return Newline-separated "ip,port" pairs, trimmed.
QString SCrawler::getIpListFromCoolProxy(const QWebElement _FindElement)
{
    return collectIpPortRows(_FindElement, 0, 1);
}
/// Fetches one page of the gatherproxy.com "Elite" list with a hand-built
/// HTTP POST and extracts the proxies from the raw response.
///
/// The page index is the last run of digits found in the final two
/// characters of m_strUrl (empty when there is none). Addresses are read
/// from the page's document.write('<ip>') cells and ports from the hex
/// argument of gp.dep('<hex>'), converted with getPort().
///
/// @return Newline-separated "ip,port" pairs, trimmed; an empty string when
///         the connection cannot be established or nothing matches.
QString SCrawler::getIpListFromGatherProxy()
{
    QTcpSocket socket;
    // NOTE(review): the server address is hard-coded rather than resolved
    // from the Host name used below -- confirm it is still valid.
    socket.connectToHost("65.50.243.103",80);
    if(!socket.waitForConnected())
    {
        qDebug() << "Error: " << socket.errorString();
        // Nothing can be sent or received on an unconnected socket.
        return QString();
    }

    // Page index = last digit run within the last two characters of the URL.
    QString num;
    {
        const QString index = m_strUrl.right(2);
        QRegExp reIndex("(\\d+)");
        int pos = 0;
        while((pos = reIndex.indexIn(index, pos)) != -1)
        {
            num = reIndex.cap(1);
            pos += reIndex.matchedLength();
        }
    }

    // Build the body first so Content-Length is derived from the bytes that
    // are actually sent instead of a hand-counted constant.
    const QByteArray body = QString("Type=elite&PageIdx=" + num + "&Uptime=0").toUtf8();

    // "Connection: close" makes the server end the connection after the
    // response, so the read loop below terminates as soon as the page is
    // complete instead of idling until waitForReadyRead() times out.
    const QString strheader = "POST /proxylist/anonymity/?t=Elite HTTP/1.1\r\n"
                              "Host: www.gatherproxy.com\r\n"
                              "Connection: close\r\n"
                              "Content-Length: " + QString::number(body.size()) + "\r\n"
                              "Cache-Control: max-age=0\r\n"
                              "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8\r\n"
                              "Origin: http://www.gatherproxy.com\r\n"
                              "User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.132 Safari/537.36\r\n"
                              "Content-Type: application/x-www-form-urlencoded\r\n"
                              "Referer: http://www.gatherproxy.com/proxylist/anonymity/?t=Elite\r\n"
                              "Accept-Encoding: deflate\r\n"
                              "Accept-Language: ko-KR,ko;q=0.8,en-US;q=0.6,en;q=0.4\r\n\r\n";
    socket.write(strheader.toUtf8() + body);

    // Accumulate raw bytes and decode once: decoding each chunk separately
    // can corrupt a multi-byte UTF-8 sequence split across two reads.
    QByteArray rawPacket;
    while (socket.waitForReadyRead())
    {
        rawPacket += socket.readAll();
    }
    rawPacket += socket.readAll(); // drain anything still buffered after the loop ends
    const QString strPacket = QString::fromUtf8(rawPacket);
    //Debug("c:/data/asdf.html", strPacket);

    QString totalResult;
    QRegExp reRow("<td><script>document\\.write\\('(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})'\\)</script></td>\\s*<td><script>document\\.write\\(gp\\.dep\\('([A-Fa-f0-9]{2,4})'\\)\\)</script>");
    int pos = 0;
    while((pos = reRow.indexIn(strPacket, pos)) != -1)
    {
        const QString ip = reRow.cap(1);
        // The page carries the port as a hexadecimal string.
        const QString port = QString::number(getPort(reRow.cap(2)));
        totalResult += (ip + "," + port + "\n");
        pos += reRow.matchedLength();
    }
    return totalResult.trimmed();
}
bool SCrawler::SendIpList(QString _strIpList)
{
QSqlDatabase db = QSqlDatabase::addDatabase("QMYSQL");
@@ -464,65 +831,6 @@ bool SCrawler::SendIpList(QString _strIpList)
}
return true;
}
*/
/// Inserts the crawled proxies into the "Proxy" table of the MySQL database.
///
/// @p _strIpList holds one entry per line in the form "ip,port" or
/// "ip,port,source". Each well-formed line becomes one INSERT; a failing
/// INSERT is reported on stderr and does not abort the remaining lines.
/// Existing rows are not cleared first.
///
/// @param _strIpList  Newline-separated proxy entries.
/// @return false when the database cannot be opened, true otherwise (even
///         if individual inserts failed).
bool SCrawler::SendIpList(QString _strIpList)
{
    // NOTE(review): host and credentials are hard-coded in the source;
    // consider moving them to configuration.
    QSqlDatabase db = QSqlDatabase::addDatabase("QMYSQL");
    db.setHostName("bigbird.iptime.org");
    db.setUserName("admin");
    db.setPassword("admin123");
    db.setDatabaseName("concepters");
    if (db.open() == false)
    {
        qDebug() << "DB open Failed in SendIpList()";
        return false;
    }

    // The values come from scraped web pages, i.e. untrusted text, so they
    // are bound as parameters instead of being concatenated into the SQL.
    const QString insertPlain = "insert into Proxy set Proxy=?, Port=?";
    const QString insertWithSource = "insert into Proxy set Proxy=?, Port=?, Source=?";

    const QStringList _slIpList = _strIpList.split("\n");
    for (int i = 0; i < _slIpList.size(); ++i)
    {
        // Split each line once and validate it before indexing into it.
        const QStringList fields = _slIpList.at(i).split(',');
        if (fields.size() < 2)
        {
            cerr << "skipped malformed proxy entry: " << _slIpList.at(i).toStdString() << endl;
            continue;
        }

        bool portOk = false;
        const int port = fields.at(1).trimmed().toInt(&portOk);
        if (!portOk)
        {
            cerr << "skipped malformed proxy entry: " << _slIpList.at(i).toStdString() << endl;
            continue;
        }

        const bool hasSource = fields.size() > 2;

        // Scoped per row so no query object outlives db.close() below.
        QSqlQuery sql(db);
        sql.prepare(hasSource ? insertWithSource : insertPlain);
        sql.addBindValue(fields.at(0).trimmed());
        sql.addBindValue(port);
        if (hasSource)
            sql.addBindValue(fields.at(2).trimmed());

        if (sql.exec() == false)
        {
            // Best effort: report the failure and carry on with the next entry.
            cerr << sql.lastQuery().toStdString() << " : "
                 << sql.lastError().text().toStdString() << endl;
        }
    }

    db.close();
    return true;
}
QList<QWebElement> SCrawler::FindAllMid(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind, const int _strStart, const int _strLength)
{
@@ -564,4 +872,25 @@ QString SCrawler::addSource(QString _strIpList, QString _strSource)
return straddedList.join("\n").trimmed();
}
/// Converts a hexadecimal string to its integer value.
///
/// Digits are read most-significant first; both "a"-"f" and "A"-"F" are
/// accepted. A character that is not a hexadecimal digit still occupies its
/// position but contributes the value 0.
///
/// @param _strport  Hexadecimal text, e.g. "1F90".
/// @return The decoded number; 0 for an empty string. Inputs too long to fit
///         in an int are not guarded against.
int SCrawler::getPort(QString _strport)
{
    int value = 0;
    const int digitCount = _strport.length();

    for (int idx = 0; idx < digitCount; ++idx)
    {
        const char ch = _strport.at(idx).toLatin1();

        int nibble = 0;
        if (ch >= '0' && ch <= '9')
            nibble = ch - '0';
        else if (ch >= 'a' && ch <= 'f')
            nibble = ch - 'a' + 10;
        else if (ch >= 'A' && ch <= 'F')
            nibble = ch - 'A' + 10;

        // Shift what has been read so far by one hex digit, then append this one.
        value = value * 16 + nibble;
    }

    return value;
}

View File

@@ -4,6 +4,7 @@
#include <QtWebKitWidgets>
#include <QSqlDatabase>
#include <QTimer>
class Client;
class SCrawler : public QObject
{
Q_OBJECT
@@ -14,6 +15,7 @@ public:
void saveFile();
// static void Debug(QString _strFilename,QString _strData);
bool Debug(QString _strFilename,QString _strData);
signals:
void finished();
private slots:
@@ -41,12 +43,23 @@ public:
QString SqlString(QString _str);
QString GetSafeUtf(QString _strData);
int GetNumber(QString _str);
int getPort(QString _strport);
bool SendIpList(QString _str);
void SearchChildFrame(QWebFrame *frame);
QString getIpListFromAss(const QWebElement _FindElement);
QString getIpListFromNordVpn(const QWebElement _FindElement);
QString getIpListFromCyberSyndrom(const QWebElement _FindElement);
QString getIpListFromProxylists(const QWebElement _FindElement);
QString getIpListFromProxySpy(const QString _txt);
QString getIpListFromProxySearcher(const QWebElement _FindElement);
QString getIpListFromProxyListro(const QWebElement _FindElement);
QString getIpListFromSamuir(const QWebElement _FindElement);
QString getIpListFromNntime(const QWebElement _FindElement);
QString getIpListFromFreeproxy(const QWebElement _FindElement);
QString getIpListFromXroxy(const QWebElement _FindElement);
QString getIpListFromCoolProxy(const QWebElement _FindElement);
QString getIpListFromGatherProxy();
QString addSource(QString _strIpList, QString _strSource);
QWebElement Find(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind);