네이버블로그 변경
git-svn-id: svn://192.168.0.12/source@268 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
@@ -13,8 +13,8 @@ int main(int argc, char *argv[])
|
||||
srand(time(0));
|
||||
QApplication a(argc, argv);
|
||||
a.setApplicationName(QString("Chrome"));
|
||||
a.setApplicationVersion(QString("39.0.2171.95"));
|
||||
|
||||
a.setApplicationVersion(QString("50.0.2661.102"));
|
||||
//39.0.2171.95
|
||||
QSqlDatabase db = QSqlDatabase::addDatabase("QMYSQL");
|
||||
db.setHostName("bigbird.iptime.org");
|
||||
db.setUserName("admin");
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
#include <QDebug>
|
||||
#include <QTimer>
|
||||
#include <QThread>
|
||||
#include <QRegExp>
|
||||
#include <ctime>
|
||||
|
||||
using namespace std;
|
||||
|
||||
@@ -22,7 +24,9 @@ SCrawler::SCrawler():QObject()
|
||||
{
|
||||
m_page = new QWebPage;
|
||||
m_nRetryCount = 0;
|
||||
m_bProcessed = false;
|
||||
connect(m_page, SIGNAL(loadFinished(bool)), this, SLOT(saveResult(bool)));
|
||||
srand(time(NULL));
|
||||
}
|
||||
|
||||
SCrawler::~SCrawler()
|
||||
@@ -141,7 +145,6 @@ void SCrawler::load(QStringList _strlistArgv)
|
||||
}
|
||||
m_strTable = "data_" + _strlistArgv[3];
|
||||
}
|
||||
|
||||
cout << m_strUrl.toStdString() << endl;
|
||||
|
||||
QUrl url = QUrl(m_strUrl);
|
||||
@@ -181,6 +184,8 @@ void SCrawler::UpdateError(QString _strError)
|
||||
|
||||
void SCrawler::saveResult(bool ok)
|
||||
{
|
||||
qDebug() << "saveResult";
|
||||
|
||||
if (!ok)
|
||||
{
|
||||
cout << "Failed loading";
|
||||
@@ -188,7 +193,7 @@ void SCrawler::saveResult(bool ok)
|
||||
emit finished();
|
||||
return;
|
||||
}
|
||||
qDebug() << "load complete";
|
||||
//qDebug() << "load complete";
|
||||
switch(m_nSelect)
|
||||
{
|
||||
case E_NAVER_NEWS_LIST:saveFrameNewsList(m_page->mainFrame());break;
|
||||
@@ -216,7 +221,13 @@ void SCrawler::saveResult(bool ok)
|
||||
bodydata.sendDB();
|
||||
break;
|
||||
}
|
||||
case E_NAVER_BLOG_LIST:saveFrameList(m_page->mainFrame());break;
|
||||
case E_NAVER_BLOG_LIST:
|
||||
{
|
||||
if(saveFrameList(m_page->mainFrame()))
|
||||
break;
|
||||
else
|
||||
return;
|
||||
}
|
||||
case E_NAVER_BLOG_BODY:
|
||||
{
|
||||
if(!saveFrameUrl(m_page->mainFrame()))
|
||||
@@ -297,6 +308,7 @@ void SCrawler::saveResult(bool ok)
|
||||
break;
|
||||
}
|
||||
emit finished();
|
||||
|
||||
}
|
||||
|
||||
int SCrawler::GetNumber(QString _str)
|
||||
@@ -310,6 +322,19 @@ int SCrawler::GetNumber(QString _str)
|
||||
return strNumber.toInt();
|
||||
}
|
||||
|
||||
|
||||
/**
 * Collects every character of _str for which QChar::isNumber() is true,
 * in their original order, and converts the collected text to an int.
 *
 * @param _str  Text to scan; all non-numeric characters are ignored.
 * @param ok    Receives the conversion status reported by QString::toInt().
 * @return      The value produced by QString::toInt() on the collected text.
 */
int SCrawler::GetNumber(QString _str, bool &ok)
{
    QString strDigits;
    const int nLength = _str.size();

    for (int nIndex = 0; nIndex < nLength; ++nIndex)
    {
        const QChar ch = _str.at(nIndex);
        if (!ch.isNumber())
            continue;
        strDigits += ch;
    }

    // The caller inspects 'ok' to tell a real 0 apart from a failed conversion.
    return strDigits.toInt(&ok);
}
|
||||
|
||||
|
||||
void SCrawler::Debug(QString _strFilename,QString _strData)
|
||||
{
|
||||
QFile file(_strFilename);
|
||||
@@ -345,20 +370,55 @@ QString SCrawler::GetSafeUtf(QString _strData)
|
||||
return str;
|
||||
}
|
||||
|
||||
void SCrawler::saveFrameList(QWebFrame *frame)
|
||||
void SCrawler::reloadListPage()
|
||||
{
|
||||
if (m_bUse == true) return;
|
||||
++m_nRetryCount;
|
||||
if (m_nRetryCount >= RETRY_MAX)
|
||||
{
|
||||
cout << "block";
|
||||
emit finished();
|
||||
return;
|
||||
}
|
||||
m_bProcessed = false;
|
||||
saveResult(true);
|
||||
}
|
||||
|
||||
|
||||
bool SCrawler::saveFrameList(QWebFrame *frame)
|
||||
{
|
||||
|
||||
if (m_bProcessed == false)
|
||||
m_bProcessed = true;
|
||||
else
|
||||
return false;
|
||||
|
||||
//qDebug() << frame->documentElement().toPlainText();
|
||||
|
||||
if (m_bUse == true) return true;
|
||||
QWebElement notFound = Find(frame->documentElement(),"div","id","notfound");
|
||||
if(notFound.isNull() == false)
|
||||
{
|
||||
m_bLast = true;
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
QWebElement naverBlock = Find(frame->documentElement(), "p", "class", "info01");
|
||||
if (!naverBlock.isNull())
|
||||
{
|
||||
m_bError = true;
|
||||
cout << "naver";
|
||||
return true;
|
||||
}
|
||||
|
||||
QWebElement eleMain = Find(frame->documentElement(),"div","class","blog section _blogBase");
|
||||
QStringList urlList;
|
||||
|
||||
if (eleMain.isNull())
|
||||
{
|
||||
QTimer::singleShot(RETRY_INTERVAL, this, SLOT(reloadListPage()));
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 10 ; i++)
|
||||
{
|
||||
QString str = "sp_blog_";
|
||||
@@ -495,15 +555,21 @@ void SCrawler::saveFrameList(QWebFrame *frame)
|
||||
}
|
||||
|
||||
{
|
||||
|
||||
QWebElement total = Find(eleMain,"span","class","title_num");
|
||||
if (total.toPlainText().isEmpty()) {m_bError = true; return;}
|
||||
if (total.toPlainText().isEmpty()) {m_bError = true; return true;}
|
||||
int nTotal = GetNumber(total.toPlainText().split("/").at(1));
|
||||
QStringList strList = m_strUrl.split("&");
|
||||
int nNow = GetNumber(strList.at(strList.size() - 1).split("=").at(1));
|
||||
bool ok = false;
|
||||
int nNow = GetNumber(strList.at(strList.size() - 1).split("=").at(1), ok);
|
||||
if (!ok)
|
||||
{
|
||||
m_bError = true;
|
||||
return true;
|
||||
}
|
||||
if ((nNow + 10) > nTotal || nNow >= 1000)
|
||||
m_bLast = true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
enum E_DATA
|
||||
@@ -873,6 +939,15 @@ void SCrawler::saveFrameCafeList(QWebFrame *frame)
|
||||
m_bLast = true;
|
||||
return;
|
||||
}
|
||||
|
||||
QWebElement naverBlock = Find(frame->documentElement(), "p", "class", "info01");
|
||||
if (!naverBlock.isNull())
|
||||
{
|
||||
m_bError = true;
|
||||
cout << "naver";
|
||||
return;
|
||||
}
|
||||
|
||||
QStringList urlList;
|
||||
|
||||
QWebElement eleMain = Find(frame->documentElement(),"div","class","cafe_article section _cafeBase");
|
||||
@@ -1687,6 +1762,7 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
|
||||
qDebug() << frame->baseUrl().toString();
|
||||
qDebug() << "executed";
|
||||
|
||||
|
||||
if(frame->baseUrl().toString().contains("entertain") && !frame->baseUrl().toString().contains("comment"))
|
||||
{
|
||||
m_page->mainFrame()->load(QUrl(frame->baseUrl().toString().replace("read", "comment/list")));
|
||||
@@ -2030,12 +2106,74 @@ bool SCrawler::getProxyList(QString &_str)
|
||||
str += "\n";
|
||||
_str += str;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void SCrawler::setProxy()
|
||||
bool SCrawler::setProxyFromFile()
|
||||
{
|
||||
QFile file("proxy.txt");
|
||||
QRegExp rx("^\\s*([\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3})[^\\d]*([\\d]*)");
|
||||
|
||||
if (file.open(QIODevice::ReadOnly | QIODevice::Text))
|
||||
{
|
||||
QVector <QStringList> vecProxy;
|
||||
while (!file.atEnd())
|
||||
{
|
||||
QString str = QString(file.readLine());
|
||||
if (str.isEmpty()) continue;
|
||||
int pos = 0;
|
||||
QStringList strList;
|
||||
while ((pos = rx.indexIn(str, pos)) != -1)
|
||||
{
|
||||
if (!rx.cap(1).isEmpty())
|
||||
strList.append(rx.cap(1));
|
||||
if (!rx.cap(2).isEmpty())
|
||||
strList.append(rx.cap(2));
|
||||
pos += rx.matchedLength();
|
||||
}
|
||||
if (!strList.isEmpty())
|
||||
vecProxy.push_back(strList);
|
||||
}
|
||||
if (vecProxy.size() > 0)
|
||||
{
|
||||
QStringList strList = vecProxy.at(rand()%vecProxy.size());
|
||||
//QNetworkAccessManager *manager = new QNetworkAccessManager;
|
||||
|
||||
switch(strList.size())
|
||||
{
|
||||
case 1:
|
||||
cout << "p : " << strList.at(0).toStdString() << " from File" << endl;
|
||||
|
||||
//manager->setProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0))));
|
||||
//m_page->setNetworkAccessManager(manager);
|
||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0))));
|
||||
break;
|
||||
case 2:
|
||||
cout << "p : " << strList.at(0).toStdString() << ":" << strList.at(1).toStdString() << " from File" << endl;
|
||||
|
||||
//manager->setProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
|
||||
//m_page->setNetworkAccessManager(manager);
|
||||
|
||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
file.close();
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SCrawler::setProxyFromDb()
|
||||
{
|
||||
QString proxyList;
|
||||
|
||||
if (getProxyList(proxyList))
|
||||
{
|
||||
QVector <QStringList> vecProxy;
|
||||
@@ -2052,79 +2190,43 @@ void SCrawler::setProxy()
|
||||
switch(strList.size())
|
||||
{
|
||||
case 1:
|
||||
cout << "p : " << strList.at(0).toStdString() << endl;
|
||||
cout << "p : " << strList.at(0).toStdString() << " from DB" << endl;
|
||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0))));
|
||||
break;
|
||||
case 2:
|
||||
cout << "p : " << strList.at(0).toStdString() << ":" << strList.at(1).toStdString() << endl;
|
||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
|
||||
cout << "p : " << strList.at(0).toStdString() << ":" << strList.at(1).toStdString() << " from DB" << endl;
|
||||
m_strProxyIP = strList.at(0);
|
||||
m_nProxyPort = strList.at(1).toInt();
|
||||
//QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,"196.201.216.172",8088)));
|
||||
|
||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
|
||||
/*
|
||||
QString strProxyHost = "61.103.7.74";
|
||||
int nPort = 2074;
|
||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strProxyHost,nPort)));
|
||||
*/
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
QFile file("proxy.txt");
|
||||
if (file.open(QIODevice::ReadOnly | QIODevice::Text))
|
||||
{
|
||||
QVector <QStringList> vecProxy;
|
||||
while (!file.atEnd())
|
||||
{
|
||||
QString str = QString(file.readLine());
|
||||
if (str.isEmpty()) continue;
|
||||
vecProxy.push_back(str.split(","));
|
||||
}
|
||||
if (vecProxy.size() > 0)
|
||||
{
|
||||
QStringList strList = vecProxy.at(rand()%vecProxy.size());
|
||||
switch(strList.size())
|
||||
{
|
||||
case 1:
|
||||
cout << "p : " << strList.at(0).toStdString() << endl;
|
||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0))));
|
||||
break;
|
||||
case 2:
|
||||
cout << "p : " << strList.at(0).toStdString() << endl;
|
||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
QFile file("proxy.txt");
|
||||
if (file.open(QIODevice::ReadOnly | QIODevice::Text))
|
||||
{
|
||||
QVector <QStringList> vecProxy;
|
||||
while (!file.atEnd())
|
||||
{
|
||||
QString str = QString(file.readLine());
|
||||
if (str.isEmpty()) continue;
|
||||
vecProxy.push_back(str.split(","));
|
||||
}
|
||||
if (vecProxy.size() > 0)
|
||||
{
|
||||
QStringList strList = vecProxy.at(rand()%vecProxy.size());
|
||||
switch(strList.size())
|
||||
{
|
||||
case 1:
|
||||
cout << "p : " << strList.at(0).toStdString() << endl;
|
||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0))));
|
||||
break;
|
||||
case 2:
|
||||
cout << "p : " << strList.at(0).toStdString() << endl;
|
||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void SCrawler::setProxy()
|
||||
{
|
||||
bool ok = setProxyFromFile() || setProxyFromDb();
|
||||
if (!ok)
|
||||
cout << "No Proxy" << endl;
|
||||
}
|
||||
|
||||
void SCrawler::deleteProxy()
|
||||
{
|
||||
if (m_strProxyIP.isEmpty()) return;
|
||||
|
||||
@@ -34,6 +34,7 @@ signals:
|
||||
private slots:
|
||||
void saveResult(bool ok);
|
||||
void reloadPage();
|
||||
void reloadListPage();
|
||||
private:
|
||||
int m_nSelect;
|
||||
QString m_strReper;
|
||||
@@ -50,12 +51,14 @@ private:
|
||||
bool m_bLast;
|
||||
bool m_bError;
|
||||
bool m_bNothing;
|
||||
bool m_bProcessed;
|
||||
int m_nRetryCount;
|
||||
QString m_strProxyIP;
|
||||
int m_nProxyPort;
|
||||
int m_nRetryCount;
|
||||
|
||||
QString SqlString(QString _str);
|
||||
QString GetSafeUtf(QString _strData);
|
||||
void saveFrameList(QWebFrame *frame);
|
||||
bool saveFrameList(QWebFrame *frame);
|
||||
void saveFrameCafeList(QWebFrame *frame);
|
||||
bool saveFrameUrl(QWebFrame *frame);
|
||||
void saveFrameComment(QWebFrame *frame);
|
||||
@@ -70,7 +73,11 @@ private:
|
||||
bool saveFrameNewsComment(QWebFrame *frame);
|
||||
|
||||
int GetNumber(QString _str);
|
||||
int GetNumber(QString _str, bool &ok);
|
||||
|
||||
bool getProxyList(QString &_str);
|
||||
bool setProxyFromFile();
|
||||
bool setProxyFromDb();
|
||||
void setProxy();
|
||||
void deleteProxy();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user