네이버블로그 변경

git-svn-id: svn://192.168.0.12/source@268 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
admin
2016-05-26 06:37:29 +00:00
parent 870c3dd4f0
commit e1c6d46828
3 changed files with 182 additions and 73 deletions

View File

@@ -13,8 +13,8 @@ int main(int argc, char *argv[])
srand(time(0));
QApplication a(argc, argv);
a.setApplicationName(QString("Chrome"));
a.setApplicationVersion(QString("39.0.2171.95"));
a.setApplicationVersion(QString("50.0.2661.102"));
//39.0.2171.95
QSqlDatabase db = QSqlDatabase::addDatabase("QMYSQL");
db.setHostName("bigbird.iptime.org");
db.setUserName("admin");

View File

@@ -6,6 +6,8 @@
#include <QDebug>
#include <QTimer>
#include <QThread>
#include <QRegExp>
#include <ctime>
using namespace std;
@@ -22,7 +24,9 @@ SCrawler::SCrawler():QObject()
{
m_page = new QWebPage;
m_nRetryCount = 0;
m_bProcessed = false;
connect(m_page, SIGNAL(loadFinished(bool)), this, SLOT(saveResult(bool)));
srand(time(NULL));
}
SCrawler::~SCrawler()
@@ -141,7 +145,6 @@ void SCrawler::load(QStringList _strlistArgv)
}
m_strTable = "data_" + _strlistArgv[3];
}
cout << m_strUrl.toStdString() << endl;
QUrl url = QUrl(m_strUrl);
@@ -181,6 +184,8 @@ void SCrawler::UpdateError(QString _strError)
void SCrawler::saveResult(bool ok)
{
qDebug() << "saveResult";
if (!ok)
{
cout << "Failed loading";
@@ -188,7 +193,7 @@ void SCrawler::saveResult(bool ok)
emit finished();
return;
}
qDebug() << "load complete";
//qDebug() << "load complete";
switch(m_nSelect)
{
case E_NAVER_NEWS_LIST:saveFrameNewsList(m_page->mainFrame());break;
@@ -216,7 +221,13 @@ void SCrawler::saveResult(bool ok)
bodydata.sendDB();
break;
}
case E_NAVER_BLOG_LIST:saveFrameList(m_page->mainFrame());break;
case E_NAVER_BLOG_LIST:
{
if(saveFrameList(m_page->mainFrame()))
break;
else
return;
}
case E_NAVER_BLOG_BODY:
{
if(!saveFrameUrl(m_page->mainFrame()))
@@ -297,6 +308,7 @@ void SCrawler::saveResult(bool ok)
break;
}
emit finished();
}
int SCrawler::GetNumber(QString _str)
@@ -310,6 +322,19 @@ int SCrawler::GetNumber(QString _str)
return strNumber.toInt();
}
// Builds a string from every character of _str for which QChar::isNumber()
// is true (original order preserved) and converts that string to an int.
//
// _str : text to scan.
// ok   : receives the conversion status reported by QString::toInt().
// Returns the value produced by QString::toInt() on the collected characters.
int SCrawler::GetNumber(QString _str, bool &ok)
{
    QString digits;
    int idx = 0;
    while (idx < _str.size())
    {
        if (_str.at(idx).isNumber())
            digits.append(_str.at(idx));
        ++idx;
    }
    return digits.toInt(&ok);
}
void SCrawler::Debug(QString _strFilename,QString _strData)
{
QFile file(_strFilename);
@@ -345,20 +370,55 @@ QString SCrawler::GetSafeUtf(QString _strData)
return str;
}
void SCrawler::saveFrameList(QWebFrame *frame)
// Slot that re-runs result processing for the current page.
//
// Does nothing when m_bUse is set. Otherwise the retry counter is advanced;
// while it stays below RETRY_MAX the m_bProcessed flag is cleared and
// saveResult(true) is invoked again. Once the limit is reached, "block" is
// printed and finished() is emitted instead.
void SCrawler::reloadListPage()
{
    if (m_bUse == true)
        return;

    m_nRetryCount += 1;

    if (m_nRetryCount < RETRY_MAX)
    {
        // Clear the "already handled" flag before running the processing again.
        m_bProcessed = false;
        saveResult(true);
        return;
    }

    // Retry budget exhausted: report and stop.
    cout << "block";
    emit finished();
}
bool SCrawler::saveFrameList(QWebFrame *frame)
{
if (m_bProcessed == false)
m_bProcessed = true;
else
return false;
//qDebug() << frame->documentElement().toPlainText();
if (m_bUse == true) return true;
QWebElement notFound = Find(frame->documentElement(),"div","id","notfound");
if(notFound.isNull() == false)
{
m_bLast = true;
return;
return true;
}
QWebElement naverBlock = Find(frame->documentElement(), "p", "class", "info01");
if (!naverBlock.isNull())
{
m_bError = true;
cout << "naver";
return true;
}
QWebElement eleMain = Find(frame->documentElement(),"div","class","blog section _blogBase");
QStringList urlList;
if (eleMain.isNull())
{
QTimer::singleShot(RETRY_INTERVAL, this, SLOT(reloadListPage()));
return false;
}
for (int i = 0; i < 10 ; i++)
{
QString str = "sp_blog_";
@@ -495,15 +555,21 @@ void SCrawler::saveFrameList(QWebFrame *frame)
}
{
QWebElement total = Find(eleMain,"span","class","title_num");
if (total.toPlainText().isEmpty()) {m_bError = true; return;}
if (total.toPlainText().isEmpty()) {m_bError = true; return true;}
int nTotal = GetNumber(total.toPlainText().split("/").at(1));
QStringList strList = m_strUrl.split("&");
int nNow = GetNumber(strList.at(strList.size() - 1).split("=").at(1));
bool ok = false;
int nNow = GetNumber(strList.at(strList.size() - 1).split("=").at(1), ok);
if (!ok)
{
m_bError = true;
return true;
}
if ((nNow + 10) > nTotal || nNow >= 1000)
m_bLast = true;
}
return true;
}
enum E_DATA
@@ -873,6 +939,15 @@ void SCrawler::saveFrameCafeList(QWebFrame *frame)
m_bLast = true;
return;
}
QWebElement naverBlock = Find(frame->documentElement(), "p", "class", "info01");
if (!naverBlock.isNull())
{
m_bError = true;
cout << "naver";
return;
}
QStringList urlList;
QWebElement eleMain = Find(frame->documentElement(),"div","class","cafe_article section _cafeBase");
@@ -1687,6 +1762,7 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
qDebug() << frame->baseUrl().toString();
qDebug() << "executed";
if(frame->baseUrl().toString().contains("entertain") && !frame->baseUrl().toString().contains("comment"))
{
m_page->mainFrame()->load(QUrl(frame->baseUrl().toString().replace("read", "comment/list")));
@@ -2030,12 +2106,74 @@ bool SCrawler::getProxyList(QString &_str)
str += "\n";
_str += str;
}
return true;
}
void SCrawler::setProxy()
bool SCrawler::setProxyFromFile()
{
QFile file("proxy.txt");
QRegExp rx("^\\s*([\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3})[^\\d]*([\\d]*)");
if (file.open(QIODevice::ReadOnly | QIODevice::Text))
{
QVector <QStringList> vecProxy;
while (!file.atEnd())
{
QString str = QString(file.readLine());
if (str.isEmpty()) continue;
int pos = 0;
QStringList strList;
while ((pos = rx.indexIn(str, pos)) != -1)
{
if (!rx.cap(1).isEmpty())
strList.append(rx.cap(1));
if (!rx.cap(2).isEmpty())
strList.append(rx.cap(2));
pos += rx.matchedLength();
}
if (!strList.isEmpty())
vecProxy.push_back(strList);
}
if (vecProxy.size() > 0)
{
QStringList strList = vecProxy.at(rand()%vecProxy.size());
//QNetworkAccessManager *manager = new QNetworkAccessManager;
switch(strList.size())
{
case 1:
cout << "p : " << strList.at(0).toStdString() << " from File" << endl;
//manager->setProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0))));
//m_page->setNetworkAccessManager(manager);
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0))));
break;
case 2:
cout << "p : " << strList.at(0).toStdString() << ":" << strList.at(1).toStdString() << " from File" << endl;
//manager->setProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
//m_page->setNetworkAccessManager(manager);
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
break;
}
}
else
{
return false;
}
file.close();
return true;
}
else
return false;
}
bool SCrawler::setProxyFromDb()
{
QString proxyList;
if (getProxyList(proxyList))
{
QVector <QStringList> vecProxy;
@@ -2052,77 +2190,41 @@ void SCrawler::setProxy()
switch(strList.size())
{
case 1:
cout << "p : " << strList.at(0).toStdString() << endl;
cout << "p : " << strList.at(0).toStdString() << " from DB" << endl;
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0))));
break;
case 2:
cout << "p : " << strList.at(0).toStdString() << ":" << strList.at(1).toStdString() << endl;
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
cout << "p : " << strList.at(0).toStdString() << ":" << strList.at(1).toStdString() << " from DB" << endl;
m_strProxyIP = strList.at(0);
m_nProxyPort = strList.at(1).toInt();
//QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,"196.201.216.172",8088)));
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
/*
QString strProxyHost = "61.103.7.74";
int nPort = 2074;
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strProxyHost,nPort)));
*/
break;
}
return true;
}
else
{
return false;
}
}
else
{
QFile file("proxy.txt");
if (file.open(QIODevice::ReadOnly | QIODevice::Text))
return false;
}
}
void SCrawler::setProxy()
{
QVector <QStringList> vecProxy;
while (!file.atEnd())
{
QString str = QString(file.readLine());
if (str.isEmpty()) continue;
vecProxy.push_back(str.split(","));
}
if (vecProxy.size() > 0)
{
QStringList strList = vecProxy.at(rand()%vecProxy.size());
switch(strList.size())
{
case 1:
cout << "p : " << strList.at(0).toStdString() << endl;
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0))));
break;
case 2:
cout << "p : " << strList.at(0).toStdString() << endl;
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
break;
}
}
}
}
}
else
{
QFile file("proxy.txt");
if (file.open(QIODevice::ReadOnly | QIODevice::Text))
{
QVector <QStringList> vecProxy;
while (!file.atEnd())
{
QString str = QString(file.readLine());
if (str.isEmpty()) continue;
vecProxy.push_back(str.split(","));
}
if (vecProxy.size() > 0)
{
QStringList strList = vecProxy.at(rand()%vecProxy.size());
switch(strList.size())
{
case 1:
cout << "p : " << strList.at(0).toStdString() << endl;
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0))));
break;
case 2:
cout << "p : " << strList.at(0).toStdString() << endl;
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
break;
}
}
}
}
bool ok = setProxyFromFile() || setProxyFromDb();
if (!ok)
cout << "No Proxy" << endl;
}
void SCrawler::deleteProxy()

View File

@@ -34,6 +34,7 @@ signals:
private slots:
void saveResult(bool ok);
void reloadPage();
void reloadListPage();
private:
int m_nSelect;
QString m_strReper;
@@ -50,12 +51,14 @@ private:
bool m_bLast;
bool m_bError;
bool m_bNothing;
bool m_bProcessed;
int m_nRetryCount;
QString m_strProxyIP;
int m_nProxyPort;
int m_nRetryCount;
QString SqlString(QString _str);
QString GetSafeUtf(QString _strData);
void saveFrameList(QWebFrame *frame);
bool saveFrameList(QWebFrame *frame);
void saveFrameCafeList(QWebFrame *frame);
bool saveFrameUrl(QWebFrame *frame);
void saveFrameComment(QWebFrame *frame);
@@ -70,7 +73,11 @@ private:
bool saveFrameNewsComment(QWebFrame *frame);
int GetNumber(QString _str);
int GetNumber(QString _str, bool &ok);
bool getProxyList(QString &_str);
bool setProxyFromFile();
bool setProxyFromDb();
void setProxy();
void deleteProxy();