뉴스 크롤링 기능 추가

git-svn-id: svn://192.168.0.12/source@148 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
admin
2015-06-18 08:43:34 +00:00
parent b47031f0d3
commit f1629c85f3
5 changed files with 65 additions and 12 deletions

View File

@@ -15,8 +15,6 @@ int main(int argc, char *argv[])
a.setApplicationName(QString("Chrome")); a.setApplicationName(QString("Chrome"));
a.setApplicationVersion(QString("39.0.2171.95")); a.setApplicationVersion(QString("39.0.2171.95"));
QSqlDatabase db = QSqlDatabase::addDatabase("QMYSQL"); QSqlDatabase db = QSqlDatabase::addDatabase("QMYSQL");
db.setHostName("bigbird.iptime.org"); db.setHostName("bigbird.iptime.org");
db.setUserName("admin"); db.setUserName("admin");

View File

@@ -16,7 +16,7 @@ struct SProxyList
SCrawler::SCrawler():QObject() SCrawler::SCrawler():QObject()
{ {
m_page = new QWebPage; m_page = new QWebPage;
connect(m_page, SIGNAL(loadFinished(bool)), this, SLOT(saveResult(bool))); connect(m_page, SIGNAL(loadFinished(bool)), this, SLOT(saveResult(bool)));
} }
SCrawler::~SCrawler() SCrawler::~SCrawler()
@@ -29,6 +29,13 @@ void SCrawler::load(QStringList _strlistArgv)
if (_strlistArgv[0] == "naver") if (_strlistArgv[0] == "naver")
{ {
if (_strlistArgv[1] == "news_list")
{
m_strUrl = _strlistArgv[2];
m_nSelect = E_NAVER_NEWS_LIST;
setProxy();
}
if (_strlistArgv[1] == "cafe_list") if (_strlistArgv[1] == "cafe_list")
{ {
m_strUrl = _strlistArgv[2]; m_strUrl = _strlistArgv[2];
@@ -66,7 +73,9 @@ void SCrawler::load(QStringList _strlistArgv)
m_strUrl = _strlistArgv[2]; m_strUrl = _strlistArgv[2];
m_nSelect = E_NAVER_BLOG_REPLY; m_nSelect = E_NAVER_BLOG_REPLY;
} }
m_strTable = "data_" + _strlistArgv[3];
if (_strlistArgv.size() > 3)
m_strTable = "data_" + _strlistArgv[3];
} }
@@ -162,6 +171,7 @@ void SCrawler::saveResult(bool ok)
switch(m_nSelect) switch(m_nSelect)
{ {
case E_NAVER_NEWS_LIST:saveFrameNewsList(m_page->mainFrame());break;
case E_NAVER_CAFE_LIST:saveFrameCafeList(m_page->mainFrame());break; case E_NAVER_CAFE_LIST:saveFrameCafeList(m_page->mainFrame());break;
case E_NAVER_CAFE_DATA: case E_NAVER_CAFE_DATA:
{ {
@@ -200,6 +210,7 @@ void SCrawler::saveResult(bool ok)
case E_NAVER_BLOG_LIST: case E_NAVER_BLOG_LIST:
case E_DAUM_CAFE_LIST: case E_DAUM_CAFE_LIST:
case E_DAUM_BLOG_LIST: case E_DAUM_BLOG_LIST:
case E_NAVER_NEWS_LIST:
if (m_bError) if (m_bError)
{ {
cout << "block";// block cout << "block";// block
@@ -294,7 +305,6 @@ void SCrawler::saveFrameList(QWebFrame *frame)
} }
QWebElement eleMain = Find(frame->documentElement(),"div","class","blog section _blogBase"); QWebElement eleMain = Find(frame->documentElement(),"div","class","blog section _blogBase");
QSqlQuery sql;
QStringList urlList; QStringList urlList;
for (int i = 0; i < 10 ; i++) for (int i = 0; i < 10 ; i++)
@@ -1133,12 +1143,11 @@ void SCrawler::saveFrameDaumCafeList(QWebFrame *frame)
{ {
if (m_bUse == true) return; if (m_bUse == true) return;
static int cz = 0; ///static int cz = 0;
// Debug(frame->frameName() + QString::number(cz++) + ".html",frame->toHtml()); // Debug(frame->frameName() + QString::number(cz++) + ".html",frame->toHtml());
//int nLast = 0; //int nLast = 0;
QStringList urlList; QStringList urlList;
QWebElement eleMain = Find(frame->documentElement(),"div","class","type_fulltext wid_f"); QWebElement eleMain = Find(frame->documentElement(),"div","class","type_fulltext wid_f");
foreach(QWebElement eleSub,eleMain.findAll("div")) foreach(QWebElement eleSub,eleMain.findAll("div"))
{ {
if (eleSub.attribute("class") == "wrap_cont") if (eleSub.attribute("class") == "wrap_cont")
@@ -1564,6 +1573,41 @@ void SCrawler::saveFrameDaumBlogUrl(QWebFrame *frame){}
void SCrawler::saveFrameDaumBlogComment(QWebFrame *frame){} void SCrawler::saveFrameDaumBlogComment(QWebFrame *frame){}
void SCrawler::saveFrameDaumBlogList(QWebFrame *frame){} void SCrawler::saveFrameDaumBlogList(QWebFrame *frame){}
// Handles a loaded news search result page.
//
// Does nothing when m_bUse is already set. Otherwise it:
//   1. looks inside the "srch_result_area headline" div for every div whose
//      class is exactly "info", takes the href of its "go_naver" anchor and
//      prints it to stdout as "o <url>"; blank links and links containing
//      "http://sports" are skipped;
//   2. reads the text of the "result_num" span, extracts its numbers through
//      bodydata.GetNumber() and, when exactly three numbers are found, sets
//      m_bLast if the first is >= the second or the second equals the third;
//      any other count sets m_bError;
//   3. sets m_bUse.
//
// NOTE(review): the three numbers look like "first shown / last shown / total"
// of the result counter - confirm against the actual page markup.
//
// frame: the frame whose document is scanned.
void SCrawler::saveFrameNewsList(QWebFrame *frame)
{
    if (m_bUse == true)
        return;

    // Pass 1: emit the article links found in the headline result area.
    const QWebElement eleHeadline = Find(frame->documentElement(), "div", "class", "srch_result_area headline");
    foreach (QWebElement eleDiv, eleHeadline.findAll("div"))
    {
        if (eleDiv.attribute("class") != QString("info"))
            continue;

        const QString strLink = Find(eleDiv, "a", "class", "go_naver").attribute("href");
        const bool bBlank = strLink.trimmed().isEmpty();
        if (bBlank || strLink.contains("http://sports"))
            continue;

        cout << "o " << strLink.toStdString() << endl;
    }

    // Pass 2: turn the result counter text into a list of integers.
    const QString strCounter = Find(frame->documentElement(), "span", "class", "result_num").toPlainText().trimmed();
    QVector<int> vecCounts;
    foreach (QString strPart, bodydata.GetNumber(strCounter))
    {
        if (!strPart.trimmed().isEmpty())
            vecCounts.push_back(strPart.toInt());
    }

    // Anything other than exactly three numbers is treated as an error.
    if (vecCounts.size() != 3)
    {
        m_bError = true;
    }
    else if (vecCounts[0] >= vecCounts[1] || vecCounts[1] == vecCounts[2])
    {
        m_bLast = true;
    }

    m_bUse = true;
}
QWebElement SCrawler::Find(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind) QWebElement SCrawler::Find(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind)
{ {
QWebElementCollection elements = _FindElement.findAll(_strElement); QWebElementCollection elements = _FindElement.findAll(_strElement);

View File

@@ -18,7 +18,8 @@ public:
E_DAUM_CAFE_DATA, E_DAUM_CAFE_DATA,
E_DAUM_BLOG_LIST, E_DAUM_BLOG_LIST,
E_DAUM_BLOG_BODY, E_DAUM_BLOG_BODY,
E_DAUM_BLOG_REPLY E_DAUM_BLOG_REPLY,
E_NAVER_NEWS_LIST
}; };
public: public:
SCrawler(); SCrawler();
@@ -61,6 +62,7 @@ private:
void saveFrameDaumBlogUrl(QWebFrame *frame); void saveFrameDaumBlogUrl(QWebFrame *frame);
void saveFrameDaumBlogComment(QWebFrame *frame); void saveFrameDaumBlogComment(QWebFrame *frame);
void saveFrameDaumCafeUrl(QWebFrame *frame); void saveFrameDaumCafeUrl(QWebFrame *frame);
void saveFrameNewsList(QWebFrame *frame);
int GetNumber(QString _str); int GetNumber(QString _str);
bool getProxyList(QString &_str); bool getProxyList(QString &_str);
void setProxy(); void setProxy();

View File

@@ -102,7 +102,6 @@ bool SCrawlerData::sendDB()
cout << "error : " << query.lastError().text().toStdString(); cout << "error : " << query.lastError().text().toStdString();
return false; return false;
} }
return true; return true;
} }
@@ -123,4 +122,15 @@ QString SCrawlerData::GetSafeUtf(QString _strData)
return str; return str;
} }
// Extracts the whitespace-separated number tokens contained in _str.
//
// Every character that is neither a number (QChar::isNumber) nor whitespace
// (QChar::isSpace) is dropped; what remains is split into tokens.
//
// Note that dropping a character does not insert a separator, so digits on
// both sides of a removed character end up in the same token
// (e.g. "1,234" -> "1234", "1-10" -> "110").
//
// _str:    text to scan.
// returns: the tokens in order of appearance. When no digit is present the
//          list holds a single empty string (QString::split on an empty
//          string), so callers should still skip empty entries.
QStringList SCrawlerData::GetNumber(QString _str)
{
    QString strKept;
    strKept.reserve(_str.length());
    for (int i = 0; i < _str.length(); ++i)
    {
        const QChar ch = _str.at(i);
        if (ch.isNumber() || ch.isSpace())
            strKept += ch;
    }

    // The filter above keeps every kind of whitespace (tab, newline, ...),
    // but the split below only recognises ' '. simplified() trims the ends
    // and collapses each whitespace run into one plain space, so tokens
    // separated by any whitespace are split correctly and runs of spaces no
    // longer yield empty tokens in the middle of the list.
    return strKept.simplified().split(" ");
}

View File

@@ -38,13 +38,12 @@ private:
private: private:
QString GetSafeUtf(QString _strData); QString GetSafeUtf(QString _strData);
QString getTable(); QString getTable();
public: public:
SCrawlerData(); SCrawlerData();
~SCrawlerData(); ~SCrawlerData();
QStringList GetNumber(QString _str);
QString getData(int _num); QString getData(int _num);
void setData(QString _str, int _num); void setData(QString _str, int _num);
void clear(); void clear();