뉴스 크롤링 기능 추가

git-svn-id: svn://192.168.0.12/source@148 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
admin
2015-06-18 08:43:34 +00:00
parent b47031f0d3
commit f1629c85f3
5 changed files with 65 additions and 12 deletions

View File

@@ -15,8 +15,6 @@ int main(int argc, char *argv[])
a.setApplicationName(QString("Chrome"));
a.setApplicationVersion(QString("39.0.2171.95"));
QSqlDatabase db = QSqlDatabase::addDatabase("QMYSQL");
db.setHostName("bigbird.iptime.org");
db.setUserName("admin");

View File

@@ -29,6 +29,13 @@ void SCrawler::load(QStringList _strlistArgv)
if (_strlistArgv[0] == "naver")
{
if (_strlistArgv[1] == "news_list")
{
m_strUrl = _strlistArgv[2];
m_nSelect = E_NAVER_NEWS_LIST;
setProxy();
}
if (_strlistArgv[1] == "cafe_list")
{
m_strUrl = _strlistArgv[2];
@@ -66,7 +73,9 @@ void SCrawler::load(QStringList _strlistArgv)
m_strUrl = _strlistArgv[2];
m_nSelect = E_NAVER_BLOG_REPLY;
}
m_strTable = "data_" + _strlistArgv[3];
if (_strlistArgv.size() > 3)
m_strTable = "data_" + _strlistArgv[3];
}
@@ -162,6 +171,7 @@ void SCrawler::saveResult(bool ok)
switch(m_nSelect)
{
case E_NAVER_NEWS_LIST:saveFrameNewsList(m_page->mainFrame());break;
case E_NAVER_CAFE_LIST:saveFrameCafeList(m_page->mainFrame());break;
case E_NAVER_CAFE_DATA:
{
@@ -200,6 +210,7 @@ void SCrawler::saveResult(bool ok)
case E_NAVER_BLOG_LIST:
case E_DAUM_CAFE_LIST:
case E_DAUM_BLOG_LIST:
case E_NAVER_NEWS_LIST:
if (m_bError)
{
cout << "block";// block
@@ -294,7 +305,6 @@ void SCrawler::saveFrameList(QWebFrame *frame)
}
QWebElement eleMain = Find(frame->documentElement(),"div","class","blog section _blogBase");
QSqlQuery sql;
QStringList urlList;
for (int i = 0; i < 10 ; i++)
@@ -1133,12 +1143,11 @@ void SCrawler::saveFrameDaumCafeList(QWebFrame *frame)
{
if (m_bUse == true) return;
static int cz = 0;
///static int cz = 0;
// Debug(frame->frameName() + QString::number(cz++) + ".html",frame->toHtml());
//int nLast = 0;
QStringList urlList;
QWebElement eleMain = Find(frame->documentElement(),"div","class","type_fulltext wid_f");
foreach(QWebElement eleSub,eleMain.findAll("div"))
{
if (eleSub.attribute("class") == "wrap_cont")
@@ -1564,6 +1573,41 @@ void SCrawler::saveFrameDaumBlogUrl(QWebFrame *frame){}
// Empty stub: the body does nothing with the given frame.
void SCrawler::saveFrameDaumBlogComment(QWebFrame *frame){}
// Empty stub: the body does nothing with the given frame.
void SCrawler::saveFrameDaumBlogList(QWebFrame *frame){}
// Handles a loaded news search result page.
//
// 1. Prints the href of every "go_naver" link found inside a div of class
//    "info" under the "srch_result_area headline" container, one per line,
//    prefixed with "o ". Empty links and links containing "http://sports"
//    are skipped.
// 2. Reads the numbers from the "result_num" span and updates the paging
//    flags: m_bLast is set when the first number is >= the second, or when
//    the second equals the third; m_bError is set when the span does not
//    yield exactly three numbers.
//    NOTE(review): the three numbers are presumably "first item", "last item"
//    and "total count" of the result list -- confirm against the page markup.
//
// The function does nothing when m_bUse is already set, and sets m_bUse
// before returning otherwise.
void SCrawler::saveFrameNewsList(QWebFrame *frame)
{
    if (m_bUse == true)
        return;

    // Emit every article link of the result list.
    const QWebElement eleResultArea = Find(frame->documentElement(),"div","class","srch_result_area headline");
    foreach(QWebElement eleDiv, eleResultArea.findAll("div"))
    {
        if (eleDiv.attribute("class") != QString("info"))
            continue;

        const QString strHref = Find(eleDiv,"a","class","go_naver").attribute("href");
        if (strHref.trimmed().isEmpty() || strHref.contains("http://sports"))
            continue;

        cout << "o " << strHref.toStdString() << endl;
    }

    // Collect the non-empty numeric tokens of the result counter text.
    const QString strCounter = Find(frame->documentElement(),"span","class","result_num").toPlainText().trimmed();
    QVector<int> vecNumbers;
    foreach(QString strToken, bodydata.GetNumber(strCounter))
    {
        if (!strToken.trimmed().isEmpty())
            vecNumbers.push_back(strToken.toInt());
    }

    if (vecNumbers.size() != 3)
    {
        // Counter could not be read as three numbers: report an error.
        m_bError = true;
    }
    else if (vecNumbers[0] >= vecNumbers[1] || vecNumbers[1] == vecNumbers[2])
    {
        m_bLast = true;
    }

    m_bUse = true;
}
QWebElement SCrawler::Find(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind)
{
QWebElementCollection elements = _FindElement.findAll(_strElement);

View File

@@ -18,7 +18,8 @@ public:
E_DAUM_CAFE_DATA,
E_DAUM_BLOG_LIST,
E_DAUM_BLOG_BODY,
E_DAUM_BLOG_REPLY
E_DAUM_BLOG_REPLY,
E_NAVER_NEWS_LIST
};
public:
SCrawler();
@@ -61,6 +62,7 @@ private:
void saveFrameDaumBlogUrl(QWebFrame *frame);
void saveFrameDaumBlogComment(QWebFrame *frame);
void saveFrameDaumCafeUrl(QWebFrame *frame);
void saveFrameNewsList(QWebFrame *frame);
int GetNumber(QString _str);
bool getProxyList(QString &_str);
void setProxy();

View File

@@ -102,7 +102,6 @@ bool SCrawlerData::sendDB()
cout << "error : " << query.lastError().text().toStdString();
return false;
}
return true;
}
@@ -123,4 +122,15 @@ QString SCrawlerData::GetSafeUtf(QString _strData)
return str;
}
// Extracts the groups of numeric characters contained in _str.
//
// Every character that is neither a number (QChar::isNumber) nor whitespace
// (QChar::isSpace) is dropped, so numeric characters on both sides of a
// dropped character are joined (e.g. a thousands separator disappears).
// The remaining text is trimmed and split on single spaces.
//
// Any whitespace character (tab, newline, non-breaking space, ...) is
// converted to a plain space before splitting. Previously such characters
// were kept verbatim but never used as separators, which left them embedded
// inside a token and made the token unparsable by toInt().
//
// @param _str  Text to scan.
// @return      Tokens in order of appearance. Consecutive separators yield
//              empty strings in the list (empty parts are kept, as before),
//              so callers should skip empty entries.
QStringList SCrawlerData::GetNumber(QString _str)
{
    QString strFiltered;
    strFiltered.reserve(_str.length());
    for (int i = 0; i < _str.length(); i++)
    {
        // at() is a const accessor; unlike data() it does not detach the string.
        const QChar ch = _str.at(i);
        if (ch.isNumber())
            strFiltered += ch;
        else if (ch.isSpace())
            strFiltered += QLatin1Char(' ');   // normalise every kind of whitespace
    }
    return strFiltered.trimmed().split(" ");
}

View File

@@ -40,11 +40,10 @@ private:
QString GetSafeUtf(QString _strData);
QString getTable();
public:
SCrawlerData();
~SCrawlerData();
QStringList GetNumber(QString _str);
QString getData(int _num);
void setData(QString _str, int _num);
void clear();