From d48ec1fbcb46825587136b783cc3d39234b74fd5 Mon Sep 17 00:00:00 2001 From: admin Date: Thu, 29 Oct 2015 02:07:35 +0000 Subject: [PATCH] =?UTF-8?q?navernews=20=EB=8C=93=EA=B8=80=20=EA=B0=80?= =?UTF-8?q?=EC=A0=B8=EC=98=A4=EA=B8=B0=20=EC=88=98=EC=A0=95=20json?= =?UTF-8?q?=ED=98=95=EC=8B=9D=20=EB=92=A4=EC=97=90=20=EC=9D=B4=EC=83=81?= =?UTF-8?q?=ED=95=9C=20=EB=AC=B8=EC=9E=90=EB=93=A4=EC=9D=B4=20=EB=B6=99?= =?UTF-8?q?=EC=96=B4=EC=9E=88=EC=96=B4=EC=84=9C=20=EC=98=A4=EB=A5=98?= =?UTF-8?q?=EB=A5=BC=20=EC=9D=BC=EC=9C=BC=ED=82=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://192.168.0.12/source@218 8346c931-da38-4b9b-9d4c-e48b93cbd075 --- AjaxCrawlerProcess/scrawler.cpp | 2 +- AjaxCrawlerProcess/sreplygetmanage.cpp | 29 ++++++++++++++++++++++++-- AjaxCrawlerProcess/srunnable.cpp | 10 ++++++++- 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/AjaxCrawlerProcess/scrawler.cpp b/AjaxCrawlerProcess/scrawler.cpp index 6ab6c1f..27ad69d 100644 --- a/AjaxCrawlerProcess/scrawler.cpp +++ b/AjaxCrawlerProcess/scrawler.cpp @@ -176,7 +176,7 @@ void SCrawler::saveResultNaverNews() if (strlike.isEmpty() == false) strData += "\r\nlike(" + QString::number(strlike.toInt()) + ")"; else { - Debug("out.html",m_page->mainFrame()->toHtml()); + //Debug("out.html",m_page->mainFrame()->toHtml()); } element = Find(m_page->mainFrame()->documentElement(),"div","class","press_logo"); diff --git a/AjaxCrawlerProcess/sreplygetmanage.cpp b/AjaxCrawlerProcess/sreplygetmanage.cpp index cd3381c..7c61007 100644 --- a/AjaxCrawlerProcess/sreplygetmanage.cpp +++ b/AjaxCrawlerProcess/sreplygetmanage.cpp @@ -4,6 +4,9 @@ #include #include #include +#include +#include + using namespace std; @@ -73,11 +76,21 @@ void SReplyGetManage::NaverNewsRun() { byArray += socket.readAll(); } - +/* + { + QFile raw("c:\\data\\rawnews.json"); + raw.open(QFile::WriteOnly | QFile::Text); + QTextStream in(&raw); + in << QString(byArray); + raw.close(); + } +*/ int index = byArray.indexOf("{"); byArray=byArray.mid(index-2); bool bFlag = true; + QString strOut; + while(bFlag) { strOut += byArray.left(8188); @@ -88,7 +101,15 @@ void SReplyGetManage::NaverNewsRun() strOut += byArray; } } - strOut = strOut.replace("\r\n","").replace("\n",""); + + strOut = strOut.replace("\r\n","").replace("\n",""); + { + QRegExp re("\\}\\}[\\w\\s]*$"); + + strOut = strOut.replace(re,"}}"); + } + + QJsonParseError error; QJsonDocument d = QJsonDocument::fromJson(strOut.toUtf8(),&error); if (error.error != 0) @@ -103,6 +124,7 @@ void SReplyGetManage::NaverNewsRun() { QJsonObject obj = value.toObject(); pReply[nCount].m_nReplyReplyCount = obj["replyCount"].toInt(); + //qDebug() << "m_nReplyReply" << pReply[nCount].m_nReplyReplyCount; int i= E_REPLY_USER_ID; while (i < E_REPLY_MAX) { @@ -140,6 +162,8 @@ void SReplyGetManage::NaverNewsRun() nCount++; } m_pool->waitForDone(); +// qDebug() << "m_nTotal = " << m_nTotal; + for (int i = 0; i < m_nTotal ; i++) { m_pData->setData(m_pData->GetDate(pReply[i].m_strReplyData[E_REPLY_DATE]), SCrawlerData::ARTICLE_DATE); @@ -147,6 +171,7 @@ void SReplyGetManage::NaverNewsRun() m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pReply[i].m_strReplyData[E_REPLY_USER_NICKNAME])), SCrawlerData::ARTICLE_NICKNAME); m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pReply[i].m_strReplyData[E_REPLY_USER_ID])), SCrawlerData::ARTICLE_ID); m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pReply[i].m_strReplyReply.join("\r\n"))), SCrawlerData::ETC); + m_pData->setData(QString::number(i), SCrawlerData::ARTICLE_ORDER); m_pData->sendDB(); } delete [] pReply; diff --git a/AjaxCrawlerProcess/srunnable.cpp b/AjaxCrawlerProcess/srunnable.cpp index 1e615d6..514d0e3 100644 --- a/AjaxCrawlerProcess/srunnable.cpp +++ b/AjaxCrawlerProcess/srunnable.cpp @@ -53,7 +53,6 @@ void SRunnable::RunNaverNews() { byArray += socket.readAll(); } - QJsonDocument d; { QJsonParseError error; @@ -71,13 +70,22 @@ void SRunnable::RunNaverNews() strOut += byArray; } } + strOut = strOut.replace("\r\n","").replace("\n",""); + + { + QRegExp re("\\}\\}[\\w\\s]*$"); + strOut = strOut.replace(re,"}}"); + } + + if (strOut.length() <= 0 ) return; d = QJsonDocument::fromJson(strOut.trimmed().toUtf8(),&error); if (error.error != 0) { qDebug() << error.errorString(); Debug("reply.json",strOut); + socket.close(); exit(0); } }