Files
clients/AjaxCrawlerProcess/sreplygetmanage.cpp
admin 0a6c66099d 뉴스 크롤러 기능 추가
git-svn-id: svn://192.168.0.12/source@151 8346c931-da38-4b9b-9d4c-e48b93cbd075
2015-06-18 08:49:08 +00:00

150 lines
5.6 KiB
C++

#include <QJsonDocument>
#include <QJsonObject>
#include <QJsonValue>
#include <QJsonArray>
#include "sreplygetmanage.h"
#include "srunnable.h"
#include "data.h"
// Debug helper defined in another translation unit.
// NOTE(review): judging by the parameter names it takes a file name and the
// text to record - confirm at its definition. It is not called by the code
// visible in this file.
extern void Debug(QString _strFilename,QString _strData);
// Builds a manager whose reply total is still unknown (-1) and gives it a
// dedicated thread pool for the runnables queued by Start().
SReplyGetManage::SReplyGetManage()
    : m_nTotal(-1)
{
    m_pool = new QThreadPool();
}
// Releases the thread pool allocated by the constructor.
SReplyGetManage::~SReplyGetManage()
{
    // The pool is created with `new` and has no parent object, so nobody else
    // frees it. QThreadPool's destructor blocks until queued runnables have
    // been processed, so no runnable outlives the manager.
    delete m_pool;
}
// Derives the comment-thread key (m_strGno) from an article URL.
//
// _strUrl: article URL, or a bare query string, carrying "oid" and "aid"
//          parameters.
// Result:  m_strGno = "news" + oid + "%2C" + aid. A parameter that is absent
//          contributes an empty string. When a key occurs more than once the
//          last occurrence wins.
void SReplyGetManage::SetUrl(QString _strUrl)
{
    // Reduce the URL to its query part first. Without this a parameter that
    // directly follows '?' would be keyed as "http://host/path?oid" and never
    // match. A string without '?' is treated as a bare query.
    const int nQueryStart = _strUrl.indexOf(QLatin1Char('?'));
    if (nQueryStart >= 0)
        _strUrl = _strUrl.mid(nQueryStart + 1);

    // A trailing "#fragment" is not part of the query and must not end up in
    // the last parameter's value.
    const int nFragmentStart = _strUrl.indexOf(QLatin1Char('#'));
    if (nFragmentStart >= 0)
        _strUrl.truncate(nFragmentStart);

    QString strOid, strAid;
    foreach (const QString &strPair, _strUrl.split("&"))
    {
        // Only well-formed "key=value" pairs are considered.
        const QStringList strKeyValue = strPair.split("=");
        if (strKeyValue.size() != 2)
            continue;

        if (strKeyValue.at(0) == "oid")
            strOid = strKeyValue.at(1);
        else if (strKeyValue.at(0) == "aid")
            strAid = strKeyValue.at(1);
    }

    // "%2C" is a URL-encoded comma separating the two identifiers.
    m_strGno = "news" + strOid + "%2C" + strAid;
}
// Downloads the top-level comments of the article selected through SetUrl()
// and stores each of them through _pData.
//
// Steps:
//   1. POST a comment-list request asking for m_nTotal comments on one page.
//   2. Cut the JSON body out of the raw HTTP response and parse it.
//   3. Turn every entry of "commentReplies" into an SReplyData record; for a
//      comment whose replyCount is positive, queue an SRunnable on m_pool that
//      is handed the record's m_strReplyReply as its output (presumably it
//      downloads the nested replies - see SRunnable).
//   4. Once the pool is idle, copy every parsed record into _pData and call
//      sendDB() for it.
//
// _pData: sink for the parsed comments. It is dereferenced immediately, so it
//         must not be null.
//
// Only the "reply" form marker is written to _pData when m_nTotal <= 0, when
// the connection fails, or when the response cannot be parsed.
void SReplyGetManage::Start(SCrawlerData *_pData)
{
    _pData->setData("reply", SCrawlerData::ARTICLE_FORM);
    if (m_nTotal <= 0) return;

    // NOTE(review): hard-coded address, presumably a comment.news.naver.com
    // server (see the Host header below) - confirm it is still valid.
    QTcpSocket socket;
    socket.connectToHost("125.209.226.173",80);
    if(!socket.waitForConnected())
    {
        // Nothing can be written to or read from an unconnected socket.
        qDebug() << "Error: " << socket.errorString();
        return;
    }

    // ---- 1. Send the request --------------------------------------------
    const QString strParam = "pageSize=" + QString::number(m_nTotal) + "&gno=" + m_strGno + "&serviceId=news&page=1";
    const QByteArray byBody = strParam.toUtf8();

    QByteArray byRequest;
    byRequest += "POST /api/comment/list.json HTTP/1.1\r\n"
                 "Host: comment.news.naver.com\r\n"
                 "Connection: keep-alive\r\n"
                 "Content-Length: ";
    // Content-Length counts bytes of the encoded body, not UTF-16 characters.
    byRequest += QByteArray::number(byBody.size());
    byRequest += "\r\n"
                 "charset: utf-8\r\n"
                 "Origin: http://comment.news.naver.com\r\n"
                 "User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.124 Safari/537.36\r\n"
                 "Content-Type: application/x-www-form-urlencoded; charset=UTF-8\r\n"
                 "Accept: */*\r\n"
                 "Accept-Encoding: deflate\r\n"
                 "Accept-Language: ko-KR,ko;q=0.8,en-US;q=0.6,en;q=0.4,zh-CN;q=0.2,zh;q=0.2\r\n\r\n";
    byRequest += byBody;
    socket.write(byRequest);

    // ---- 2. Read and parse the response ----------------------------------
    // Keep reading until waitForReadyRead() reports that no more data arrived.
    QByteArray byArray;
    while (socket.waitForReadyRead())
    {
        byArray += socket.readAll();
    }

    const int nJsonStart = byArray.indexOf("{");
    if (nJsonStart < 0)
    {
        qDebug() << "Error: no JSON object found in the comment list response";
        return;
    }
    // Keep the two bytes in front of the first '{' so the fixed offsets used
    // below stay aligned exactly as before.
    byArray = byArray.mid(qMax(0, nJsonStart - 2));

    // Keep 8188 bytes out of every 8192 and drop the 4 bytes in between.
    // NOTE(review): presumably this strips the chunk-size lines of a chunked
    // HTTP body sent in fixed-size chunks - confirm against a real response.
    //
    // The pieces are joined as raw bytes: a multi-byte UTF-8 character may
    // straddle a cut, and converting each piece to QString separately would
    // corrupt it.
    QByteArray byJson;
    bool bMore = true;
    while (bMore)
    {
        byJson += byArray.left(8188);
        byArray = byArray.mid(8192);
        if (byArray.size() <= 8192)
        {
            bMore = false;
            byJson += byArray;
        }
    }
    byJson.replace("\r\n", "").replace("\n", "");

    QJsonParseError error;
    const QJsonDocument d = QJsonDocument::fromJson(byJson, &error);
    if (error.error != QJsonParseError::NoError)
    {
        qDebug() << error.errorString();
        return;
    }

    // ---- 3. Convert the comments -----------------------------------------
    const QJsonArray replies = d.object().value("message").toObject().value("result").toObject().value("commentReplies").toArray();

    // Size the storage by what the server actually returned, so a response
    // with more entries than m_nTotal cannot overflow the array. A plain
    // array is used because the runnables keep pointers into it: the elements
    // must not move until waitForDone() has returned.
    const int nReplies = replies.size();
    SReplyData *pReply = new SReplyData[nReplies];

    m_pool->setMaxThreadCount(4);
    int nCount = 0;
    foreach(QJsonValue value, replies)
    {
        QJsonObject obj = value.toObject();
        SReplyData &reply = pReply[nCount];
        reply.m_nReplyReplyCount = obj["replyCount"].toInt();

        // Fields up to and including E_REPLY_CONTENT are read as JSON
        // strings, the remaining ones as integers rendered as text.
        for (int nField = E_REPLY_USER_ID; nField < E_REPLY_MAX; nField++)
        {
            if (nField <= E_REPLY_CONTENT)
                reply.m_strReplyData[nField] = obj[g_strJsonReplyHead[nField]].toString();
            else
                reply.m_strReplyData[nField] = QString::number(obj[g_strJsonReplyHead[nField]].toInt());
        }

        // Append the counter fields to the content as "(name,value)" lines.
        reply.m_strReplyData[E_REPLY_CONTENT] += "\r\n";
        for (int nField = E_REPLY_COUNT_GOOD; nField < E_REPLY_MAX; nField++)
        {
            reply.m_strReplyData[E_REPLY_CONTENT] += "(";
            reply.m_strReplyData[E_REPLY_CONTENT] += g_strJsonReplyHead[nField];
            reply.m_strReplyData[E_REPLY_CONTENT] += ",";
            reply.m_strReplyData[E_REPLY_CONTENT] += reply.m_strReplyData[nField];
            reply.m_strReplyData[E_REPLY_CONTENT] += ")\r\n";
        }

        if (reply.m_nReplyReplyCount > 0)
        {
            const QString strCommentNo = QString::number(obj["commentReplyNo"].toInt());

            QString strReplyParam = "commentNo=";
            strReplyParam += strCommentNo;
            strReplyParam += "&pageSize=100&gno=";
            strReplyParam += m_strGno;
            strReplyParam += "&serviceId=news";

            SRunnable *pRun = new SRunnable();
            pRun->SetParam(strReplyParam, &reply.m_strReplyReply);
            pRun->m_strID = strCommentNo;
            pRun->setAutoDelete(true);   // the pool deletes the runnable when it is done
            m_pool->start(pRun);
        }
        nCount++;
    }

    // Every runnable writes into pReply, so wait before reading it back.
    m_pool->waitForDone();

    // ---- 4. Store the results --------------------------------------------
    // Only the records that were actually filled are sent. Iterating up to
    // m_nTotal would push blank rows when fewer comments came back.
    for (int i = 0; i < nCount; i++)
    {
        _pData->setData(_pData->GetDate(pReply[i].m_strReplyData[E_REPLY_DATE]), SCrawlerData::ARTICLE_DATE);
        _pData->setData(_pData->SqlString(_pData->GetSafeUtf(pReply[i].m_strReplyData[E_REPLY_CONTENT])), SCrawlerData::ARTICLE_DATA);
        _pData->setData(_pData->SqlString(_pData->GetSafeUtf(pReply[i].m_strReplyData[E_REPLY_USER_NICKNAME])), SCrawlerData::ARTICLE_NICKNAME);
        _pData->setData(_pData->SqlString(_pData->GetSafeUtf(pReply[i].m_strReplyData[E_REPLY_USER_ID])), SCrawlerData::ARTICLE_ID);
        _pData->setData(_pData->SqlString(_pData->GetSafeUtf(pReply[i].m_strReplyReply.join("\r\n"))), SCrawlerData::ETC);
        _pData->sendDB();
    }
    delete [] pReply;
}