Files
clients/AjaxCrawlerProcess/sreplygetmanage.cpp
admin 93a9e55cf4 Naver News 어드레스 변경
git-svn-id: svn://192.168.0.12/source@169 8346c931-da38-4b9b-9d4c-e48b93cbd075
2015-08-06 03:39:11 +00:00

248 lines
9.8 KiB
C++

#include <QJsonDocument>
#include <QJsonObject>
#include <QJsonValue>
#include <QJsonArray>
#include <QHostInfo>
#include <iostream>
using namespace std;
#include "sreplygetmanage.h"
#include "srunnable.h"
#include "data.h"
extern void Debug(QString _strFilename,QString _strData);
/**
 * Default constructor.
 *
 * m_nTotal starts at -1, so NaverNewsRun() returns immediately until a
 * positive total has been assigned. A dedicated QThreadPool instance is
 * allocated for the worker runnables started by the *Run() methods.
 */
SReplyGetManage::SReplyGetManage()
    : m_nTotal(-1)
{
    m_pool = new QThreadPool();
}
/**
 * Destructor.
 *
 * Releases the QThreadPool that the constructor allocated with `new`;
 * previously it was never freed and leaked with every instance.
 * NOTE(review): assumes instances are not copied (the pool pointer would
 * then be shared between copies) - confirm against the class declaration.
 */
SReplyGetManage::~SReplyGetManage()
{
    // QThreadPool's destructor blocks until all queued runnables are done.
    delete m_pool;
}
/**
 * Stores the article URL and derives the comment group key from it.
 *
 * The `oid` and `aid` query parameters are extracted from the URL and
 * combined into m_strGno as "news<oid>%2C<aid>". A parameter that is missing
 * contributes an empty string.
 *
 * @param _strUrl  Article URL carrying `oid` and `aid` query parameters.
 */
void SReplyGetManage::SetUrl(QString _strUrl)
{
    m_strUrl = _strUrl;

    // Work on the query part only. Splitting the whole URL on '&' made the
    // first parameter's key look like "http://...?oid", so it never matched
    // when oid/aid came directly after the '?'.
    QString strQuery = _strUrl;
    const int nQueryPos = strQuery.indexOf("?");
    if (nQueryPos != -1)
        strQuery = strQuery.mid(nQueryPos + 1);

    // Drop a trailing "#fragment" so it is not glued onto the last value.
    const int nFragmentPos = strQuery.indexOf("#");
    if (nFragmentPos != -1)
        strQuery = strQuery.left(nFragmentPos);

    QString strOid, strAid;
    foreach (const QString &strPair, strQuery.split("&"))
    {
        const QStringList strListData = strPair.split("=");
        if (strListData.size() != 2)
            continue;   // not a plain key=value pair

        // When a key is repeated, the last occurrence wins.
        if (strListData.at(0) == "oid")
            strOid = strListData.at(1);
        else if (strListData.at(0) == "aid")
            strAid = strListData.at(1);
    }

    m_strGno = "news" + strOid + "%2C" + strAid;
}
void SReplyGetManage::NaverNewsRun()
{
if (m_nTotal <= 0) return;
QHostInfo info = QHostInfo::fromName("comment.news.naver.com");
QTcpSocket socket;
socket.connectToHost(info.addresses().at(rand()%info.addresses().size()).toString(),80);
if(!socket.waitForConnected())
{
cout << "Error: " << socket.errorString().toStdString();
}
QString strTotal = QString::number(m_nTotal);
QString strParam = "pageSize="+strTotal+"&gno=" + m_strGno + "&serviceId=news&page=1";
socket.write(QString("POST /api/comment/list.json HTTP/1.1\r\n"
"Host: comment.news.naver.com\r\n"
"Connection: keep-alive\r\n"
"Content-Length: "+QString::number(strParam.size())+"\r\n"
"charset: utf-8\r\n"
"Origin: http://comment.news.naver.com\r\n"
"User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.124 Safari/537.36\r\n"
"Content-Type: application/x-www-form-urlencoded; charset=UTF-8\r\n"
"Accept: */*\r\n"
"Accept-Encoding: deflate\r\n"
"Accept-Language: ko-KR,ko;q=0.8,en-US;q=0.6,en;q=0.4,zh-CN;q=0.2,zh;q=0.2\r\n\r\n"+strParam).toUtf8());
QByteArray byArray;
while (socket.waitForReadyRead())
{
byArray += socket.readAll();
}
int index = byArray.indexOf("{");
byArray=byArray.mid(index-2);
bool bFlag = true;
QString strOut;
while(bFlag)
{
strOut += byArray.left(8188);
byArray=byArray.mid(8192);
if (byArray.size() <= 8192)
{
bFlag = false;
strOut += byArray;
}
}
strOut = strOut.replace("\r\n","").replace("\n","");
QJsonParseError error;
QJsonDocument d = QJsonDocument::fromJson(strOut.toUtf8(),&error);
if (error.error != 0)
{
cout << error.errorString().toStdString();
}
m_pool->setMaxThreadCount(4);
SReplyData *pReply = new SReplyData[m_nTotal];
int nCount = 0;
bFlag = false;
foreach(QJsonValue value ,d.object().value("message").toObject().value("result").toObject().value("commentReplies").toArray())
{
QJsonObject obj = value.toObject();
pReply[nCount].m_nReplyReplyCount = obj["replyCount"].toInt();
int i= E_REPLY_USER_ID;
while (i < E_REPLY_MAX)
{
if (i <= E_REPLY_CONTENT)
pReply[nCount].m_strReplyData[i] = obj[g_strJsonReplyHead[i]].toString();
else
{
pReply[nCount].m_strReplyData[i] = QString::number(obj[g_strJsonReplyHead[i]].toInt());
}
i++;
}
pReply[nCount].m_strReplyData[E_REPLY_CONTENT] += "\r\n";
for (int i = E_REPLY_COUNT_GOOD; i < E_REPLY_MAX ; i++)
{
pReply[nCount].m_strReplyData[E_REPLY_CONTENT] += "(";
pReply[nCount].m_strReplyData[E_REPLY_CONTENT] += g_strJsonReplyHead[i];
pReply[nCount].m_strReplyData[E_REPLY_CONTENT] += ",";
pReply[nCount].m_strReplyData[E_REPLY_CONTENT] += pReply[nCount].m_strReplyData[i];
pReply[nCount].m_strReplyData[E_REPLY_CONTENT] += ")\r\n";
}
bFlag = pReply[nCount].m_strReplyData[E_REPLY_USER_ID].isEmpty();
if (pReply[nCount].m_nReplyReplyCount > 0 )
{
SRunnable *pRun = new SRunnable(m_nSelect);
QString strParam = "commentNo=";
strParam += QString::number(obj["commentReplyNo"].toInt());
strParam += "&pageSize=100&gno=";
strParam += m_strGno;
strParam += "&serviceId=news";
pRun->SetParam(strParam,&pReply[nCount].m_strReplyReply);
pRun->m_strID = QString::number(obj["commentReplyNo"].toInt());
pRun->setAutoDelete(true);
m_pool->start(pRun);
}
nCount++;
}
m_pool->waitForDone();
for (int i = 0; i < m_nTotal ; i++)
{
m_pData->setData(m_pData->GetDate(pReply[i].m_strReplyData[E_REPLY_DATE]), SCrawlerData::ARTICLE_DATE);
m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pReply[i].m_strReplyData[E_REPLY_CONTENT])), SCrawlerData::ARTICLE_DATA);
m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pReply[i].m_strReplyData[E_REPLY_USER_NICKNAME])), SCrawlerData::ARTICLE_NICKNAME);
m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pReply[i].m_strReplyData[E_REPLY_USER_ID])), SCrawlerData::ARTICLE_ID);
m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pReply[i].m_strReplyReply.join("\r\n"))), SCrawlerData::ETC);
m_pData->sendDB();
}
delete [] pReply;
}
/**
 * Downloads the short-comment pages described by m_astrDaum and forwards
 * every returned comment to m_pData.
 *
 * One DWR "ShortComment.getList" request is built per page and executed by
 * an SRunnable on m_pool; each runnable is handed a pointer to its own
 * QStringList. After the pool has finished, every list is consumed in groups
 * of six consecutive entries (date, data, nickname, parent, id, order).
 *
 * Returns without doing anything when any m_astrDaum entry is empty, when
 * E_DAUM_DOWNSRC has no query string or contains a malformed key=value pair,
 * or when the computed page count is negative.
 */
void SReplyGetManage::DaumCafeRun()
{
    for(int i = 0; i < E_DAUM_MAX; i++)
        if(m_astrDaum[i].isEmpty()) return;
    if (m_astrDaum[E_DAUM_DOWNSRC].indexOf("?") == -1) return;

    // Split the query part of the source address into key/value pairs.
    QMap <QString,QString> mapData;
    foreach(QString str,m_astrDaum[E_DAUM_DOWNSRC].split("?").at(1).split("&"))
    {
        QStringList list = str.split("=");
        if (list.size() != 2) return;
        mapData.insert(list.at(0),list.at(1));
    }

    // NOTE(review): the request count is E_DAUM_TOTAL - 1; presumably the
    // first page is collected elsewhere - confirm with the caller.
    int nTotal = m_astrDaum[E_DAUM_TOTAL].toInt() - 1;
    if (nTotal < 0) return;

    m_pool->setMaxThreadCount(8);
    // Plain array on purpose: each runnable keeps a pointer to its slot, so
    // the elements must not move until waitForDone() has returned.
    QStringList *pList = new QStringList[nTotal];
    for (int i = 0; i < nTotal ; i++)
    {
        // value() is used instead of operator[] so that a missing key does
        // not insert an empty entry into the map; the text produced is the
        // same (an empty string).
        QString strParam = "callCount=1\n"
                           "page="+m_astrDaum[E_DAUM_DOWNSRC]+"&search=true\n"
                           "httpSessionId=\n"
                           "scriptSessionId=\n"
                           "c0-scriptName=ShortComment\n"
                           "c0-methodName=getList\n"
                           "c0-id=0\n"
                           "c0-param0=string:"+mapData.value("fldid")+"\n"
                           "c0-param1=string:"+mapData.value("datanum")+"\n"
                           "c0-param2=string:"+m_astrDaum[E_DAUM_TOTAL]+"\n"
                           "c0-param3=number:"+QString::number(i+1)+ "\n"
                           "c0-param4=string:"+m_astrDaum[E_DAUM_CDEPTH]+"\n"
                           "c0-param5=null:null\n"
                           "c0-param6=boolean:false\n"
                           "c0-param7=boolean:false\n"
                           "c0-param8=boolean:false\n"
                           "c0-param9=boolean:false\n"
                           "batchId=1\n";
        QString strSend = QString("POST /_c21_/dwr/shortcomment/call/plaincall/ShortComment.getList.dwr HTTP/1.1\r\n"
                                  "Host: cafe.daum.net\r\n"
                                  "Connection: keep-alive\r\n"
                                  "Content-Length: "+QString::number(strParam.size())+"\r\n"
                                  "Origin: http://cafe.daum.net\r\n"
                                  "User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.130 Safari/537.36\r\n"
                                  "Content-Type: text/plain\r\n"
                                  "Accept: */*\r\n"
                                  "Referer: http://cafe.daum.net/"+m_astrDaum[E_DAUM_DOWNSRC]+"&search=true\r\n"
                                  "Accept-Encoding: gzip, deflate\r\n"
                                  "Accept-Language: ko-KR,ko;q=0.8,en-US;q=0.6,en;q=0.4,zh-CN;q=0.2,zh;q=0.2\r\n\r\n") + strParam;

        SRunnable *pRun = new SRunnable(m_nSelect);
        pRun->SetParam(strSend,&pList[i]);
        pRun->m_strID = QString::number(i);
        pRun->setAutoDelete(true);
        m_pool->start(pRun);
    }

    // All runnables must be finished before pList is read or freed.
    m_pool->waitForDone();

    // Each comment occupies six consecutive list entries.
    const int nFieldsPerComment = 6;
    for (int i = 0; i < nTotal ; i++)
    {
        int j = 0;
        // Only consume complete records. The former `j < size()` test let
        // at() run past the end when a list length was not a multiple of
        // six (e.g. a truncated or unexpected response).
        while(j + nFieldsPerComment <= pList[i].size())
        {
            m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pList[i].at(j++))),SCrawlerData::ARTICLE_DATE);
            m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pList[i].at(j++))),SCrawlerData::ARTICLE_DATA);
            m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pList[i].at(j++))),SCrawlerData::ARTICLE_NICKNAME);
            m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pList[i].at(j++))),SCrawlerData::ARTICLE_PARENT);
            m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pList[i].at(j++))),SCrawlerData::ARTICLE_ID);
            m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pList[i].at(j++))),SCrawlerData::ARTICLE_ORDER);
            m_pData->sendDB();
        }
    }
    delete [] pList;
}
/**
 * Entry point: remembers the crawler data sink, marks its article form as
 * "reply" and runs the fetcher that matches m_nSelect.
 *
 * @param _pData  Data object that receives the collected comments; it is
 *                stored in m_pData for use by the *Run() methods.
 */
void SReplyGetManage::Start(SCrawlerData *_pData)
{
    m_pData = _pData;
    m_pData->setData("reply", SCrawlerData::ARTICLE_FORM);

    // Any other m_nSelect value is silently ignored.
    if (m_nSelect == E_NAVER_NEWS)
    {
        NaverNewsRun();
    }
    else if (m_nSelect == E_DAUM_CAFE)
    {
        DaumCafeRun();
    }
}