247 lines
9.7 KiB
C++
247 lines
9.7 KiB
C++
#include <QJsonDocument>
|
|
#include <QJsonObject>
|
|
#include <QJsonValue>
|
|
#include <QJsonArray>
|
|
#include <QHostInfo>
|
|
#include <iostream>
|
|
|
|
using namespace std;
|
|
|
|
#include "sreplygetmanage.h"
|
|
#include "srunnable.h"
|
|
#include "data.h"
|
|
|
|
// Declaration only: Debug() is not defined in the visible source, so it must be
// provided by another part of the project.
// NOTE(review): presumably a logging helper that records _strData under
// _strFilename -- confirm against the definition.
extern void Debug(QString _strFilename,QString _strData);
|
|
|
|
// Builds a manager whose comment total is still unknown (-1) and gives it a
// thread pool of its own for the per-comment / per-page download jobs.
SReplyGetManage::SReplyGetManage()
    : m_nTotal(-1)
{
    m_pool = new QThreadPool();
}
|
|
|
|
// Releases the thread pool that the constructor allocates with `new`.
// The pool is created without a QObject parent, so nothing else frees it.
// NOTE(review): this assumes SReplyGetManage objects are never copied; a copy
// would share the same m_pool pointer -- confirm against the header.
SReplyGetManage::~SReplyGetManage()
{
    delete m_pool;
    m_pool = 0;
}
|
|
|
|
// Remembers the article URL and derives m_strGno ("news<oid>%2C<aid>") from
// its "oid" and "aid" query parameters.
//
// _strUrl  article URL (or a bare "key=value&key=value" query string).
//
// Only the text after the first "?" is parsed, and anything from "#" onwards
// in that text is ignored, so "oid"/"aid" are found even when they are the
// first parameter or when the URL carries a fragment. A parameter is accepted
// only when it splits into exactly two parts on "="; if a key occurs more than
// once the last occurrence wins. Missing keys contribute an empty string.
void SReplyGetManage::SetUrl(QString _strUrl)
{
    m_strUrl = _strUrl;

    // Isolate the query part; a string without "?" is treated as a bare query.
    QString strQuery = _strUrl;
    const int nQueryPos = strQuery.indexOf("?");
    if (nQueryPos >= 0)
        strQuery = strQuery.mid(nQueryPos + 1);

    // Drop a trailing fragment so it cannot leak into the last value.
    const int nFragmentPos = strQuery.indexOf("#");
    if (nFragmentPos >= 0)
        strQuery.truncate(nFragmentPos);

    QString strOid, strAid;
    const QStringList listParams = strQuery.split("&");
    for (int i = 0; i < listParams.size(); ++i)
    {
        const QStringList listKeyValue = listParams.at(i).split("=");
        if (listKeyValue.size() != 2)
            continue;

        if (listKeyValue.at(0) == "oid")
            strOid = listKeyValue.at(1);
        else if (listKeyValue.at(0) == "aid")
            strAid = listKeyValue.at(1);
    }

    // "%2C" is the URL-encoded comma separating the two identifiers.
    m_strGno = "news" + strOid + "%2C" + strAid;
}
|
|
|
|
void SReplyGetManage::NaverNewsRun()
|
|
{
|
|
if (m_nTotal <= 0) return;
|
|
QTcpSocket socket;
|
|
socket.connectToHost("125.209.226.173",80);
|
|
if(!socket.waitForConnected())
|
|
{
|
|
cout << "Error: " << socket.errorString().toStdString();
|
|
}
|
|
|
|
QString strTotal = QString::number(m_nTotal);
|
|
QString strParam = "pageSize="+strTotal+"&gno=" + m_strGno + "&serviceId=news&page=1";
|
|
socket.write(QString("POST /api/comment/list.json HTTP/1.1\r\n"
|
|
"Host: comment.news.naver.com\r\n"
|
|
"Connection: keep-alive\r\n"
|
|
"Content-Length: "+QString::number(strParam.size())+"\r\n"
|
|
"charset: utf-8\r\n"
|
|
"Origin: http://comment.news.naver.com\r\n"
|
|
"User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.124 Safari/537.36\r\n"
|
|
"Content-Type: application/x-www-form-urlencoded; charset=UTF-8\r\n"
|
|
"Accept: */*\r\n"
|
|
"Accept-Encoding: deflate\r\n"
|
|
"Accept-Language: ko-KR,ko;q=0.8,en-US;q=0.6,en;q=0.4,zh-CN;q=0.2,zh;q=0.2\r\n\r\n"+strParam).toUtf8());
|
|
QByteArray byArray;
|
|
while (socket.waitForReadyRead())
|
|
{
|
|
byArray += socket.readAll();
|
|
}
|
|
|
|
int index = byArray.indexOf("{");
|
|
byArray=byArray.mid(index-2);
|
|
bool bFlag = true;
|
|
QString strOut;
|
|
while(bFlag)
|
|
{
|
|
strOut += byArray.left(8188);
|
|
byArray=byArray.mid(8192);
|
|
if (byArray.size() <= 8192)
|
|
{
|
|
bFlag = false;
|
|
strOut += byArray;
|
|
}
|
|
}
|
|
strOut = strOut.replace("\r\n","").replace("\n","");
|
|
QJsonParseError error;
|
|
QJsonDocument d = QJsonDocument::fromJson(strOut.toUtf8(),&error);
|
|
if (error.error != 0)
|
|
{
|
|
cout << error.errorString().toStdString();
|
|
}
|
|
m_pool->setMaxThreadCount(4);
|
|
SReplyData *pReply = new SReplyData[m_nTotal];
|
|
int nCount = 0;
|
|
bFlag = false;
|
|
foreach(QJsonValue value ,d.object().value("message").toObject().value("result").toObject().value("commentReplies").toArray())
|
|
{
|
|
QJsonObject obj = value.toObject();
|
|
pReply[nCount].m_nReplyReplyCount = obj["replyCount"].toInt();
|
|
int i= E_REPLY_USER_ID;
|
|
while (i < E_REPLY_MAX)
|
|
{
|
|
if (i <= E_REPLY_CONTENT)
|
|
pReply[nCount].m_strReplyData[i] = obj[g_strJsonReplyHead[i]].toString();
|
|
else
|
|
{
|
|
pReply[nCount].m_strReplyData[i] = QString::number(obj[g_strJsonReplyHead[i]].toInt());
|
|
}
|
|
i++;
|
|
}
|
|
pReply[nCount].m_strReplyData[E_REPLY_CONTENT] += "\r\n";
|
|
for (int i = E_REPLY_COUNT_GOOD; i < E_REPLY_MAX ; i++)
|
|
{
|
|
pReply[nCount].m_strReplyData[E_REPLY_CONTENT] += "(";
|
|
pReply[nCount].m_strReplyData[E_REPLY_CONTENT] += g_strJsonReplyHead[i];
|
|
pReply[nCount].m_strReplyData[E_REPLY_CONTENT] += ",";
|
|
pReply[nCount].m_strReplyData[E_REPLY_CONTENT] += pReply[nCount].m_strReplyData[i];
|
|
pReply[nCount].m_strReplyData[E_REPLY_CONTENT] += ")\r\n";
|
|
}
|
|
bFlag = pReply[nCount].m_strReplyData[E_REPLY_USER_ID].isEmpty();
|
|
if (pReply[nCount].m_nReplyReplyCount > 0 )
|
|
{
|
|
SRunnable *pRun = new SRunnable(m_nSelect);
|
|
QString strParam = "commentNo=";
|
|
strParam += QString::number(obj["commentReplyNo"].toInt());
|
|
strParam += "&pageSize=100&gno=";
|
|
strParam += m_strGno;
|
|
strParam += "&serviceId=news";
|
|
pRun->SetParam(strParam,&pReply[nCount].m_strReplyReply);
|
|
pRun->m_strID = QString::number(obj["commentReplyNo"].toInt());
|
|
pRun->setAutoDelete(true);
|
|
m_pool->start(pRun);
|
|
}
|
|
nCount++;
|
|
}
|
|
m_pool->waitForDone();
|
|
for (int i = 0; i < m_nTotal ; i++)
|
|
{
|
|
m_pData->setData(m_pData->GetDate(pReply[i].m_strReplyData[E_REPLY_DATE]), SCrawlerData::ARTICLE_DATE);
|
|
m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pReply[i].m_strReplyData[E_REPLY_CONTENT])), SCrawlerData::ARTICLE_DATA);
|
|
m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pReply[i].m_strReplyData[E_REPLY_USER_NICKNAME])), SCrawlerData::ARTICLE_NICKNAME);
|
|
m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pReply[i].m_strReplyData[E_REPLY_USER_ID])), SCrawlerData::ARTICLE_ID);
|
|
m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pReply[i].m_strReplyReply.join("\r\n"))), SCrawlerData::ETC);
|
|
m_pData->sendDB();
|
|
}
|
|
delete [] pReply;
|
|
}
|
|
|
|
void SReplyGetManage::DaumCafeRun()
|
|
{
|
|
for(int i = 0; i < E_DAUM_MAX; i++)
|
|
if(m_astrDaum[i].isEmpty()) return;
|
|
|
|
if (m_astrDaum[E_DAUM_DOWNSRC].indexOf("?") == -1) return;
|
|
|
|
QMap <QString,QString> mapData;
|
|
|
|
foreach(QString str,m_astrDaum[E_DAUM_DOWNSRC].split("?").at(1).split("&"))
|
|
{
|
|
QStringList list = str.split("=");
|
|
if (list.size() != 2) return;
|
|
mapData.insert(list.at(0),list.at(1));
|
|
}
|
|
|
|
int nTotal = m_astrDaum[E_DAUM_TOTAL].toInt() - 1;
|
|
if (nTotal < 0) return;
|
|
|
|
m_pool->setMaxThreadCount(8);
|
|
QStringList *pList = new QStringList[nTotal];
|
|
|
|
for (int i = 0; i < nTotal ; i++)
|
|
{
|
|
QString strParam = "callCount=1\n"
|
|
"page="+m_astrDaum[E_DAUM_DOWNSRC]+"&search=true\n"
|
|
"httpSessionId=\n"
|
|
"scriptSessionId=\n"
|
|
"c0-scriptName=ShortComment\n"
|
|
"c0-methodName=getList\n"
|
|
"c0-id=0\n"
|
|
"c0-param0=string:"+mapData["fldid"]+"\n"
|
|
"c0-param1=string:"+mapData["datanum"]+"\n"
|
|
"c0-param2=string:"+m_astrDaum[E_DAUM_TOTAL]+"\n"
|
|
"c0-param3=number:"+QString::number(i+1)+ "\n"
|
|
"c0-param4=string:"+m_astrDaum[E_DAUM_CDEPTH]+"\n"
|
|
"c0-param5=null:null\n"
|
|
"c0-param6=boolean:false\n"
|
|
"c0-param7=boolean:false\n"
|
|
"c0-param8=boolean:false\n"
|
|
"c0-param9=boolean:false\n"
|
|
"batchId=1\n";
|
|
QString strSend = QString("POST /_c21_/dwr/shortcomment/call/plaincall/ShortComment.getList.dwr HTTP/1.1\r\n"
|
|
"Host: cafe.daum.net\r\n"
|
|
"Connection: keep-alive\r\n"
|
|
"Content-Length: "+QString::number(strParam.size())+"\r\n"
|
|
"Origin: http://cafe.daum.net\r\n"
|
|
"User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.130 Safari/537.36\r\n"
|
|
"Content-Type: text/plain\r\n"
|
|
"Accept: */*\r\n"
|
|
"Referer: http://cafe.daum.net/"+m_astrDaum[E_DAUM_DOWNSRC]+"&search=true\r\n"
|
|
"Accept-Encoding: gzip, deflate\r\n"
|
|
"Accept-Language: ko-KR,ko;q=0.8,en-US;q=0.6,en;q=0.4,zh-CN;q=0.2,zh;q=0.2\r\n\r\n") + strParam;
|
|
SRunnable *pRun = new SRunnable(m_nSelect);
|
|
pRun->SetParam(strSend,&pList[i]);
|
|
pRun->m_strID = QString::number(i);
|
|
pRun->setAutoDelete(true);
|
|
m_pool->start(pRun);
|
|
}
|
|
m_pool->waitForDone();
|
|
|
|
for (int i = 0; i < nTotal ; i++)
|
|
{
|
|
int j = 0;
|
|
while(j < pList[i].size())
|
|
{
|
|
m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pList[i].at(j++))),SCrawlerData::ARTICLE_DATE);
|
|
m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pList[i].at(j++))),SCrawlerData::ARTICLE_DATA);
|
|
m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pList[i].at(j++))),SCrawlerData::ARTICLE_NICKNAME);
|
|
m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pList[i].at(j++))),SCrawlerData::ARTICLE_PARENT);
|
|
m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pList[i].at(j++))),SCrawlerData::ARTICLE_ID);
|
|
m_pData->setData(m_pData->SqlString(m_pData->GetSafeUtf(pList[i].at(j++))),SCrawlerData::ARTICLE_ORDER);
|
|
m_pData->sendDB();
|
|
}
|
|
}
|
|
delete [] pList;
|
|
}
|
|
|
|
// Entry point of a crawl run.
//
// _pData  record sink; its ARTICLE_FORM field is set to "reply" and the
//         pointer is kept in m_pData for the site-specific routines.
//
// Dispatches on m_nSelect: E_NAVER_NEWS runs NaverNewsRun(), E_DAUM_CAFE runs
// DaumCafeRun(); any other value does nothing further.
void SReplyGetManage::Start(SCrawlerData *_pData)
{
    _pData->setData("reply", SCrawlerData::ARTICLE_FORM);
    m_pData = _pData;

    if (m_nSelect == E_NAVER_NEWS)
    {
        NaverNewsRun();
    }
    else if (m_nSelect == E_DAUM_CAFE)
    {
        DaumCafeRun();
    }
}
|
|
|