Files
clients/CrawlerList/snavernewsmanage.cpp

201 lines
6.1 KiB
C++

#include "snavernewsmanage.h"
#include "widget.h"
#include <QDebug>
// Constructs the manager, forwarding pObject to the SManage base constructor,
// and resets m_nID to 0.
SNaverNewsManage::SNaverNewsManage(QObject *pObject) : SManage(pObject)
{
m_nID = 0;
// NOTE(review): SLOT() names no slot, so this connect() presumably fails at
// runtime and leaves readyReadStandardOutput() unhandled by this call —
// confirm which slot was intended, or whether the signal is wired elsewhere.
connect(&m_pro[0], SIGNAL(readyReadStandardOutput()), this, SLOT());
}
// Builds the Naver news-search request URL for one keyword, one day and one
// result page.
//   _str   : search keyword; it is passed through EncodetoEucKr() before being
//            placed in the "query" parameter.
//   _date  : day to search; it is used for both ends of every date-range
//            parameter in the URL.
//   _nPage : value written to the "page" parameter.
// Returns the assembled URL.
QString SNaverNewsManage::makeGetListQuery(QString _str,QDate _date,int _nPage)
{
    QString url = "http://news.naver.com/main/search/search.nhn?query=";
    url += EncodetoEucKr(_str,true);

    // Fixed search options, followed by the compact start:end range.
    url += "&st=news.all&q_enc=EUC-KR&r_enc=UTF-8&r_format=xml&rp=none&sm=all.basic&ic=all&ie=MS949&so=datetime.dsc&stDate=range:";
    url += _date.toString("yyyyMMdd:yyyyMMdd");

    // The same day is repeated as startDate and endDate in dashed form.
    const QString dashedDay = _date.toString("yyyy-MM-dd");
    url += "&detail=1&pd=4&start=1&display=25&startDate=";
    url += dashedDay;
    url += "&endDate=";
    url += dashedDay;

    url += "&page=";
    url += QString::number(_nPage);
    return url;
}
// Resets the crawl state: clears the "finished" flag and selects the
// list-fetch state, which Update() acts on at its next call.
void SNaverNewsManage::Start()
{
    m_bFinalLast = false;
    m_nMode = E_PROCESS_LIST_RUN;
}
bool SNaverNewsManage::Update()
{
if (m_bFinalLast) return m_bFinalLast;
switch(m_nMode)
{
case E_PROCESS_LIST_RUN:
if (UseProcess() == false)
{
m_strListQuery = makeGetListQuery(m_strKeyword,m_date,m_ncList);
m_pMain->InsertLog("Start : " + QString::number(m_ncList) + " Date : " + m_date.toString("yyyy-MM-dd"));
{
#if defined(Q_OS_WIN32)
m_pro[0].start("CrawlerProcess",QStringList()<< "naver" << "news_list" << m_strListQuery);
#else
m_pro[0].start("./CrawlerProcess",QStringList()<< "naver" << "news_list" << m_strListQuery);
#endif
m_ncList++;
}
m_nMode = E_PROCESS_LIST_FINISH_WAIT;
m_nWait = 0;
}
break;
case E_PROCESS_URL_RUN:
if (UseProcess() == false)
{
m_pMain->InsertLog("(" + QString::number(m_ncUrl+1) + "/" + QString::number(m_strListURL.size()) + ")");
{
#if defined(Q_OS_WIN32)
m_pro[0].start("CrawlerProcess",QStringList() << "naver" << "news_data" << m_strListURL.at(m_ncUrl) << m_strGroupID << m_strKeywordID );
#else
m_pro[0].start("./CrawlerProcess",QStringList() << "naver" << "news_data" << m_strListURL.at(m_ncUrl) << m_strGroupID << m_strKeywordID );
#endif
//m_ncUrl++;
}
m_nMode = E_PROCESS_URL_FINISH_WAIT;
m_nWait = 0;
}
break;
case E_PROCESS_COMMENT_RUN:
if (UseProcess() == false)
{
#if defined(Q_OS_WIN32)
m_pro[0].start("CrawlerProcess",QStringList() << "naver" << "news_comm" << m_strListURL.at(m_ncUrl) + "&m_view=1" << m_strGroupID << "" );
#else
m_pro[0].start("./CrawlerProcess",QStringList() << "naver" << "news_comm" << m_strListURL.at(m_ncUrl) + "&m_view=1" << m_strGroupID << "" );
#endif
m_ncUrl++;
m_nMode = E_PROCESS_COMMENT_FINISH_WAIT;
m_nWait = 0;
}
break;
case E_PROCESS_LIST_FINISH_WAIT:
case E_PROCESS_URL_FINISH_WAIT:
case E_PROCESS_COMMENT_FINISH_WAIT:
m_nWait++;
if (m_nWait >= 180)
{
m_pMain->InsertLog("Kill Process.");
m_pro[0].kill();
ReLoadList();
}
break;
}
return m_bFinalLast;
}
// Reacts to the crawler child process finishing, according to the state the
// manager was waiting in.
//   _pPro   : the finished process; not used by this function.
//   _strOut : text produced by the process. In the list state its ending
//             ("last", "block", "loading", "nothing") and its lines starting
//             with 'o' are interpreted below.
void SNaverNewsManage::processFinished(QProcess *_pPro,QString _strOut)
{
    switch(m_nMode)
    {
    case E_PROCESS_LIST_FINISH_WAIT:
    {
        // Decide whether this was the final list page: the crawler said so,
        // the hard page cap (160) was reached, or the configured page limit
        // (when positive) was reached.
        m_bLast = false;
        if (_strOut.right(4) == "last" || m_ncList >= 160)
            m_bLast = true;
        if (m_bLast == false && m_nUntilPage > 0 && m_ncList >= m_nUntilPage)
            m_bLast = true;

        // "block" / "loading" endings: request the same list page again.
        // The two endings cannot both match, so one combined test suffices.
        bool reloaded = false;
        if (_strOut.right(5) == "block" || _strOut.right(7) == "loading")
        {
            reloaded = true;
            ReLoadList();
        }

        // "nothing" ending: go straight back to the list state.
        if (_strOut.right(7) == "nothing")
        {
            m_nMode = E_PROCESS_LIST_RUN;
            CheckLast();
            return;
        }

        // Split the output once and reuse it for every check below.
        const QStringList strOutList = _strOut.split("\n");

        // Fewer than three lines, or a blank third line, also triggers a
        // reload unless one was already requested above.
        const bool thirdLineMissing =
            strOutList.length() <= 2 || strOutList.at(2).trimmed().length() == 0;
        if (thirdLineMissing && reloaded == false)
            ReLoadList();

        // NOTE(review): after a reload requested above, execution still
        // continues here and may switch to E_PROCESS_URL_RUN, overriding the
        // state ReLoadList() selected — confirm this is intended.

        // Collect the article URLs: lines starting with 'o', with the first
        // two characters stripped.
        m_strListURL.clear();
        foreach (const QString &line, strOutList)
        {
            if (line.isEmpty()) continue;
            if (line.at(0) == QChar('o'))
            {
                // mid(2) yields an empty string for lines shorter than three
                // characters, where right(length()-2) would be handed a
                // negative or zero count; such lines carry no URL and are
                // skipped instead of being queued.
                const QString url = line.mid(2).trimmed();
                if (url.isEmpty()) continue;
                m_strListURL.push_back(url);
            }
        }

        m_ncUrl = 0;
        if (m_strListURL.size() == 0)
        {
            // Nothing to fetch from this page: return to the list state.
            m_nMode = E_PROCESS_LIST_RUN;
            CheckLast();
        }
        else
            m_nMode = E_PROCESS_URL_RUN;
        break;
    }
    case E_PROCESS_URL_FINISH_WAIT:
        // Article fetched: continue with the comments of the same URL.
        if (UseProcess() == false)
        {
            m_nMode = E_PROCESS_COMMENT_RUN;
        }
        break;
    case E_PROCESS_COMMENT_FINISH_WAIT:
        // m_ncUrl was already advanced when the comment step was launched.
        if (m_ncUrl >= m_strListURL.size())
        {
            // Every URL of this page is done: back to the list state.
            m_nMode = E_PROCESS_LIST_RUN;
            CheckLast();
            m_bLast = false;
        }
        else
            m_nMode = E_PROCESS_URL_RUN;
        break;
    }
}
// Schedules the list page to be requested again: steps the page counter back
// by one, selects the list-fetch state and logs the reload.
void SNaverNewsManage::ReLoadList()
{
    m_ncList -= 1;
    // NOTE(review): a negative counter is reset to 1 while 0 is kept as is —
    // confirm whether page numbers are meant to start at 0 or at 1.
    if (m_ncList < 0)
    {
        m_ncList = 1;
    }

    m_nMode = E_PROCESS_LIST_RUN;
    m_pMain->InsertLog("Reload List");
}