246 lines
8.9 KiB
C++
246 lines
8.9 KiB
C++
#include "snavercafemanage.h"
|
|
#include "widget.h"
|
|
#include <QSqlQuery>
|
|
#include <QSqlError>
|
|
#include <qDebug>
|
|
#include <QFile>
|
|
|
|
// Constructs the Naver cafe crawl manager.
//
// pObject is handed unchanged to the SManage base constructor. The only state
// touched here is the manager ID, which starts at 0.
SNaverCafeManage::SNaverCafeManage(QObject *pObject)
    : SManage(pObject)
{
    this->m_nID = 0;
}
|
|
|
|
// Builds the Naver cafe-article search URL for one keyword, one day and one
// result offset.
//
//   _str   : search keyword; it is run through EncodetoUtf8(_str, true) before
//            being appended to the "query" parameter.
//   _date  : used as both ends of the date range (date_from == date_to).
//   _nPage : value written to the trailing "start" parameter.
//
// m_strAuthorship is appended verbatim as the "cafe_url" parameter.
// The "nso" value is emitted with literal ':' and ',' characters (it is not
// percent-encoded).
//
// Returns the complete URL.
QString SNaverCafeManage::makeGetListQuery(QString _str,QDate _date,int _nPage)
{
    // The same day is needed in two notations: dotted for date_from/date_to,
    // compact for the from/to range inside "nso".
    const QString strDayDotted  = _date.toString("yyyy.MM.dd");
    const QString strDayCompact = _date.toString("yyyyMMdd");

    QString strUrl("http://cafeblog.search.naver.com/search.naver?where=article&ie=utf8&query=");
    strUrl += EncodetoUtf8(_str,true);

    // Date window.
    strUrl += "&t=0&st=date&date_option=6&date_from=";
    strUrl += strDayDotted;
    strUrl += "&date_to=";
    strUrl += strDayDotted;

    // Search scope, restricted to the configured cafe.
    strUrl += "&srchby=text&dup_remove=1&";
    strUrl += "cafe_url=";
    strUrl += m_strAuthorship;

    // Sort/range options followed by the result offset.
    strUrl += "&without_cafe_url=&board=&sm=tab_pge&nso=so:dd,p:from";
    strUrl += strDayCompact;
    strUrl += "to";
    strUrl += strDayCompact;
    strUrl += ",a:all&start=" + QString::number(_nPage);

    return strUrl;
}
|
|
|
|
void SNaverCafeManage::Start()
|
|
{
|
|
m_nMode = E_PROCESS_LIST_RUN;
|
|
m_bFinalLast = false;
|
|
}
|
|
|
|
bool SNaverCafeManage::Update()
|
|
{
|
|
if (m_bFinalLast) return m_bFinalLast;
|
|
switch(m_nMode)
|
|
{
|
|
case E_PROCESS_LIST_RUN:
|
|
if (UseProcess() == false)
|
|
{
|
|
m_strListQuery = makeGetListQuery(m_strKeyword,m_date,m_ncList);
|
|
m_pMain->InsertLog(m_nID,"Start : " + QString::number(m_ncList) + " Date : " + m_date.toString("yyyy-MM-dd"));
|
|
{
|
|
m_pro[0].start("CrawlerProcess",QStringList()<< "naver" << "cafe_list" << m_strListQuery << m_strGroupID << m_strKeywordID);
|
|
m_pro[0].SetState(SProcess::STATE_RUNNING);
|
|
m_ncList+=10;
|
|
}
|
|
m_nMode = E_PROCESS_LIST_FINISH_WAIT;
|
|
m_nWait = 0;
|
|
}
|
|
break;
|
|
case E_PROCESS_URL_RUN:
|
|
if (UseProcess() == false)
|
|
{
|
|
m_pMain->InsertLog(m_nID,"(" + QString::number(m_ncUrl+1) + "/" + QString::number(m_strListURL.size()) + ")");
|
|
{
|
|
m_pro[0].start("CrawlerProcess",QStringList() << "naver" << "cafe_data" << m_strListURL.at(m_ncUrl) << m_strGroupID << m_strListQuery << "" );
|
|
m_pro[0].SetState(SProcess::STATE_RUNNING);
|
|
m_ncUrl++;
|
|
}
|
|
m_nMode = E_PROCESS_URL_FINISH_WAIT;
|
|
m_nWait = 0;
|
|
}
|
|
break;
|
|
case E_PROCESS_LIST_FINISH_WAIT:
|
|
case E_PROCESS_URL_FINISH_WAIT:
|
|
m_nWait++;
|
|
if (m_nWait > (100000/m_nTime))
|
|
{
|
|
//for(int i = 0; i < C_PROCESS_MAX ; i++)
|
|
{
|
|
if (m_pro[0].State() != SProcess::STATE_WAIT)
|
|
{
|
|
m_pro[0].kill();
|
|
m_pro[0].SetState(SProcess::STATE_WAIT);
|
|
m_pMain->InsertLog(m_nID,"Kill Process.");
|
|
}
|
|
}
|
|
if (m_nMode == E_PROCESS_LIST_FINISH_WAIT) return m_bFinalLast;
|
|
}
|
|
break;
|
|
}
|
|
return m_bFinalLast;
|
|
}
|
|
|
|
// Handles the completion of a crawler process and selects the next state.
//
//   _pPro   : the finished process; not used by this implementation.
//   _strOut : text output of the process.
//
// While waiting for a list request (E_PROCESS_LIST_FINISH_WAIT):
//   * m_bLast is set when the output ends with "last" or the list offset
//     m_ncList has reached 1000.
//   * m_strListURL is rebuilt from the output: every non-empty line whose first
//     character is 'o' contributes the text after its first two characters,
//     trimmed. Lines that carry no text after the marker are skipped, so a
//     bare "o" line can no longer put the marker itself (or an empty string)
//     into the URL queue.
//   * With no URLs the machine returns to E_PROCESS_LIST_RUN and CheckLast()
//     is called; otherwise it moves on to E_PROCESS_URL_RUN.
//
// While waiting for an article request (E_PROCESS_URL_FINISH_WAIT):
//   * If every queued URL has been started, the machine returns to
//     E_PROCESS_LIST_RUN, CheckLast() is called and m_bLast is cleared;
//     otherwise the next URL is scheduled via E_PROCESS_URL_RUN.
//
// In any other mode the call does nothing.
void SNaverCafeManage::processFinished(SProcess *_pPro,QString _strOut)
{
    static_cast<void>(_pPro); // unused; kept for the existing signature

    switch (m_nMode)
    {
    case E_PROCESS_LIST_FINISH_WAIT:
    {
        m_bLast = _strOut.endsWith("last") || m_ncList >= 1000;

        m_strListURL.clear();
        foreach (const QString &strLine, _strOut.split("\n"))
        {
            if (strLine.isEmpty() || strLine.at(0) != QChar('o'))
                continue;

            // mid(2) drops the marker and the character after it. Unlike
            // right(length() - 2) it yields an empty string, not the whole
            // line, when the line is shorter than two characters.
            const QString strUrl = strLine.mid(2).trimmed();
            if (strUrl.isEmpty())
                continue;

            m_strListURL.push_back(strUrl);
        }

        m_ncUrl = 0;
        if (m_strListURL.isEmpty())
        {
            m_nMode = E_PROCESS_LIST_RUN;
            CheckLast();
        }
        else
        {
            m_nMode = E_PROCESS_URL_RUN;
        }
        break;
    }

    case E_PROCESS_URL_FINISH_WAIT:
        if (m_ncUrl >= m_strListURL.size())
        {
            // Every queued article has been requested: go back to listing.
            m_nMode = E_PROCESS_LIST_RUN;
            CheckLast();
            m_bLast = false;
        }
        else
        {
            m_nMode = E_PROCESS_URL_RUN;
        }
        break;
    }
}
|
|
|
|
/*
|
|
void SNaverCafeManage::MakeTables()
|
|
{
|
|
QString strQuery = "show tables";
|
|
QSqlQuery query;
|
|
query.exec(strQuery);
|
|
int nUrlMax = -1;
|
|
while (query.next())
|
|
{
|
|
QString str = query.value(0).toString();
|
|
if (str.left(C_TABLE_URL.size()) == C_TABLE_URL.toUpper())
|
|
{
|
|
if (nUrlMax < str.mid(C_TABLE_URL.size()).toInt())
|
|
nUrlMax = str.mid(C_TABLE_URL.size()).toInt();
|
|
}
|
|
}
|
|
m_nUrlTable = nUrlMax + 1;
|
|
strQuery = "Create table " + C_TABLE_URL + QString::number(m_nUrlTable)+ "(Url CHAR(128) not null primary key,keyword_id INT,PlatformTitle CHAR(128),PlatformID CHAR(64),ArticleTitle VARCHAR(128),ArticleID CHAR(32),Date DATETIME,Nickname CHAR(32),Data VARCHAR(18432),Error CHAR(32)) CHARSET=utf8";
|
|
query.exec(strQuery);
|
|
strQuery = "Create table " + C_TABLE_COM + QString::number(m_nUrlTable)+ "(Url CHAR(128) not null,Nickname CHAR(32),Data VARCHAR(1024),Parent CHAR(64),Date DATETIME,UrlReply VARCHAR(512),RowNum INT) CHARSET=utf8";
|
|
query.exec(strQuery);
|
|
|
|
m_pMain->setWindowTitle("NaverCafeCrawler " + QString::number(m_nUrlTable));
|
|
}
|
|
|
|
void SNaverCafeManage::DropTables()
|
|
{
|
|
QString strQuery = "drop table ";
|
|
QSqlQuery query;
|
|
query.exec(strQuery + C_TABLE_URL + QString::number(m_nUrlTable));
|
|
query.exec(strQuery + C_TABLE_COM + QString::number(m_nUrlTable));
|
|
}
|
|
|
|
void SNaverCafeManage::Join()
|
|
{
|
|
m_pMain->InsertLog(m_nID,"Insert Article Data...");
|
|
QString strQuery = "insert into "
|
|
"data_" + m_strGroupID +
|
|
"(platformname , platformform , articleform ,"
|
|
"url , keyword_id , body_platformtitle , body_platformid , body_articletitle , body_articleid , body_date , body_nickname , body_data)"
|
|
"select "
|
|
"CONVERT('naver' USING utf8),"
|
|
"CONVERT('cafe' USING utf8),"
|
|
"CONVERT('article' USING utf8),"
|
|
"CONVERT(url USING utf8),"
|
|
"CONVERT(keyword_id USING utf8),"
|
|
"CONVERT(PlatformTitle USING utf8),"
|
|
"CONVERT(PlatformID USING utf8),"
|
|
"CONVERT(ArticleTitle USING utf8),"
|
|
"CONVERT(ArticleID USING utf8),"
|
|
"CONVERT(Date USING utf8),"
|
|
"CONVERT(Nickname USING utf8),"
|
|
"CONVERT(Data USING utf8)"
|
|
"from NAVER_CAFE_BODY_" + QString::number(m_nUrlTable);
|
|
QSqlQuery query;
|
|
if (query.exec(strQuery) == false)
|
|
{
|
|
m_pMain->InsertLog(m_nID,query.lastError().text());
|
|
return;
|
|
}
|
|
|
|
m_pMain->InsertLog(m_nID,"Insert Reply Data...");
|
|
|
|
strQuery = "insert into "
|
|
"data_" + m_strGroupID +
|
|
"(platformname , platformform , articleform ,"
|
|
"url , keyword_id , body_platformtitle , body_platformid , body_articletitle , body_articleid , body_date , body_nickname , body_data ,"
|
|
"reply_nickname ,reply_data, reply_parent , reply_date ,reply_urlreply ,reply_rownum )"
|
|
"select "
|
|
"CONVERT('naver' USING utf8),"
|
|
"CONVERT('cafe' USING utf8),"
|
|
"CONVERT('reply' USING utf8),"
|
|
"CONVERT(_body.url USING utf8),"
|
|
"CONVERT(_body.keyword_id USING utf8),"
|
|
"CONVERT(_body.PlatformTitle USING utf8),"
|
|
"CONVERT(_body.PlatformID USING utf8),"
|
|
"CONVERT(_body.ArticleTitle USING utf8),"
|
|
"CONVERT(_body.ArticleID USING utf8),"
|
|
"CONVERT(_body.Date USING utf8),"
|
|
"CONVERT(_body.Nickname USING utf8),"
|
|
"CONVERT(_body.Data USING utf8),"
|
|
"CONVERT(_reply.Nickname USING utf8),"
|
|
"CONVERT(_reply.Data USING utf8),"
|
|
"CONVERT(_reply.Parent USING utf8),"
|
|
"CONVERT(_reply.Date USING utf8),"
|
|
"CONVERT(_reply.UrlReply USING utf8),"
|
|
"CONVERT(_reply.RowNum USING utf8) "
|
|
"from NAVER_CAFE_BODY_" + QString::number(m_nUrlTable) + " _body INNER JOIN NAVER_CAFE_REPLY_" + QString::number(m_nUrlTable) + " _reply ON _body.Url = _reply.Url";
|
|
query.exec(strQuery);
|
|
|
|
if (query.exec(strQuery) == false)
|
|
{
|
|
m_pMain->InsertLog(m_nID,query.lastError().text());
|
|
return;
|
|
}
|
|
m_pMain->InsertLog(m_nID,"Delete data ...");
|
|
query.exec("delete from NAVER_CAFE_BODY_" + QString::number(m_nUrlTable) );
|
|
query.exec("delete from NAVER_CAFE_REPLY_" + QString::number(m_nUrlTable) );
|
|
m_pMain->InsertLog(m_nID,"Finish ... ");
|
|
}
|
|
*/
|