Files
clients/CrawlerList/snaverblogmanage.cpp
admin 637ce292d1 filterprocess update시 sqlString 변환
git-svn-id: svn://192.168.0.12/source@329 8346c931-da38-4b9b-9d4c-e48b93cbd075
2016-12-20 09:22:25 +00:00

252 lines
8.9 KiB
C++

#include "snaverblogmanage.h"
#include "widget.h"
#include <QSqlQuery>
#include <QSqlError>
#include <QDebug>
#include <QFile>
#include <QTextStream>
#include <QNetworkProxy>
/// Constructs the Naver blog crawl manager.
///
/// @param pObject  Forwarded unchanged to the SManage base constructor.
///
/// C_TABLE_URL / C_TABLE_COM are set to the "NAVER_BLOG_BODY_" and
/// "NAVER_BLOG_REPLY_" strings (presumably table-name prefixes for post bodies
/// and replies -- confirm against the code that consumes them). m_nID starts at 0.
SNaverBlogManage::SNaverBlogManage(QObject *pObject)
    : SManage(pObject)
    , C_TABLE_URL("NAVER_BLOG_BODY_")
    , C_TABLE_COM("NAVER_BLOG_REPLY_")
{
    m_nID = 0;
}
/// Builds the Naver blog-search URL for one result page of a single day.
///
/// @param _str   Search keyword; passed through EncodetoUtf8(_str, true) before
///               being placed in the `query=` parameter.
/// @param _date  Day to search; used as both `date_from` and `date_to`
///               (formatted as yyyyMMdd).
/// @return The complete search URL. The `post_blogurl=` parameter carries
///         m_strAuthorship as-is and `start=` carries the current m_ncList.
///
/// Example of the URL shape this produces:
///   https://search.naver.com/search.naver?where=post&sm=tab_pge&query=...&st=date
///   &date_option=8&date_from=20131103&date_to=20131103&dup_remove=1&post_blogurl=
///   &post_blogurl_without=&srchby=all&nso=so%3Add%2Cp%3Afrom20131103to20131103
///   &ie=utf8&start=11
/// (An earlier revision targeted http://cafeblog.search.naver.com with date_option=6.)
QString SNaverBlogManage::makeGetListQuery(QString _str,QDate _date)
{
    // The same day string is used for both ends of the date range.
    const QString day = _date.toString("yyyyMMdd");

    QString url("https://search.naver.com/search.naver?where=post&sm=tab_pge&query=");
    url += EncodetoUtf8(_str,true);

    // Date filter: from == to, so exactly one day is searched.
    url += "&st=date&date_option=8&date_from=";
    url += day;
    url += "&date_to=";
    url += day;

    // Optional author/blog restriction; m_strAuthorship is appended verbatim.
    url += "&dup_remove=1&post_blogurl=";
    url += m_strAuthorship;

    // The nso parameter repeats the date range in "from<day>to<day>" form.
    url += "&post_blogurl_without=&srchby=all&nso=so%3Add%2Cp%3Afrom";
    url += day;
    url += "to";
    url += day;

    // Paging offset of the first result on the requested page.
    url += "&ie=utf8&start=";
    url += QString::number(m_ncList);
    return url;
}
/// Converts a blog post URL into the matching CommentList.nhn URL.
///
/// Two post URL layouts are handled, both split on '/':
///   - "http://blog.naver.com/<blogId>/<logNo>"  (host is exactly blog.naver.com)
///   - "http://<blogId>.<rest-of-host>/<logNo>"  (any other host, e.g. id.blog.me;
///     the blog id is the first dot-separated label of the host)
///
/// Example result:
///   http://blog.naver.com/CommentList.nhn?blogId=kohaku3533&logNo=220149821481
///   &currentPage=&isMemolog=false&focusingCommentNo=&showLastPage=true
///   &shortestContentAreaWidth=false
///
/// @param _strUrl  Post URL as collected from the list page.
/// @return The comment-list URL, or an empty string when _strUrl does not have
///         enough '/'-separated segments to extract a blog id and log number.
///         (Previously such input indexed past the end of the split list.)
QString SNaverBlogManage::makeGetCommentQuery(QString _strUrl)
{
    const QStringList segments = _strUrl.split("/");

    // "scheme:", "", "host" -- without a host segment nothing can be derived.
    if (segments.size() < 3)
        return QString("");

    QString base;
    QString blogId;
    QString logNo;
    if (segments.at(2).compare("blog.naver.com") == 0)
    {
        // Needs .../<blogId>/<logNo> after the host.
        if (segments.size() < 5)
            return QString("");
        base   = "http://blog.naver.com";
        blogId = segments.at(3);
        logNo  = segments.at(4);
    }
    else // id.blog.me
    {
        // Needs .../<logNo> after the host.
        if (segments.size() < 4)
            return QString("");
        // NOTE(review): this base ends in '/', so the result contains
        // "blog.naver.com//CommentList.nhn". Kept byte-identical to the previous
        // output; confirm whether the double slash is intentional.
        base   = "http://blog.naver.com/";
        // split() always yields at least one element, so at(0) is safe.
        blogId = segments.at(2).split(".").at(0);
        logNo  = segments.at(3);
    }

    QString strOut = base;
    strOut += "/CommentList.nhn?blogId=";
    strOut += blogId;
    strOut += "&logNo=";
    strOut += logNo;
    strOut += "&currentPage=&isMemolog=false&focusingCommentNo=&showLastPage=true&shortestContentAreaWidth=false";
    return strOut;
}
/// Resets the crawl state so the next Update() begins with a list request:
/// clears the finished flag, rewinds the list offset to 1 and selects
/// E_PROCESS_LIST_RUN.
void SNaverBlogManage::Start()
{
    m_bFinalLast = false;
    m_ncList     = 1;
    m_nMode      = E_PROCESS_LIST_RUN;
}
/// Handles the output of a finished crawler process and advances the state
/// machine according to the current m_nMode.
///
/// @param pPro     The finished process (not used here).
/// @param _strOut  Text output of the process.
///
/// List output (E_PROCESS_LIST_FINISH_WAIT) is interpreted as follows:
///   - a trailing "last" marks the final list page;
///   - a trailing "block" or "loading" requests a reload of the same page;
///   - fewer than three lines, or a blank third line, also requests a reload;
///   - every line starting with 'o' carries a post URL after its first two
///     characters.
/// NOTE(review): after ReLoadList() has been called the code still parses the
/// URLs and may overwrite m_nMode / call CheckLast(); this ordering is kept
/// exactly as before -- confirm it is intended.
void SNaverBlogManage::processFinished(QProcess *pPro,QString _strOut)
{
    switch(m_nMode)
    {
    case E_PROCESS_LIST_FINISH_WAIT:
    {
        // Split once; the same line list serves both the sanity check and the
        // URL extraction below.
        const QStringList lines = _strOut.split("\n");

        // Last page: reported by the crawler, or the list offset reached 991
        // (presumably the search result cap -- TODO confirm).
        m_bLast = _strOut.endsWith("last") || m_ncList >= 991;

        // "block" and "loading" are mutually exclusive suffixes, so at most one
        // reload is triggered here.
        bool reloaded = false;
        if (_strOut.endsWith("block") || _strOut.endsWith("loading"))
        {
            reloaded = true;
            ReLoadList();
        }

        // Output without a non-blank third line is treated as a failed page
        // load; reload unless that already happened above.
        const bool thirdLineMissing =
            lines.length() <= 2 || lines.at(2).trimmed().isEmpty();
        if (thirdLineMissing && reloaded == false)
            ReLoadList();

        // Optional page limit: stop once the configured number of pages is reached.
        if (m_bLast == false && m_nUntilPage > 0 && (m_ncList/10) >= m_nUntilPage)
            m_bLast = true;

        // Collect the post URLs from the 'o'-prefixed lines.
        m_strListURL.clear();
        for (int i = 0; i < lines.size(); ++i)
        {
            const QString &line = lines.at(i);
            if (line.isEmpty() || line.at(0) != QChar('o'))
                continue;
            // Drop the two-character prefix, then surrounding whitespace.
            const QString url = line.right(line.length()-2).trimmed();
            if (url.isEmpty() == false)
                m_strListURL.push_back(url);
        }

        m_ncUrl = 0;
        if (m_strListURL.size() == 0)
        {
            // Nothing to fetch on this page: go back to list mode.
            m_nMode = E_PROCESS_LIST_RUN;
            CheckLast();
        }
        else
            m_nMode = E_PROCESS_URL_RUN;
        break;
    }
    case E_PROCESS_URL_FINISH_WAIT:
        // Post body fetched; move on to its comments once the process slot is free.
        if (UseProcess() == false)
        {
            m_nMode = E_PROCESS_COMMENT_RUN;
        }
        break;
    case E_PROCESS_COMMENT_FINISH_WAIT:
        if (m_ncUrl >= m_strListURL.size())
        {
            // All URLs of this page are done: request the next list page.
            m_nMode = E_PROCESS_LIST_RUN;
            CheckLast();
            m_bLast = false;
        }
        else
            m_nMode = E_PROCESS_URL_RUN;
        break;
    default:
        // Output arriving in any other mode is ignored.
        break;
    }
}
bool SNaverBlogManage::Update()
{
if (m_bFinalLast) return m_bFinalLast;
switch(m_nMode)
{
case E_PROCESS_LIST_RUN:
if (UseProcess() == false)
{
m_strQuery = makeGetListQuery(m_strKeyword,m_date);
//m_pMain->InsertLog(m_nID,"Start : " + QString::number(m_ncList) + " Date : " + m_date.toString("yyyy-MM-dd"));
m_pMain->InsertLog("Start : " + QString::number(m_ncList) + " Date : " + m_date.toString("yyyy-MM-dd"));
{
#if defined(Q_OS_WIN32)
m_pro[0].start("CrawlerProcess",QStringList()<< "naver" << "blog_list" << m_strQuery << m_strGroupID << m_strKeywordID);
#else
m_pro[0].start("./CrawlerProcess",QStringList()<< "naver" << "blog_list" << m_strQuery << m_strGroupID << m_strKeywordID);
#endif
m_ncList+=10;
}
m_nMode = E_PROCESS_LIST_FINISH_WAIT;
m_nWait = 0;
}
break;
case E_PROCESS_URL_RUN:
if (UseProcess() == false)
{
m_pMain->InsertLog("(" + QString::number(m_ncUrl+1) + "/" + QString::number(m_strListURL.size()) + ")");
#if defined(Q_OS_WIN32)
m_pro[0].start("CrawlerProcess",QStringList() << "naver" << "blog_url" << m_strListURL.at(m_ncUrl) << m_strGroupID << m_strKeywordID );
#else
m_pro[0].start("./CrawlerProcess",QStringList() << "naver" << "blog_url" << m_strListURL.at(m_ncUrl) << m_strGroupID << m_strKeywordID );
#endif
m_nMode = E_PROCESS_URL_FINISH_WAIT;
m_nWait = 0;
}
break;
case E_PROCESS_COMMENT_RUN:
if (UseProcess() == false)
{
#if defined(Q_OS_WIN32)
m_pro[0].start("CrawlerProcess",QStringList() << "naver" << "blog_comm" << makeGetCommentQuery(m_strListURL.at(m_ncUrl++)) << m_strGroupID << "" );
#else
m_pro[0].start("./CrawlerProcess",QStringList() << "naver" << "blog_comm" << makeGetCommentQuery(m_strListURL.at(m_ncUrl++)) << m_strGroupID << "" );
#endif
m_nMode = E_PROCESS_COMMENT_FINISH_WAIT;
m_nWait = 0;
}
break;
case E_PROCESS_LIST_FINISH_WAIT:
case E_PROCESS_URL_FINISH_WAIT:
case E_PROCESS_COMMENT_FINISH_WAIT:
m_nWait++;
if (m_nWait >= 180)
{
{
{
m_pMain->InsertLog("Kill Process.");
m_pro[0].kill();
}
}
ReLoadList();
}
break;
}
return m_bFinalLast;
}
/// Schedules the previous list page to be requested again: steps the list
/// offset back by 10 (resetting it to 1 if that goes negative), switches to
/// E_PROCESS_LIST_RUN and logs "Reload List".
void SNaverBlogManage::ReLoadList()
{
    // Undo the +10 applied when the list request was started.
    m_ncList -= 10;
    if (m_ncList < 0)
    {
        m_ncList = 1;
    }

    m_nMode = E_PROCESS_LIST_RUN;
    m_pMain->InsertLog("Reload List");
}