diff --git a/CrawlerList/CrawlerList.pro b/CrawlerList/CrawlerList.pro index 37bcdc8..2a4656c 100644 --- a/CrawlerList/CrawlerList.pro +++ b/CrawlerList/CrawlerList.pro @@ -27,7 +27,8 @@ SOURCES += main.cpp\ sinstatagmanage.cpp \ skakaousermanage.cpp \ sfacebooktagmanage.cpp \ - sfacebookusermanage.cpp + sfacebookusermanage.cpp \ + snaverblogaccuracymanager.cpp HEADERS += widget.h \ smanage.h \ @@ -43,5 +44,6 @@ HEADERS += widget.h \ sinstatagmanage.h \ skakaousermanage.h \ sfacebooktagmanage.h \ - sfacebookusermanage.h + sfacebookusermanage.h \ + snaverblogaccuracymanage.h diff --git a/CrawlerList/snaverblogaccuracymanage.h b/CrawlerList/snaverblogaccuracymanage.h new file mode 100644 index 0000000..a1f1da6 --- /dev/null +++ b/CrawlerList/snaverblogaccuracymanage.h @@ -0,0 +1,35 @@ +#ifndef SNAVERBLOGACCURACYMANAGER_H +#define SNAVERBLOGACCURACYMANAGER_H +#include "smanage.h" + +class SNaverBlogAccuracyManage : public SManage +{ +public: + enum E_PROCESS_STATE + { + E_PROCESS_LIST_RUN = 0, + E_PROCESS_LIST_FINISH_WAIT, + E_PROCESS_URL_RUN, + E_PROCESS_URL_FINISH_WAIT, + E_PROCESS_COMMENT_RUN, + E_PROCESS_COMMENT_FINISH_WAIT, + }; +public: + SNaverBlogAccuracyManage(QObject *pObject); +private: + QString makeGetListQuery(QString _str,QDate _date); + QString makeGetCommentQuery(QString _strUrl); +private: + QString m_strQuery; + QVector<QString> m_strListURL; + const QString C_TABLE_URL; + const QString C_TABLE_COM; + int m_nUrlTable; +protected: + bool Update(); + void Start(); + void processFinished(QProcess *pPro,QString _strOut); + void ReLoadList(); +}; +#endif // SNAVERBLOGACCURACYMANAGER_H + diff --git a/CrawlerList/snaverblogaccuracymanager.cpp b/CrawlerList/snaverblogaccuracymanager.cpp new file mode 100644 index 0000000..0fed5e6 --- /dev/null +++ b/CrawlerList/snaverblogaccuracymanager.cpp @@ -0,0 +1,249 @@ +#include "snaverblogaccuracymanage.h" +#include "widget.h" +#include +#include +#include +#include +#include +#include + 
+SNaverBlogAccuracyManage::SNaverBlogAccuracyManage(QObject *pObject) : SManage(pObject) , C_TABLE_URL("NAVER_BLOG_BODY_") , C_TABLE_COM("NAVER_BLOG_REPLY_") +{ + m_nID = 0; +} + +QString SNaverBlogAccuracyManage::makeGetListQuery(QString _str,QDate _date) +{ + //http://cafeblog.search.naver.com/search.naver?where=post&query=%EC%84%B1%ED%98%95&ie=utf8&st=date&sm=tab_opt&date_from=20140101&date_to=20150311&date_option=6&srchby=all&dup_remove=1&post_blogurl=&post_blogurl_without=&nso=so%3Add%2Ca%3Aall%2Cp%3Afrom20140101to20150311&mson=0 + //http://cafeblog.search.naver.com/search.naver?where=post&sm=tab_pge&query=%EC%84%B1%ED%98%95&st=date&date_option=6&date_from=20140101&date_to=20140101&dup_remove=1&post_blogurl=&post_blogurl_without=&srchby=all&nso=so%3Add%2Cp%3Afrom20140101to20140101&ie=utf8&start=31 + + + + QString str; + QString strDate = _date.toString("yyyyMMdd"); + //http://cafeblog.search.naver.com/search.naver?where=post&sm=tab_pge&query=%EC%95%84%EC%9D%B4%ED%8F%B0&st=date&date_option=6&date_from=20131103&date_to=20131103&dup_remove=1&post_blogurl=&post_blogurl_without=&srchby=all&nso=so%3Add%2Cp%3Afrom20131103to20131103&ie=utf8&start=11 + /* + str = "http://cafeblog.search.naver.com/search.naver?where=post&sm=tab_pge&query="; + str += EncodetoUtf8(_str,true); + //str += "&st=date&date_option=6&date_from=" + strDate + "&date_to=" + strDate ; + str += "&st=date&date_option=6&date_from="; + str += strDate; + str += "&date_to="; + str += strDate ; + str += "&dup_remove=1&post_blogurl=&post_blogurl_without=&srchby=all&nso=so%3Add%2Cp%3Afrom"; + str += strDate + "to" + strDate +"&ie=utf8&start="; + str += QString::number(m_ncList); + */ + //https://search.naver.com/search.naver?where=post&sm=tab_pge&query=%EC%84%B1%ED%98%95&st=sim&date_option=6&date_from=20030520&date_to=20160512&dup_remove=1&post_blogurl=&post_blogurl_without=&srchby=all&nso=p%3Afrom20030520to20160512&ie=utf8&start=11 + str = 
"https://search.naver.com/search.naver?where=post&sm=tab_pge&query="; + str += EncodetoUtf8(_str,true); + str += "&st=sim&date_option=6&date_from=20030520&date_to="; + str += strDate; + str += "&dup_remove=1&post_blogurl="; + str += m_strAuthorship; + str += "&post_blogurl_without=&srchby=all&nso=p%3Afrom20030520to"; + str += strDate + "&ie=utf8&start="; + str += QString::number(m_ncList); + + return str; +} + +QString SNaverBlogAccuracyManage::makeGetCommentQuery(QString _strUrl) +{ + //http://blog.naver.com/kohaku3533/220149821481/CommentList.nhn?blogId=kohaku3533&logNo=220149821481&currentPage=&isMemolog=false&focusingCommentNo=&showLastPage=true&shortestContentAreaWidth=false; + QStringList strList = _strUrl.split("/"); + QString strOut = ""; + if(strList.at(2).compare("blog.naver.com") == 0) + { + //strOut = _strUrl; + strOut = "http://blog.naver.com"; + strOut += "/CommentList.nhn?blogId="; + strOut += strList.at(3); + strOut += "&logNo="; + strOut += strList.at(4); + strOut += "&currentPage=&isMemolog=false&focusingCommentNo=&showLastPage=true&shortestContentAreaWidth=false"; + } + else //id.blog.me + { + strOut = "http://blog.naver.com"; + //strOut += strList.at(2).split(".").at(0); + //strOut += "/"; + //strOut += strList.at(3); + strOut += "/CommentList.nhn?blogId="; + strOut += strList.at(2).split(".").at(0); + strOut += "&logNo="; + strOut += strList.at(3); + strOut += "&currentPage=&isMemolog=false&focusingCommentNo=&showLastPage=true&shortestContentAreaWidth=false"; + } + return strOut; +} + +void SNaverBlogAccuracyManage::Start() +{ + m_nMode = E_PROCESS_LIST_RUN; + m_ncList = 1; + m_bFinalLast = false; +} + +void SNaverBlogAccuracyManage::processFinished(QProcess *pPro,QString _strOut) +{ + switch(m_nMode) + { + case E_PROCESS_LIST_FINISH_WAIT: + { + m_bLast = false; + bool reloaded = false; + if (_strOut.right(4) == "last" || m_ncList >= 991 ) + m_bLast = true; + + if (m_bLast == false && m_nUntilPage > 0) + { + if ((m_ncList/10) >= m_nUntilPage) + m_bLast = true; + 
} + + if (_strOut.right(5) == "block") + { + reloaded = true; + ReLoadList(); + } + + if(_strOut.right(7) == "loading") + { + reloaded = true; + ReLoadList(); + } + QStringList strOutList = _strOut.split("\n"); + if(strOutList.length() > 2) + { + if(_strOut.split("\n").at(2).trimmed().length() == 0) + { + if(reloaded == false) + { + ReLoadList(); + } + } + } + else + { + if(reloaded == false) + { + ReLoadList(); + } + } + m_strListURL.clear(); + foreach(QString str,_strOut.split("\n")) + { + if (str.isEmpty()) continue; + if (str.at(0) == QChar('o')) + { + if (str.right(str.length()-2).trimmed().isEmpty() == false) + m_strListURL.push_back(str.right(str.length()-2).trimmed()); + } + } + m_ncUrl = 0; + if (m_strListURL.size() == 0) + { + m_nMode = E_PROCESS_LIST_RUN; + CheckLast(); + } + else + m_nMode = E_PROCESS_URL_RUN; + break; + } + case E_PROCESS_URL_FINISH_WAIT: + if (UseProcess() == false) + { + m_nMode = E_PROCESS_COMMENT_RUN; + } + break; + case E_PROCESS_COMMENT_FINISH_WAIT: + if (m_ncUrl >= m_strListURL.size()) + { + m_nMode = E_PROCESS_LIST_RUN; + CheckLast(); + m_bLast = false; + } + else + m_nMode = E_PROCESS_URL_RUN; + break; + } +} + +bool SNaverBlogAccuracyManage::Update() +{ + if (m_bFinalLast) return m_bFinalLast; + + switch(m_nMode) + { + case E_PROCESS_LIST_RUN: + if (UseProcess() == false) + { + m_strQuery = makeGetListQuery(m_strKeyword,m_date); + //m_pMain->InsertLog(m_nID,"Start : " + QString::number(m_ncList) + " Date : " + m_date.toString("yyyy-MM-dd")); + m_pMain->InsertLog("Start : " + QString::number(m_ncList) + " Date : " + m_date.toString("yyyy-MM-dd")); + { +#if defined(Q_OS_WIN32) + m_pro[0].start("CrawlerProcess",QStringList()<< "naver" << "blog_list" << m_strQuery << m_strGroupID << m_strKeywordID); +#else + m_pro[0].start("./CrawlerProcess",QStringList()<< "naver" << "blog_list" << m_strQuery << m_strGroupID << m_strKeywordID); +#endif + m_ncList+=10; + } + m_nMode = E_PROCESS_LIST_FINISH_WAIT; + m_nWait = 0; + } + break; + case 
E_PROCESS_URL_RUN: + if (UseProcess() == false) + { + m_pMain->InsertLog("(" + QString::number(m_ncUrl+1) + "/" + QString::number(m_strListURL.size()) + ")"); +#if defined(Q_OS_WIN32) + m_pro[0].start("CrawlerProcess",QStringList() << "naver" << "blog_url" << m_strListURL.at(m_ncUrl) << m_strGroupID << m_strKeywordID ); +#else + m_pro[0].start("./CrawlerProcess",QStringList() << "naver" << "blog_url" << m_strListURL.at(m_ncUrl) << m_strGroupID << m_strKeywordID ); +#endif + m_nMode = E_PROCESS_URL_FINISH_WAIT; + m_nWait = 0; + } + break; + case E_PROCESS_COMMENT_RUN: + if (UseProcess() == false) + { +#if defined(Q_OS_WIN32) + m_pro[0].start("CrawlerProcess",QStringList() << "naver" << "blog_comm" << makeGetCommentQuery(m_strListURL.at(m_ncUrl++)) << m_strGroupID << "" ); +#else + m_pro[0].start("./CrawlerProcess",QStringList() << "naver" << "blog_comm" << makeGetCommentQuery(m_strListURL.at(m_ncUrl++)) << m_strGroupID << "" ); +#endif + m_nMode = E_PROCESS_COMMENT_FINISH_WAIT; + m_nWait = 0; + } + break; + case E_PROCESS_LIST_FINISH_WAIT: + case E_PROCESS_URL_FINISH_WAIT: + case E_PROCESS_COMMENT_FINISH_WAIT: + m_nWait++; + if (m_nWait >=300) + { + { + { + m_pMain->InsertLog("Kill Process."); + m_pro[0].kill(); + } + } + ReLoadList(); + } + break; + } + return m_bFinalLast; +} + +void SNaverBlogAccuracyManage::ReLoadList() +{ + m_nMode = E_PROCESS_LIST_RUN; + m_ncList -= 10; + if(m_ncList < 0) + m_ncList = 1; + m_pMain->InsertLog("Reload List"); + +} diff --git a/CrawlerList/snaverblogmanage.cpp b/CrawlerList/snaverblogmanage.cpp index 41865b4..c2af4cf 100644 --- a/CrawlerList/snaverblogmanage.cpp +++ b/CrawlerList/snaverblogmanage.cpp @@ -69,7 +69,7 @@ QString SNaverBlogManage::makeGetCommentQuery(QString _strUrl) } else //id.blog.me { - strOut = "http://blog.naver.com"; + strOut = "http://blog.naver.com/"; //strOut += strList.at(2).split(".").at(0); //strOut += "/"; //strOut += strList.at(3); diff --git a/CrawlerList/widget.cpp b/CrawlerList/widget.cpp index 
88037cd..34cc9e6 100644 --- a/CrawlerList/widget.cpp +++ b/CrawlerList/widget.cpp @@ -21,6 +21,7 @@ #include "sinstausermanage.h" #include "sfacebooktagmanage.h" #include "sfacebookusermanage.h" +#include "snaverblogaccuracymanage.h" #include #include @@ -49,6 +50,7 @@ Widget::Widget(QWidget *parent) : QWidget(parent) , m_nMode(E_MODE_WAIT) m_pInstaUser = new SInstaUserManage(this); m_pFacebookTag = new SFacebookTagManage(this); m_pFacebookUser = new SFacebookUserManage(this); + m_pNaverBlogAccuracy = new SNaverBlogAccuracyManage(this); m_pManage[0] = m_pNaverCafe; m_pManage[1] = m_pNaverBlog; @@ -63,6 +65,7 @@ Widget::Widget(QWidget *parent) : QWidget(parent) , m_nMode(E_MODE_WAIT) m_pManage[10] = m_pInstaUser; m_pManage[11] = m_pFacebookTag; m_pManage[12] = m_pFacebookUser; + m_pManage[13] = m_pNaverBlogAccuracy; m_db = QSqlDatabase::addDatabase("QMYSQL"); m_db.setHostName("bigbird.iptime.org"); @@ -365,6 +368,7 @@ void Widget::RefreshButton() case 10:str += ", Instagram User"; break; case 11:str += ", Facebook Tag"; break; case 12:str += ", Facebook User"; break; + case 13:str += ", Naver Blog Accuracy"; break; } m_pcb->addItem(str,query.value(7)); } diff --git a/CrawlerList/widget.h b/CrawlerList/widget.h index 15903fa..048d5a5 100644 --- a/CrawlerList/widget.h +++ b/CrawlerList/widget.h @@ -25,7 +25,7 @@ class SInstaTagManage; class SInstaUserManage; class SFacebookTagManage; class SFacebookUserManage; - +class SNaverBlogAccuracyManage; #define SAFE_DELETE(p) {if(p) delete (p); (p) = NULL; } @@ -55,7 +55,7 @@ private: QLineEdit *m_pedStartDay; QTimer m_timer,m_timerAlive; QSqlDatabase m_db; - static const int C_PLATFORM_MAX = 13; + static const int C_PLATFORM_MAX = 14; SManage *m_pManage[C_PLATFORM_MAX]; QListWidget *m_pResultList; QString m_strFileName; @@ -74,7 +74,7 @@ private: SInstaUserManage *m_pInstaUser; SFacebookTagManage *m_pFacebookTag; SFacebookUserManage *m_pFacebookUser; - + SNaverBlogAccuracyManage *m_pNaverBlogAccuracy; int 
m_nStartTime,m_nRangeTime,m_nPlatform; //QGroupBox *m_pgbManual; QCheckBox *m_pcheckboxReal;