diff --git a/CrawlerList/sdaumcafemanage.cpp b/CrawlerList/sdaumcafemanage.cpp index 509a6e0..16b28e2 100644 --- a/CrawlerList/sdaumcafemanage.cpp +++ b/CrawlerList/sdaumcafemanage.cpp @@ -48,7 +48,7 @@ bool SDaumCafeManage::Update() switch(m_nMode) { case E_PROCESS_LIST_RUN: - if (UseProcess() == false && CheckTime()) + if (UseProcess() == false) { m_strListQuery = makeGetListQuery(m_strKeyword,m_date,m_ncList); m_pMain->InsertLog("Start : " + QString::number(m_ncList) + " Date : " + m_date.toString("yyyy-MM-dd")); @@ -61,7 +61,7 @@ bool SDaumCafeManage::Update() } break; case E_PROCESS_URL_RUN: - if (UseProcess() == false && CheckTime()) + if (UseProcess() == false) { m_pMain->InsertLog("(" + QString::number(m_ncUrl+1) + "/" + QString::number(m_strListURL.size()) + ")"); { @@ -85,7 +85,7 @@ bool SDaumCafeManage::Update() m_pro[0].kill(); } } - if (m_nMode == E_PROCESS_LIST_FINISH_WAIT){ReLoadList(); return m_bFinalLast;} + ReLoadList(); //return m_bFinalLast; } break; } @@ -103,6 +103,12 @@ void SDaumCafeManage::processFinished(QProcess *_pPro,QString _strOut) if (_strOut.right(4) == "last" || m_ncList >= 1000) m_bLast = true; + if (m_bLast == false && m_nUntilPage > 0) + { + if (m_ncList >= m_nUntilPage) + m_bLast = true; + } + // if (_strOut.right(5) == "block") // m_bFinalLast = true; diff --git a/CrawlerList/smanage.cpp b/CrawlerList/smanage.cpp index 5d58956..df4b015 100644 --- a/CrawlerList/smanage.cpp +++ b/CrawlerList/smanage.cpp @@ -6,8 +6,7 @@ SManage::SManage(QObject *parent) : QObject(parent),m_pMain(0) { for(int i = 0; i < C_PROCESS_MAX ;i++) - connect(&m_pro[i],SIGNAL(finished(int,QProcess::ExitStatus)),SLOT(processFinished(int,QProcess::ExitStatus))); - //connect(&m_pro[0],SIGNAL(readyReadStandardOutput()),SLOT(processReadLine())); + connect(&m_pro[i],SIGNAL(finished(int,QProcess::ExitStatus)),SLOT(processFinished(int,QProcess::ExitStatus))); } SManage::~SManage() @@ -15,7 +14,7 @@ SManage::~SManage() m_pMain = 0; } -void SManage::Start(QDate _StartDate,QDate _EndDate,QString _strKeyword,QString _strAuthorship,QString _strKeywordID,QString _strGroupID,int _nStart,QString _strTime) +void SManage::Start(QDate _StartDate,QDate _EndDate,QString _strKeyword,QString _strAuthorship,QString _strKeywordID,QString _strGroupID,int _nStart,int _nUntilPage) { m_date = _StartDate; m_dateEnd = _EndDate; @@ -25,24 +24,7 @@ void SManage::Start(QDate _StartDate,QDate _EndDate,QString _strKeyword,QString m_strAuthorship = _strAuthorship; m_ncList = _nStart; m_strGroupID = _strGroupID; - QStringList strList = _strTime.split("~"); - switch(strList.size()) - { - case 0: - m_nStartTime = 3; - m_nRangeTime = 1; - break; - case 1: - m_nStartTime = strList.at(0).toInt(); - m_nRangeTime = 1; - break; - case 2: - m_nStartTime = strList.at(0).toInt(); - m_nRangeTime = strList.at(1).toInt() - strList.at(0).toInt(); - break; - } - m_timeEnd = QDateTime::currentDateTime(); - m_timeEnd = m_timeEnd.addSecs(rand() % m_nRangeTime + m_nStartTime); + m_nUntilPage = _nUntilPage; Start(); } @@ -93,13 +75,6 @@ bool SManage::UseProcess() return false; } -void SManage::processReadLine() -{ - QProcess *pPro = (QProcess*)sender(); - qDebug() << pPro->readAllStandardOutput(); -} - - void SManage::processFinished(int exitCode,QProcess::ExitStatus exitStatus) { QProcess *pPro = (QProcess*)sender(); @@ -147,14 +122,3 @@ void SManage::WaitExitProcess() bQuit = UseProcess(); } } - -bool SManage::CheckTime() -{ - if (QDateTime::currentDateTime() > m_timeEnd) - { - m_timeEnd = QDateTime::currentDateTime(); - m_timeEnd = m_timeEnd.addSecs(rand() % m_nRangeTime + m_nStartTime); - return true; - } - return false; -} diff --git a/CrawlerList/smanage.h b/CrawlerList/smanage.h index 355cc92..5e318fd 100644 --- a/CrawlerList/smanage.h +++ b/CrawlerList/smanage.h @@ -16,13 +16,12 @@ class SManage : public QObject private: QVector m_vecList; private slots: - void processFinished(int exitCode, QProcess::ExitStatus exitStatus); - void processReadLine(); + void processFinished(int exitCode, QProcess::ExitStatus exitStatus); public: explicit SManage(QObject *parent = 0); ~SManage(); public: - void Start(QDate _StartDate,QDate _EndDate,QString _strKeyword,QString _strAuthorship,QString _strKeywordID,QString _strGroupID,int _nStart,QString _strTime); + void Start(QDate _StartDate,QDate _EndDate,QString _strKeyword,QString _strAuthorship,QString _strKeywordID,QString _strGroupID,int _nStart,int _nUntilPage); void SetParent(Widget *pWidget); void WaitExitProcess(); virtual bool Update() = 0; @@ -31,12 +30,10 @@ protected: QString EncodetoUtf8(QString _str,bool _bExt=false); virtual void processFinished(QProcess *pPro,QString _strOut) = 0; bool UseProcess(); - void CheckLast(); - bool CheckTime(); + void CheckLast(); protected: Widget *m_pMain; QDate m_date,m_dateEnd; - QDateTime m_timeEnd; int m_nMode; QString m_strKeyword; QString m_strKeywordID; @@ -48,8 +45,7 @@ protected: int m_ncList; int m_ncUrl; int m_nWait; - int m_nStartTime; - int m_nRangeTime; + int m_nUntilPage; static const int C_PROCESS_MAX = 1; QProcess m_pro[C_PROCESS_MAX]; }; diff --git a/CrawlerList/snaverblogmanage.cpp b/CrawlerList/snaverblogmanage.cpp index 83199bc..327de3c 100644 --- a/CrawlerList/snaverblogmanage.cpp +++ b/CrawlerList/snaverblogmanage.cpp @@ -96,9 +96,15 @@ void SNaverBlogManage::processFinished(QProcess *pPro,QString _strOut) { m_bLast = false; bool reloaded = false; - if (_strOut.right(4) == "last" || m_ncList >= 991) + if (_strOut.right(4) == "last" || m_ncList >= 991 ) m_bLast = true; + if (m_bLast == false && m_nUntilPage > 0) + { + if ((m_ncList/10) >= m_nUntilPage) + m_bLast = true; + } + if (_strOut.right(5) == "block") { reloaded = true; @@ -110,10 +116,7 @@ void SNaverBlogManage::processFinished(QProcess *pPro,QString _strOut) reloaded = true; ReLoadList(); } - - QStringList strOutList = _strOut.split("\n"); - if(strOutList.length() > 2) { if(_strOut.split("\n").at(2).trimmed().length() == 0) @@ -136,15 +139,16 @@ void SNaverBlogManage::processFinished(QProcess *pPro,QString _strOut) { if (str.isEmpty()) continue; if (str.at(0) == QChar('o')) - m_strListURL.push_back(str.right(str.length()-2).trimmed()); + { + if (str.right(str.length()-2).trimmed().isEmpty() == false) + m_strListURL.push_back(str.right(str.length()-2).trimmed()); + } } m_ncUrl = 0; if (m_strListURL.size() == 0) { m_nMode = E_PROCESS_LIST_RUN; - CheckLast(); - //if(m_bLast == false) - // ReLoadList(); + CheckLast(); } else m_nMode = E_PROCESS_URL_RUN; @@ -176,7 +180,7 @@ bool SNaverBlogManage::Update() switch(m_nMode) { case E_PROCESS_LIST_RUN: - if (UseProcess() == false && CheckTime()) + if (UseProcess() == false) { m_strQuery = makeGetListQuery(m_strKeyword,m_date); //m_pMain->InsertLog(m_nID,"Start : " + QString::number(m_ncList) + " Date : " + m_date.toString("yyyy-MM-dd")); @@ -190,7 +194,7 @@ bool SNaverBlogManage::Update() } break; case E_PROCESS_URL_RUN: - if (UseProcess() == false && CheckTime()) + if (UseProcess() == false) { m_pMain->InsertLog("(" + QString::number(m_ncUrl+1) + "/" + QString::number(m_strListURL.size()) + ")"); m_pro[0].start("CrawlerProcess",QStringList() << "naver" << "blog_url" << m_strListURL.at(m_ncUrl) << m_strGroupID << m_strKeywordID ); @@ -199,7 +203,7 @@ bool SNaverBlogManage::Update() } break; case E_PROCESS_COMMENT_RUN: - if (UseProcess() == false && CheckTime()) + if (UseProcess() == false) { m_pro[0].start("CrawlerProcess",QStringList() << "naver" << "blog_comm" << makeGetCommentQuery(m_strListURL.at(m_ncUrl++)) << m_strGroupID << "" ); m_nMode = E_PROCESS_COMMENT_FINISH_WAIT; @@ -218,124 +222,13 @@ bool SNaverBlogManage::Update() m_pro[0].kill(); } } - if (m_nMode == E_PROCESS_LIST_FINISH_WAIT) {ReLoadList(); return m_bFinalLast;} + ReLoadList(); } break; } return m_bFinalLast; } -void SNaverBlogManage::MakeTables() -{ - QString strQuery = "show tables"; - QSqlQuery query; - query.exec(strQuery); - int nUrlMax = -1; - while (query.next()) - { - QString str = query.value(0).toString(); - if (str.left(C_TABLE_URL.size()) == C_TABLE_URL.toUpper()) - { - if (nUrlMax < str.mid(C_TABLE_URL.size()).toInt()) - nUrlMax = str.mid(C_TABLE_URL.size()).toInt(); - } - } - m_nUrlTable = nUrlMax + 1; - strQuery = "Create table " + C_TABLE_URL + QString::number(m_nUrlTable)+ "(Url CHAR(128) not null primary key,keyword_id INT,PlatformTitle CHAR(128),PlatformID CHAR(64),ArticleTitle VARCHAR(128),ArticleID CHAR(32),Date DATETIME,Nickname CHAR(32),Data VARCHAR(18432),Error CHAR(32)) CHARSET=utf8"; - query.exec(strQuery); - strQuery = "Create table " + C_TABLE_COM + QString::number(m_nUrlTable)+ "(Url CHAR(128) not null,Nickname CHAR(32),Data VARCHAR(1024),Parent CHAR(64),Date DATETIME,UrlReply VARCHAR(512),RowNum INT) CHARSET=utf8"; - query.exec(strQuery); -/* - strQuery = "Create table " + C_TABLE_URL + QString::number(m_nUrlTable)+ "(URL varchar(128) not null primary key , ID varchar(64) ,NICK varchar(64),TITLE varchar(256), DATA varchar(20480) , DATE char(32) , BLOG_ID varchar(64) , OTHER varchar(128), ERROR char(128)) CHARSET=utf8"; - query.exec(strQuery); - strQuery = "Create table " + C_TABLE_COM + QString::number(m_nUrlTable)+ "(URL varchar(128) not null, NICK varchar(64),DATA varchar(20480),PARENT varchar(64),DATE char(32),URL_COMMENT varchar(512) not null,URL_NICK varchar(128)) CHARSET=utf8"; - query.exec(strQuery); -*/ - - m_pMain->setWindowTitle("NaverBlogCrawler " + QString::number(m_nUrlTable)); -} - -void SNaverBlogManage::DropTables() -{ - QString strQuery = "drop table "; - QSqlQuery query; - query.exec(strQuery + C_TABLE_URL + QString::number(m_nUrlTable)); - query.exec(strQuery + C_TABLE_COM + QString::number(m_nUrlTable)); -} - -void SNaverBlogManage::Join() -{ - //m_pMain->InsertLog(m_nID,"Insert Article Data..."); - m_pMain->InsertLog("Insert Article Data..."); - QString strQuery = "insert into " - "data_" + m_strGroupID + - "(platformname , platformform , articleform ," - "url , keyword_id , body_platformtitle , body_platformid , body_articletitle , body_articleid , body_date , body_nickname , body_data)" - "select " - "CONVERT('naver' USING utf8)," - "CONVERT('blog' USING utf8)," - "CONVERT('article' USING utf8)," - "CONVERT(url USING utf8)," - "CONVERT(keyword_id USING utf8)," - "CONVERT(PlatformTitle USING utf8)," - "CONVERT(PlatformID USING utf8)," - "CONVERT(ArticleTitle USING utf8)," - "CONVERT(ArticleID USING utf8)," - "CONVERT(Date USING utf8)," - "CONVERT(Nickname USING utf8)," - "CONVERT(Data USING utf8)" - "from NAVER_BLOG_BODY_" + QString::number(m_nUrlTable); - QSqlQuery query; - if (query.exec(strQuery) == false) - { - //m_pMain->InsertLog(m_nID,query.lastError().text()); - m_pMain->InsertLog(query.lastError().text()); - return; - } - - //m_pMain->InsertLog(m_nID,"Insert Reply Data..."); - m_pMain->InsertLog("Insert Reply Data..."); - - strQuery = "insert into " - "data_" + m_strGroupID + - "(platformname , platformform , articleform ," - "url , keyword_id , body_platformtitle , body_platformid , body_articletitle , body_articleid , body_date , body_nickname , body_data ," - "reply_nickname ,reply_data, reply_parent , reply_date ,reply_urlreply ,reply_rownum )" - "select " - "CONVERT('naver' USING utf8)," - "CONVERT('blog' USING utf8)," - "CONVERT('reply' USING utf8)," - "CONVERT(_body.url USING utf8)," - "CONVERT(_body.keyword_id USING utf8)," - "CONVERT(_body.PlatformTitle USING utf8)," - "CONVERT(_body.PlatformID USING utf8)," - "CONVERT(_body.ArticleTitle USING utf8)," - "CONVERT(_body.ArticleID USING utf8)," - "CONVERT(_body.Date USING utf8)," - "CONVERT(_body.Nickname USING utf8)," - "CONVERT(_body.Data USING utf8)," - "CONVERT(_reply.Nickname USING utf8)," - "CONVERT(_reply.Data USING utf8)," - "CONVERT(_reply.Parent USING utf8)," - "CONVERT(_reply.Date USING utf8)," - "CONVERT(_reply.UrlReply USING utf8)," - "CONVERT(_reply.RowNum USING utf8) " - "from " + C_TABLE_URL + QString::number(m_nUrlTable) + " _body INNER JOIN " + C_TABLE_COM + QString::number(m_nUrlTable) + " _reply ON _body.Url = _reply.Url"; - - if (query.exec(strQuery) == false) - { - //m_pMain->InsertLog(m_nID,query.lastError().text()); - m_pMain->InsertLog(query.lastError().text()); - return; - } - //m_pMain->InsertLog(m_nID,"Delete data ..."); - m_pMain->InsertLog("Delete data ..."); - query.exec("delete from NAVER_BLOG_BODY_" + QString::number(m_nUrlTable) ); - query.exec("delete from NAVER_BLOG_REPLY_" + QString::number(m_nUrlTable) ); - //m_pMain->InsertLog(m_nID,"Finish ... "); - m_pMain->InsertLog("Finish ... "); -} - void SNaverBlogManage::ReLoadList() { m_nMode = E_PROCESS_LIST_RUN; diff --git a/CrawlerList/snaverblogmanage.h b/CrawlerList/snaverblogmanage.h index bda9bd9..a6d18fd 100644 --- a/CrawlerList/snaverblogmanage.h +++ b/CrawlerList/snaverblogmanage.h @@ -16,11 +16,6 @@ public: }; public: SNaverBlogManage(QObject *pObject); - void MakeTables(); - void DropTables(); - void SaveCsv(QString _strName); - void Join(); - int GetTableNumber() {return m_nUrlTable;} private: QString makeGetListQuery(QString _str,QDate _date); QString makeGetCommentQuery(QString _strUrl); diff --git a/CrawlerList/snavercafemanage.cpp b/CrawlerList/snavercafemanage.cpp index fba0dcc..5615c3b 100644 --- a/CrawlerList/snavercafemanage.cpp +++ b/CrawlerList/snavercafemanage.cpp @@ -47,7 +47,7 @@ bool SNaverCafeManage::Update() switch(m_nMode) { case E_PROCESS_LIST_RUN: - if (UseProcess() == false && CheckTime()) + if (UseProcess() == false) { m_strListQuery = makeGetListQuery(m_strKeyword,m_date,m_ncList); m_pMain->InsertLog("Start : " + QString::number(m_ncList) + " Date : " + m_date.toString("yyyy-MM-dd")); @@ -60,7 +60,7 @@ bool SNaverCafeManage::Update() } break; case E_PROCESS_URL_RUN: - if (UseProcess() == false && CheckTime()) + if (UseProcess() == false) { m_pMain->InsertLog("(" + QString::number(m_ncUrl+1) + "/" + QString::number(m_strListURL.size()) + ")"); { @@ -84,7 +84,7 @@ bool SNaverCafeManage::Update() m_pro[0].kill(); } } - if (m_nMode == E_PROCESS_LIST_FINISH_WAIT) {ReLoadList(); return m_bFinalLast;} + ReLoadList(); } break; } @@ -102,10 +102,17 @@ void SNaverCafeManage::processFinished(QProcess *_pPro,QString _strOut) if (_strOut.right(4) == "last" || m_ncList >= 1000) m_bLast = true; + + if (m_bLast == false && m_nUntilPage > 0) + { + if ((m_ncList/10) >= m_nUntilPage) + m_bLast = true; + } + if (_strOut.right(5) == "block") { reloaded = true; - ReLoadList(); + ReLoadList(); } if(_strOut.right(7) == "loading") { diff --git a/CrawlerList/widget.cpp b/CrawlerList/widget.cpp index 29b9daf..96d0862 100644 --- a/CrawlerList/widget.cpp +++ b/CrawlerList/widget.cpp @@ -42,7 +42,7 @@ Widget::Widget(QWidget *parent) : QWidget(parent) , m_nMode(E_MODE_WAIT) return; } - for (int i = 0 ; i < C_CRAWLER_MAX ; i++) + for (int i = 0 ; i < C_PLATFORM_MAX ; i++) m_pManage[i]->SetParent(this); setWindowTitle("CrawlerList " + QString::number(QCoreApplication::applicationPid())); @@ -66,7 +66,7 @@ Widget::~Widget() } } m_db.close(); - for (int i = 0 ; i < C_CRAWLER_MAX ; i++) + for (int i = 0 ; i < C_PLATFORM_MAX ; i++) m_pManage[i]->SetParent(0); } @@ -96,11 +96,11 @@ QGroupBox *Widget::setRealGroupWidgets() vlayout->addLayout(hlayout); } - m_pedTime = new QLineEdit(this); - m_pedTime->setText(QString("3")); + m_pedUntilPage = new QLineEdit(this); + m_pedUntilPage->setText(QString("0")); { QHBoxLayout *hlayout = new QHBoxLayout; - hlayout->addWidget(m_pedTime); + hlayout->addWidget(m_pedUntilPage); hlayout->addWidget(pbtStart); hlayout->addWidget(pbtStop); vlayout->addLayout(hlayout); @@ -178,7 +178,7 @@ void Widget::Start() if (query.next() == false) return; m_nPlatform = 0; - if(0 <= query.value(6).toInt() && query.value(6).toInt() < C_CRAWLER_MAX) + if(0 <= query.value(6).toInt() && query.value(6).toInt() < C_PLATFORM_MAX) m_nPlatform = query.value(6).toInt(); else { @@ -188,13 +188,13 @@ void Widget::Start() if (m_pcheckboxReal->isChecked()) { - m_pManage[m_nPlatform]->Start(QDate::currentDate(),QDate::currentDate(), + m_pManage[m_nPlatform]->Start(QDate::currentDate(),QDate::currentDate().addDays(1), query.value(2).toString().trimmed(),// keyword query.value(3).toString().trimmed(),// authorship query.value(4).toString().trimmed(),// keyword_id query.value(5).toString().trimmed(), 1, - m_pedTime->text().trimmed()); + m_pedUntilPage->text().trimmed().toInt()); } else { @@ -205,7 +205,7 @@ void Widget::Start() query.value(4).toString().trimmed(),// keyword_id query.value(5).toString().trimmed(), 1, - m_pedTime->text().trimmed()); + m_pedUntilPage->text().trimmed().toInt()); } SetCrawlingState("Start"); m_nMode = E_MODE_RUN; diff --git a/CrawlerList/widget.h b/CrawlerList/widget.h index 60608b1..7436d88 100644 --- a/CrawlerList/widget.h +++ b/CrawlerList/widget.h @@ -38,11 +38,11 @@ public: }; private: - QLineEdit *m_pedTime; + QLineEdit *m_pedUntilPage; QTimer m_timer; QSqlDatabase m_db; - static const int C_CRAWLER_MAX = 3; - SManage *m_pManage[C_CRAWLER_MAX]; + static const int C_PLATFORM_MAX = 3; + SManage *m_pManage[C_PLATFORM_MAX]; QListWidget *m_pResultList; QString m_strFileName; QComboBox *m_pcb;