diff --git a/CrawlerList/CrawlerList.pro b/CrawlerList/CrawlerList.pro index e402a00..db12138 100644 --- a/CrawlerList/CrawlerList.pro +++ b/CrawlerList/CrawlerList.pro @@ -4,7 +4,7 @@ # #------------------------------------------------- -QT += core gui sql +QT += core gui sql network greaterThan(QT_MAJOR_VERSION, 4): QT += widgets @@ -13,6 +13,11 @@ TEMPLATE = app SOURCES += main.cpp\ - widget.cpp + widget.cpp\ + smanage.cpp \ + snavercafemanage.cpp + +HEADERS += widget.h \ + smanage.h \ + snavercafemanage.h -HEADERS += widget.h diff --git a/CrawlerList/smanage.cpp b/CrawlerList/smanage.cpp index 2e9f94f..3117a78 100644 --- a/CrawlerList/smanage.cpp +++ b/CrawlerList/smanage.cpp @@ -8,17 +8,34 @@ SManage::SManage(QObject *parent) : connect(&m_pro[i],SIGNAL(finished(int,QProcess::ExitStatus)),SLOT(processFinished(int,QProcess::ExitStatus))); } -void SManage::Start(QDate _StartDate,QDate _EndDate,QString _strKeyword,QString _strAuthorship,QString _strKeywordID,QString _strGroupID,int _nStart,int _nTime) +void SManage::Start(QDate _StartDate,QDate _EndDate,QString _strKeyword,QString _strAuthorship,QString _strKeywordID,QString _strGroupID,int _nStart,QString _strTime) { m_date = _StartDate; m_dateEnd = _EndDate; m_strKeyword = _strKeyword; - m_bFinalLast = false; - m_nTime = _nTime; + m_bFinalLast = false; m_strKeywordID = _strKeywordID; m_strAuthorship = _strAuthorship; m_ncList = _nStart; m_strGroupID = _strGroupID; + QStringList strList = _strTime.split("~"); + switch(strList.size()) + { + case 0: + m_nStartTime = 3; + m_nRangeTime = 1; + break; + case 1: + m_nStartTime = strList.at(0).toInt(); + m_nRangeTime = 1; + break; + case 2: + m_nStartTime = strList.at(0).toInt(); + m_nRangeTime = strList.at(1).toInt() - strList.at(0).toInt(); + break; + } + m_timeEnd = QDateTime::currentDateTime(); + m_timeEnd = m_timeEnd.addSecs(rand() % m_nRangeTime + m_nStartTime); Start(); } @@ -71,17 +88,16 @@ bool SManage::UseProcess() void SManage::processFinished(int exitCode,QProcess::ExitStatus exitStatus) { - SProcess *pPro = (SProcess*)sender(); + QProcess *pPro = (QProcess*)sender(); QString str = pPro->readAllStandardOutput(); QStringList list = str.split("\n"); foreach(QString log,list) { if (m_pMain) - m_pMain->InsertLog(m_nID,log); + m_pMain->InsertLog(log); else exit(0); } - pPro->SetState(SProcess::STATE_WAIT); processFinished(pPro,str); pPro->kill(); } @@ -106,10 +122,20 @@ void SManage::WaitExitProcess() { for(int i = 0; i < C_PROCESS_MAX ; i++) { - if (m_pro[i].State() != SProcess::STATE_WAIT) + if (m_pro[i].state() == QProcess::Running) m_pro[i].kill(); } bQuit = UseProcess(); } } +bool SManage::CheckTime() +{ + if (QDateTime::currentDateTime() > m_timeEnd) + { + m_timeEnd = QDateTime::currentDateTime(); + m_timeEnd = m_timeEnd.addSecs(rand() % m_nRangeTime + m_nStartTime); + return true; + } + return false; +} diff --git a/CrawlerList/smanage.h b/CrawlerList/smanage.h index 4b28be5..0884560 100644 --- a/CrawlerList/smanage.h +++ b/CrawlerList/smanage.h @@ -4,50 +4,52 @@ #include #include #include +#include #include #include -#include "SProcess.h" +#include class Widget; class SManage : public QObject { Q_OBJECT +private: + QVector m_vecList; +private slots: + void processFinished(int exitCode, QProcess::ExitStatus exitStatus); public: explicit SManage(QObject *parent = 0); public: - void Start(QDate _StartDate,QDate _EndDate,QString _strKeyword,QString _strAuthorship,QString _strKeywordID,QString _strGroupID,int _nStart,int _nTime); + void Start(QDate _StartDate,QDate _EndDate,QString _strKeyword,QString _strAuthorship,QString _strKeywordID,QString _strGroupID,int _nStart,QString _strTime); void SetParent(Widget *pWidget); void WaitExitProcess(); virtual bool Update() = 0; -signals: -public slots: protected: virtual void Start() = 0; QString EncodetoUtf8(QString _str,bool _bExt=false); - virtual void processFinished(SProcess *pPro,QString _strOut) = 0; + virtual void processFinished(QProcess *pPro,QString _strOut) = 0; bool UseProcess(); void CheckLast(); + bool CheckTime(); protected: Widget *m_pMain; QDate m_date,m_dateEnd; + QDateTime m_timeEnd; int m_nMode; QString m_strKeyword; QString m_strKeywordID; QString m_strGroupID; QString m_strAuthorship; - bool m_bFinalLast; - int m_nTime; + bool m_bFinalLast; int m_nID; bool m_bLast; int m_ncList; int m_ncUrl; int m_nWait; + int m_nStartTime; + int m_nRangeTime; static const int C_PROCESS_MAX = 1; - SProcess m_pro[C_PROCESS_MAX]; -private: - QVector m_vecList; -private slots: - void processFinished(int exitCode, QProcess::ExitStatus exitStatus); + QProcess m_pro[C_PROCESS_MAX]; }; #endif // SMANAGE_H diff --git a/CrawlerList/snavercafemanage.cpp b/CrawlerList/snavercafemanage.cpp index 2c437ca..171ef88 100644 --- a/CrawlerList/snavercafemanage.cpp +++ b/CrawlerList/snavercafemanage.cpp @@ -4,6 +4,7 @@ #include #include #include +#include SNaverCafeManage::SNaverCafeManage(QObject *pObject) : SManage(pObject) { @@ -45,13 +46,12 @@ bool SNaverCafeManage::Update() switch(m_nMode) { case E_PROCESS_LIST_RUN: - if (UseProcess() == false) + if (UseProcess() == false && CheckTime()) { m_strListQuery = makeGetListQuery(m_strKeyword,m_date,m_ncList); - m_pMain->InsertLog(m_nID,"Start : " + QString::number(m_ncList) + " Date : " + m_date.toString("yyyy-MM-dd")); + m_pMain->InsertLog("Start : " + QString::number(m_ncList) + " Date : " + m_date.toString("yyyy-MM-dd")); { - m_pro[0].start("CrawlerProcess",QStringList()<< "naver" << "cafe_list" << m_strListQuery << m_strGroupID << m_strKeywordID); - m_pro[0].SetState(SProcess::STATE_RUNNING); + m_pro[0].start("CrawlerProcess",QStringList()<< "naver" << "cafe_list" << m_strListQuery << m_strGroupID << m_strKeywordID); m_ncList+=10; } m_nMode = E_PROCESS_LIST_FINISH_WAIT; @@ -59,12 +59,11 @@ bool SNaverCafeManage::Update() } break; case E_PROCESS_URL_RUN: - if (UseProcess() == false) + if (UseProcess() == false && CheckTime()) { - m_pMain->InsertLog(m_nID,"(" + QString::number(m_ncUrl+1) + "/" + QString::number(m_strListURL.size()) + ")"); + m_pMain->InsertLog("(" + QString::number(m_ncUrl+1) + "/" + QString::number(m_strListURL.size()) + ")"); { - m_pro[0].start("CrawlerProcess",QStringList() << "naver" << "cafe_data" << m_strListURL.at(m_ncUrl) << m_strGroupID << m_strListQuery << "" ); - m_pro[0].SetState(SProcess::STATE_RUNNING); + m_pro[0].start("CrawlerProcess",QStringList() << "naver" << "cafe_data" << m_strListURL.at(m_ncUrl) << m_strGroupID << m_strListQuery << "" ); m_ncUrl++; } m_nMode = E_PROCESS_URL_FINISH_WAIT; @@ -74,15 +73,14 @@ bool SNaverCafeManage::Update() case E_PROCESS_LIST_FINISH_WAIT: case E_PROCESS_URL_FINISH_WAIT: m_nWait++; - if (m_nWait > (100000/m_nTime)) + if (m_nWait >= 60) { //for(int i = 0; i < C_PROCESS_MAX ; i++) { - if (m_pro[0].State() != SProcess::STATE_WAIT) + if (m_pro[0].state() == QProcess::Running) { m_pro[0].kill(); - m_pro[0].SetState(SProcess::STATE_WAIT); - m_pMain->InsertLog(m_nID,"Kill Process."); + m_pMain->InsertLog("Kill Process."); } } if (m_nMode == E_PROCESS_LIST_FINISH_WAIT) return m_bFinalLast; @@ -92,7 +90,7 @@ bool SNaverCafeManage::Update() return m_bFinalLast; } -void SNaverCafeManage::processFinished(SProcess *_pPro,QString _strOut) +void SNaverCafeManage::processFinished(QProcess *_pPro,QString _strOut) { switch(m_nMode) { @@ -101,6 +99,10 @@ void SNaverCafeManage::processFinished(SProcess *_pPro,QString _strOut) m_bLast = false; if (_strOut.right(4) == "last" || m_ncList >= 1000) m_bLast = true; + + if (_strOut.right(5) == "block") + m_bFinalLast = true; + m_strListURL.clear(); foreach(QString str,_strOut.split("\n")) { @@ -108,16 +110,6 @@ void SNaverCafeManage::processFinished(SProcess *_pPro,QString _strOut) if (str.at(0) == QChar('o')) m_strListURL.push_back(str.right(str.length()-2).trimmed()); } - - /* - QSqlQuery query; - if (query.exec("SELECT URL FROM " + C_TABLE_URL + QString::number(m_nUrlTable) + " where ERROR is null")) - { - m_pMain->InsertLog(m_nID,query.lastError().text()); - } - while (query.next()) - m_strListURL.append(query.value(0).toString()); - */ m_ncUrl = 0; if (m_strListURL.size() == 0) { @@ -141,105 +133,3 @@ void SNaverCafeManage::processFinished(SProcess *_pPro,QString _strOut) break; } } - -/* -void SNaverCafeManage::MakeTables() -{ - QString strQuery = "show tables"; - QSqlQuery query; - query.exec(strQuery); - int nUrlMax = -1; - while (query.next()) - { - QString str = query.value(0).toString(); - if (str.left(C_TABLE_URL.size()) == C_TABLE_URL.toUpper()) - { - if (nUrlMax < str.mid(C_TABLE_URL.size()).toInt()) - nUrlMax = str.mid(C_TABLE_URL.size()).toInt(); - } - } - m_nUrlTable = nUrlMax + 1; - strQuery = "Create table " + C_TABLE_URL + QString::number(m_nUrlTable)+ "(Url CHAR(128) not null primary key,keyword_id INT,PlatformTitle CHAR(128),PlatformID CHAR(64),ArticleTitle VARCHAR(128),ArticleID CHAR(32),Date DATETIME,Nickname CHAR(32),Data VARCHAR(18432),Error CHAR(32)) CHARSET=utf8"; - query.exec(strQuery); - strQuery = "Create table " + C_TABLE_COM + QString::number(m_nUrlTable)+ "(Url CHAR(128) not null,Nickname CHAR(32),Data VARCHAR(1024),Parent CHAR(64),Date DATETIME,UrlReply VARCHAR(512),RowNum INT) CHARSET=utf8"; - query.exec(strQuery); - - m_pMain->setWindowTitle("NaverCafeCrawler " + QString::number(m_nUrlTable)); -} - -void SNaverCafeManage::DropTables() -{ - QString strQuery = "drop table "; - QSqlQuery query; - query.exec(strQuery + C_TABLE_URL + QString::number(m_nUrlTable)); - query.exec(strQuery + C_TABLE_COM + QString::number(m_nUrlTable)); -} - -void SNaverCafeManage::Join() -{ - m_pMain->InsertLog(m_nID,"Insert Article Data..."); - QString strQuery = "insert into " - "data_" + m_strGroupID + - "(platformname , platformform , articleform ," - "url , keyword_id , body_platformtitle , body_platformid , body_articletitle , body_articleid , body_date , body_nickname , body_data)" - "select " - "CONVERT('naver' USING utf8)," - "CONVERT('cafe' USING utf8)," - "CONVERT('article' USING utf8)," - "CONVERT(url USING utf8)," - "CONVERT(keyword_id USING utf8)," - "CONVERT(PlatformTitle USING utf8)," - "CONVERT(PlatformID USING utf8)," - "CONVERT(ArticleTitle USING utf8)," - "CONVERT(ArticleID USING utf8)," - "CONVERT(Date USING utf8)," - "CONVERT(Nickname USING utf8)," - "CONVERT(Data USING utf8)" - "from NAVER_CAFE_BODY_" + QString::number(m_nUrlTable); - QSqlQuery query; - if (query.exec(strQuery) == false) - { - m_pMain->InsertLog(m_nID,query.lastError().text()); - return; - } - - m_pMain->InsertLog(m_nID,"Insert Reply Data..."); - - strQuery = "insert into " - "data_" + m_strGroupID + - "(platformname , platformform , articleform ," - "url , keyword_id , body_platformtitle , body_platformid , body_articletitle , body_articleid , body_date , body_nickname , body_data ," - "reply_nickname ,reply_data, reply_parent , reply_date ,reply_urlreply ,reply_rownum )" - "select " - "CONVERT('naver' USING utf8)," - "CONVERT('cafe' USING utf8)," - "CONVERT('reply' USING utf8)," - "CONVERT(_body.url USING utf8)," - "CONVERT(_body.keyword_id USING utf8)," - "CONVERT(_body.PlatformTitle USING utf8)," - "CONVERT(_body.PlatformID USING utf8)," - "CONVERT(_body.ArticleTitle USING utf8)," - "CONVERT(_body.ArticleID USING utf8)," - "CONVERT(_body.Date USING utf8)," - "CONVERT(_body.Nickname USING utf8)," - "CONVERT(_body.Data USING utf8)," - "CONVERT(_reply.Nickname USING utf8)," - "CONVERT(_reply.Data USING utf8)," - "CONVERT(_reply.Parent USING utf8)," - "CONVERT(_reply.Date USING utf8)," - "CONVERT(_reply.UrlReply USING utf8)," - "CONVERT(_reply.RowNum USING utf8) " - "from NAVER_CAFE_BODY_" + QString::number(m_nUrlTable) + " _body INNER JOIN NAVER_CAFE_REPLY_" + QString::number(m_nUrlTable) + " _reply ON _body.Url = _reply.Url"; - query.exec(strQuery); - - if (query.exec(strQuery) == false) - { - m_pMain->InsertLog(m_nID,query.lastError().text()); - return; - } - m_pMain->InsertLog(m_nID,"Delete data ..."); - query.exec("delete from NAVER_CAFE_BODY_" + QString::number(m_nUrlTable) ); - query.exec("delete from NAVER_CAFE_REPLY_" + QString::number(m_nUrlTable) ); - m_pMain->InsertLog(m_nID,"Finish ... "); -} -*/ diff --git a/CrawlerList/snavercafemanage.h b/CrawlerList/snavercafemanage.h index b8d7276..eba045f 100644 --- a/CrawlerList/snavercafemanage.h +++ b/CrawlerList/snavercafemanage.h @@ -17,12 +17,11 @@ private: QString makeGetListQuery(QString _str,QDate _date,int _nPage); private: QString m_strListQuery; - QVector m_strListURL; - int m_nUrlTable; + QVector m_strListURL; protected: bool Update(); void Start(); - void processFinished(SProcess *pPro,QString _strOut); + void processFinished(QProcess *pPro,QString _strOut); }; #endif // SNAVERCAFEMANAGE_H diff --git a/CrawlerList/widget.cpp b/CrawlerList/widget.cpp index b73e03d..fd3dc5b 100644 --- a/CrawlerList/widget.cpp +++ b/CrawlerList/widget.cpp @@ -21,7 +21,7 @@ Widget::Widget(QWidget *parent) : QWidget(parent) QPushButton *pbtStop = new QPushButton("Stop",this); { QObject::connect(pbtStart,SIGNAL(clicked()),this,SLOT(StartButton())); - QObject::connect(pbtStop,SIGNAL(clicked()),this,SLOT(StartButton())); + QObject::connect(pbtStop,SIGNAL(clicked()),this,SLOT(StopButton())); QObject::connect(pbtRefresh,SIGNAL(clicked()),this,SLOT(RefreshButton())); } @@ -34,8 +34,7 @@ Widget::Widget(QWidget *parent) : QWidget(parent) } m_pedTime = new QLineEdit(this); - m_pedTime->setText(QString("500")); - + m_pedTime->setText(QString("3~10")); { QHBoxLayout *hlayout = new QHBoxLayout; hlayout->addWidget(m_pedTime); @@ -45,9 +44,9 @@ Widget::Widget(QWidget *parent) : QWidget(parent) } { - QHBoxLayout *hlayout = new QHBoxLayout; - for (int i = 0; i < C_CRAWLER_MAX; i++ ) - hlayout->addWidget(&m_aResultList[i]); + QHBoxLayout *hlayout = new QHBoxLayout; + m_pResultList = new QListWidget; + hlayout->addWidget(m_pResultList); vlayout->addLayout(hlayout); } @@ -62,7 +61,7 @@ Widget::Widget(QWidget *parent) : QWidget(parent) m_db.setDatabaseName("concepters"); if (!m_db.open()) { - InsertLog(0,"MySql Error..."); + InsertLog("MySql Error..."); return; } @@ -78,16 +77,14 @@ Widget::~Widget() m_pNaverCafe->SetParent(0); } -void Widget::InsertLog(int _nSelect,QString str) +void Widget::InsertLog(QString str) { - if (_nSelect >= C_CRAWLER_MAX) return; - QTime time = QTime::currentTime(); QString strOut = time.toString("[hh:mm:ss] ") + str; - m_aResultList[_nSelect].addItem(strOut); + m_pResultList->addItem(strOut); QDate date = QDate::currentDate(); - QFile file(date.toString(Qt::ISODate)+"_"+QString::number(_nSelect)+".log"); + QFile file(date.toString(Qt::ISODate)+".log"); if (!file.open(QIODevice::WriteOnly | QIODevice::Text | QIODevice::Append)) return; @@ -95,22 +92,22 @@ void Widget::InsertLog(int _nSelect,QString str) out << strOut << "\n"; file.close(); - if (m_aResultList[_nSelect].count() > 1024) + if (m_pResultList->count() > 1024) { - m_aResultList[_nSelect].removeItemWidget(m_aResultList[_nSelect].item(0)); - QListWidgetItem* item = m_aResultList[_nSelect].takeItem(0); + m_pResultList->removeItemWidget(m_pResultList->item(0)); + QListWidgetItem* item = m_pResultList->takeItem(0); delete item; } - m_aResultList[_nSelect].setCurrentRow( m_aResultList[_nSelect].count() - 1 ); - m_aResultList[_nSelect].repaint(); + m_pResultList->setCurrentRow( m_pResultList->count() - 1 ); + m_pResultList->repaint(); } void Widget::StartButton() -{ +{ m_timer.stop(); - m_timer.start(m_pedTime->text().trimmed().toInt()); + m_timer.start(1000); - QSqlQuery query; + QSqlQuery query; query.exec("UPDATE crawling set state = '" + QString("run") + "' where id = '" + m_pcb->currentData().toString() + "'"); query.exec("SELECT _keyword.start,_keyword.end, _keyword.searches,_keyword.authorship,_keyword.id,_datagroup.id " "FROM crawling _crawling INNER JOIN keyword _keyword ON _crawling.keyword_id = _keyword.id " @@ -128,7 +125,7 @@ void Widget::StartButton() query.value(4).toString().trimmed(),// keyword_id query.value(5).toString().trimmed(), 1, - m_pedTime->text().trimmed().toInt()); + m_pedTime->text().trimmed()); } void Widget::StopButton() @@ -142,18 +139,18 @@ void Widget::Update() { if (m_db.open()) { - InsertLog(0,"MySql Open Error..."); + InsertLog("MySql Open Error..."); m_timer.stop(); return; } } int nCount = 0; for (int i = 0 ; i < C_CRAWLER_MAX ; i++) - nCount += m_pManage[i]->Update(); + nCount += m_pManage[0]->Update(); if (nCount == C_CRAWLER_MAX) { - InsertLog(0,"Finish..."); - m_timer.stop(); + InsertLog("Finish..."); + m_timer.stop(); } } diff --git a/CrawlerList/widget.h b/CrawlerList/widget.h index 519e3e0..3513730 100644 --- a/CrawlerList/widget.h +++ b/CrawlerList/widget.h @@ -21,19 +21,19 @@ public: Widget(QWidget *parent = 0); ~Widget(); public: - void InsertLog(int _nSelect,QString str); + void InsertLog(QString str); private: QLineEdit *m_pedTime; QTimer m_timer; QSqlDatabase m_db; static const int C_CRAWLER_MAX = 1; SManage *m_pManage[C_CRAWLER_MAX]; - QListWidget m_aResultList[C_CRAWLER_MAX]; + QListWidget *m_pResultList; QString m_strFileName; QComboBox *m_pcb; QVector m_vecSelect; - SNaverCafeManage *m_pNaverCafe; + int m_nStartTime,m_nRangeTime; private: QString makeCafeGetListQuery(QString _str,QDate _date,int _nPage); private slots: