뉴스 크롤러 추가
git-svn-id: svn://192.168.0.12/source@149 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
@@ -17,11 +17,13 @@ SOURCES += main.cpp\
|
||||
smanage.cpp \
|
||||
snavercafemanage.cpp \
|
||||
snaverblogmanage.cpp \
|
||||
sdaumcafemanage.cpp
|
||||
sdaumcafemanage.cpp \
|
||||
snavernewsmanage.cpp
|
||||
|
||||
HEADERS += widget.h \
|
||||
smanage.h \
|
||||
snavercafemanage.h \
|
||||
snaverblogmanage.h \
|
||||
sdaumcafemanage.h
|
||||
sdaumcafemanage.h \
|
||||
snavernewsmanage.h
|
||||
|
||||
|
||||
@@ -65,6 +65,38 @@ QString SManage::EncodetoUtf8(QString _str,bool _bExt)
|
||||
return strOut;
|
||||
}
|
||||
|
||||
/**
 * Percent-encodes a string for use in a URL query, using the bytes produced
 * by QString::toLocal8Bit() for each character.
 *
 * NOTE(review): despite the name, the byte encoding is whatever the local
 * 8-bit codec is; the output is EUC-KR only when that codec is EUC-KR/CP949.
 * Confirm the deployment locale, or switch to an explicit codec.
 *
 * Rules applied per character:
 *  - a character whose local encoding is more than one byte is always
 *    escaped, one "%XX" per byte (previously only the exactly-two-byte case
 *    was escaped correctly; other lengths produced a single '%' followed by
 *    all hex digits, or were passed through raw);
 *  - digits and upper/lower-case letters are copied unchanged;
 *  - '*', '_' and '.' are copied unchanged; '@' is copied unchanged only
 *    when _bExt is true;
 *  - a space becomes '+' when _bExt is true;
 *  - everything else is escaped as "%XX" (upper-case hex).
 *
 * @param _str  Text to encode.
 * @param _bExt Enables the '@' pass-through and the space-to-'+' rule.
 * @return The encoded string.
 */
QString SManage::EncodetoEucKr(QString _str,bool _bExt)
{
    QString strOut;
    for (int i = 0; i < _str.length(); i++)
    {
        const QChar ch = _str.at(i);
        const QByteArray byte = QString(ch).toLocal8Bit();

        // Only characters that fit in a single byte may be emitted verbatim.
        if (byte.length() <= 1)
        {
            if (ch.isDigit() || ch.isLower() || ch.isUpper())
            {
                strOut += ch;
                continue;
            }
            // Grouping made explicit; it matches how the original unparenthesised
            // "_bExt && a || b || c || d" expression was evaluated.
            if ((_bExt && ch == '@') || ch == '*' || ch == '_' || ch == '.')
            {
                strOut += ch;
                continue;
            }
            if (_bExt && ch == ' ')
            {
                strOut += '+';
                continue;
            }
        }

        // Escape every byte individually: "%XX" per byte.
        const QByteArray byteHex = byte.toHex().toUpper();
        for (int j = 0; j + 1 < byteHex.length(); j += 2)
        {
            strOut += "%";
            strOut += byteHex.at(j);
            strOut += byteHex.at(j + 1);
        }
    }
    return strOut;
}
|
||||
|
||||
bool SManage::UseProcess()
|
||||
{
|
||||
for(int i = 0; i < C_PROCESS_MAX ; i++)
|
||||
|
||||
@@ -28,6 +28,7 @@ public:
|
||||
protected:
|
||||
virtual void Start() = 0;
|
||||
QString EncodetoUtf8(QString _str,bool _bExt=false);
|
||||
QString EncodetoEucKr(QString _str,bool _bExt=false);
|
||||
virtual void processFinished(QProcess *pPro,QString _strOut) = 0;
|
||||
bool UseProcess();
|
||||
void CheckLast();
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
/**
 * Constructs the Naver Cafe manager, forwarding pObject to the SManage base
 * and resetting m_nID to 0.
 */
SNaverCafeManage::SNaverCafeManage(QObject *pObject) : SManage(pObject)
{
    // The assignment was present twice; one is sufficient.
    m_nID = 0;
}
|
||||
|
||||
QString SNaverCafeManage::makeGetListQuery(QString _str,QDate _date,int _nPage)
|
||||
|
||||
163
CrawlerList/snavernewsmanage.cpp
Normal file
163
CrawlerList/snavernewsmanage.cpp
Normal file
@@ -0,0 +1,163 @@
|
||||
#include "snavernewsmanage.h"
|
||||
#include "widget.h"
|
||||
#include <QDebug>
|
||||
|
||||
/**
 * Constructs the Naver News manager. pObject is forwarded to the SManage
 * base constructor; m_nID starts at 0.
 */
SNaverNewsManage::SNaverNewsManage(QObject *pObject)
    : SManage(pObject)
{
    this->m_nID = 0;
}
|
||||
|
||||
/**
 * Builds the Naver News search URL for one keyword, one day and one page.
 *
 * @param _str   Search keyword; it is encoded with EncodetoEucKr(_str, true)
 *               (the URL declares q_enc=EUC-KR).
 * @param _date  Day to search; used as both the start and the end of the range.
 * @param _nPage Value placed in the "page" parameter.
 * @return The complete request URL.
 */
QString SNaverNewsManage::makeGetListQuery(QString _str,QDate _date,int _nPage)
{
    // The same day is used for startDate and endDate.
    const QString strDay = _date.toString("yyyy-MM-dd");

    QString strQuery("http://news.naver.com/main/search/search.nhn?query=");
    strQuery.append(EncodetoEucKr(_str,true));
    strQuery.append("&st=news.all&q_enc=EUC-KR&r_enc=UTF-8&r_format=xml&rp=none&sm=all.basic&ic=all&ie=MS949&so=datetime.dsc&stDate=range:");
    strQuery.append(_date.toString("yyyyMMdd:yyyyMMdd"));
    strQuery.append("&detail=1&pd=4&start=1&display=25&startDate=");
    strQuery.append(strDay);
    strQuery.append("&endDate=");
    strQuery.append(strDay);
    strQuery.append("&page=");
    strQuery.append(QString::number(_nPage));
    return strQuery;
}
|
||||
|
||||
void SNaverNewsManage::Start()
|
||||
{
|
||||
m_nMode = E_PROCESS_LIST_RUN;
|
||||
m_bFinalLast = false;
|
||||
}
|
||||
|
||||
bool SNaverNewsManage::Update()
|
||||
{
|
||||
if (m_bFinalLast) return m_bFinalLast;
|
||||
|
||||
switch(m_nMode)
|
||||
{
|
||||
case E_PROCESS_LIST_RUN:
|
||||
if (UseProcess() == false)
|
||||
{
|
||||
m_strListQuery = makeGetListQuery(m_strKeyword,m_date,m_ncList);
|
||||
m_pMain->InsertLog("Start : " + QString::number(m_ncList) + " Date : " + m_date.toString("yyyy-MM-dd"));
|
||||
{
|
||||
m_pro[0].start("CrawlerProcess",QStringList()<< "naver" << "news_list" << m_strListQuery);
|
||||
m_ncList++;
|
||||
}
|
||||
m_nMode = E_PROCESS_LIST_FINISH_WAIT;
|
||||
m_nWait = 0;
|
||||
}
|
||||
break;
|
||||
case E_PROCESS_URL_RUN:
|
||||
if (UseProcess() == false)
|
||||
{
|
||||
m_pMain->InsertLog("(" + QString::number(m_ncUrl+1) + "/" + QString::number(m_strListURL.size()) + ")");
|
||||
{
|
||||
m_pro[0].start("AjaxCrawlerProcess",QStringList() << m_strListURL.at(m_ncUrl) << m_strGroupID << m_strKeywordID );
|
||||
m_ncUrl++;
|
||||
}
|
||||
m_nMode = E_PROCESS_URL_FINISH_WAIT;
|
||||
m_nWait = 0;
|
||||
}
|
||||
break;
|
||||
case E_PROCESS_LIST_FINISH_WAIT:
|
||||
case E_PROCESS_URL_FINISH_WAIT:
|
||||
m_nWait++;
|
||||
if (m_nWait >= 300)
|
||||
{
|
||||
m_pMain->InsertLog("Kill Process.");
|
||||
m_pro[0].kill();
|
||||
ReLoadList();
|
||||
}
|
||||
break;
|
||||
}
|
||||
return m_bFinalLast;
|
||||
}
|
||||
|
||||
/**
 * Handles the output of a finished crawler process according to the current
 * wait state.
 *
 * List result (E_PROCESS_LIST_FINISH_WAIT):
 *  - m_bLast becomes true when the output ends with "last", when m_ncList has
 *    reached 160, or when m_nUntilPage is positive and m_ncList has reached it;
 *  - the page is reloaded (at most once) when the output ends with "block" or
 *    "loading", has fewer than three lines, or has a blank third line;
 *  - every line starting with 'o' contributes one entry to m_strListURL (the
 *    text after its first two characters, trimmed);
 *  - with no URLs the manager returns to the list state and calls
 *    CheckLast(); otherwise it moves on to fetching the URLs.
 *
 * URL result (E_PROCESS_URL_FINISH_WAIT):
 *  - moves on to the next URL, or, when all URLs are consumed, returns to the
 *    list state, calls CheckLast() and clears m_bLast.
 *
 * @param _pPro   Finished process (not used here).
 * @param _strOut Text output collected from the process.
 */
void SNaverNewsManage::processFinished(QProcess *_pPro,QString _strOut)
{
    switch (m_nMode)
    {
    case E_PROCESS_LIST_FINISH_WAIT:
    {
        // Evaluated before any reload, because ReLoadList() changes m_ncList.
        m_bLast = _strOut.endsWith("last")
               || m_ncList >= 160
               || (m_nUntilPage > 0 && m_ncList >= m_nUntilPage);

        bool bReloaded = false;
        if (_strOut.endsWith("block") || _strOut.endsWith("loading"))
        {
            bReloaded = true;
            ReLoadList();
        }

        const QStringList listLines = _strOut.split("\n");

        // Too few lines, or a blank third line, also triggers a reload.
        const bool bIncomplete = listLines.length() <= 2
                              || listLines.at(2).trimmed().length() == 0;
        if (bIncomplete && !bReloaded)
            ReLoadList();

        m_strListURL.clear();
        for (int i = 0; i < listLines.size(); ++i)
        {
            const QString &strLine = listLines.at(i);
            if (!strLine.startsWith(QChar('o')))
                continue;
            m_strListURL.push_back(strLine.right(strLine.length()-2).trimmed());
        }
        m_ncUrl = 0;

        if (m_strListURL.size() != 0)
        {
            m_nMode = E_PROCESS_URL_RUN;
        }
        else
        {
            m_nMode = E_PROCESS_LIST_RUN;
            CheckLast();
        }
        break;
    }

    case E_PROCESS_URL_FINISH_WAIT:
        if (m_ncUrl < m_strListURL.size())
        {
            m_nMode = E_PROCESS_URL_RUN;
        }
        else
        {
            m_nMode = E_PROCESS_LIST_RUN;
            CheckLast();
            m_bLast = false;
        }
        break;
    }
}
|
||||
void SNaverNewsManage::ReLoadList()
|
||||
{
|
||||
m_nMode = E_PROCESS_LIST_RUN;
|
||||
m_ncList--;
|
||||
if(m_ncList < 0)
|
||||
m_ncList = 1;
|
||||
m_pMain->InsertLog("Reload List");
|
||||
}
|
||||
29
CrawlerList/snavernewsmanage.h
Normal file
29
CrawlerList/snavernewsmanage.h
Normal file
@@ -0,0 +1,29 @@
|
||||
#ifndef SNAVERNEWSMANAGE_H
|
||||
#define SNAVERNEWSMANAGE_H
|
||||
|
||||
#include "smanage.h"
|
||||
|
||||
class SNaverNewsManage : public SManage
|
||||
{
|
||||
public:
|
||||
SNaverNewsManage(QObject *pObject);
|
||||
enum E_PROCESS_STATE
|
||||
{
|
||||
E_PROCESS_LIST_RUN = 0,
|
||||
E_PROCESS_LIST_FINISH_WAIT,
|
||||
E_PROCESS_URL_RUN,
|
||||
E_PROCESS_URL_FINISH_WAIT,
|
||||
};
|
||||
private:
|
||||
QString makeGetListQuery(QString _str,QDate _date,int _nPage);
|
||||
private:
|
||||
QString m_strListQuery;
|
||||
QVector <QString> m_strListURL;
|
||||
protected:
|
||||
bool Update();
|
||||
void Start();
|
||||
void processFinished(QProcess *pPro,QString _strOut);
|
||||
void ReLoadList();
|
||||
};
|
||||
|
||||
#endif // SNAVERNEWSMANAGE_H
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "snavercafemanage.h"
|
||||
#include "snaverblogmanage.h"
|
||||
#include "sdaumcafemanage.h"
|
||||
#include "snavernewsmanage.h"
|
||||
#include <QApplication>
|
||||
|
||||
Widget::Widget(QWidget *parent) : QWidget(parent) , m_nMode(E_MODE_WAIT)
|
||||
@@ -27,15 +28,19 @@ Widget::Widget(QWidget *parent) : QWidget(parent) , m_nMode(E_MODE_WAIT)
|
||||
m_pNaverCafe = new SNaverCafeManage(this);
|
||||
m_pNaverBlog = new SNaverBlogManage(this);
|
||||
m_pDaumCafe = new SDaumCafeManage(this);
|
||||
m_pNaverNews = new SNaverNewsManage(this);
|
||||
m_pManage[0] = m_pNaverCafe;
|
||||
m_pManage[1] = m_pNaverBlog;
|
||||
m_pManage[2] = m_pDaumCafe;
|
||||
m_pManage[2] = m_pDaumCafe;
|
||||
m_pManage[3] = m_pNaverNews;
|
||||
|
||||
m_db = QSqlDatabase::addDatabase("QMYSQL");
|
||||
m_db.setHostName("bigbird.iptime.org");
|
||||
m_db.setUserName("admin");
|
||||
m_db.setPassword("admin123");
|
||||
m_db.setDatabaseName("concepters");
|
||||
m_db.setConnectOptions("CLIENT_INTERACTIVE=999999999;");
|
||||
m_db.setConnectOptions("MYSQL_OPT_RECONNECT=true;");
|
||||
if (!m_db.open())
|
||||
{
|
||||
InsertLog("MySql Error...");
|
||||
@@ -271,6 +276,7 @@ void Widget::RefreshButton()
|
||||
case 0:str += ", Naver Cafe";break;
|
||||
case 1:str += ", Naver Blog";break;
|
||||
case 2:str += ", Daum Cafe"; break;
|
||||
case 3:str += ", Naver News"; break;
|
||||
}
|
||||
m_pcb->addItem(str,query.value(7));
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
class SNaverCafeManage;
|
||||
class SNaverBlogManage;
|
||||
class SDaumCafeManage;
|
||||
class SNaverNewsManage;
|
||||
class SManage;
|
||||
|
||||
#define SAFE_DELETE(p) {if(p) delete (p); (p) = NULL; }
|
||||
@@ -41,7 +42,7 @@ private:
|
||||
QLineEdit *m_pedUntilPage;
|
||||
QTimer m_timer;
|
||||
QSqlDatabase m_db;
|
||||
static const int C_PLATFORM_MAX = 3;
|
||||
static const int C_PLATFORM_MAX = 4;
|
||||
SManage *m_pManage[C_PLATFORM_MAX];
|
||||
QListWidget *m_pResultList;
|
||||
QString m_strFileName;
|
||||
@@ -50,6 +51,7 @@ private:
|
||||
SNaverCafeManage *m_pNaverCafe;
|
||||
SNaverBlogManage *m_pNaverBlog;
|
||||
SDaumCafeManage *m_pDaumCafe;
|
||||
SNaverNewsManage *m_pNaverNews;
|
||||
int m_nStartTime,m_nRangeTime,m_nPlatform;
|
||||
//QGroupBox *m_pgbManual;
|
||||
QCheckBox *m_pcheckboxReal;
|
||||
|
||||
Reference in New Issue
Block a user