뉴스 크롤러 기능 추가
git-svn-id: svn://192.168.0.12/source@151 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
22
AjaxCrawlerProcess/AjaxCrawlerProcess.pro
Normal file
22
AjaxCrawlerProcess/AjaxCrawlerProcess.pro
Normal file
@@ -0,0 +1,22 @@
|
||||
# qmake project file for the AjaxCrawlerProcess executable.
TEMPLATE = app
TARGET   = AjaxCrawlerProcess

# Qt modules the sources use (WebKit page loading, sockets, SQL access).
QT += webkitwidgets network widgets sql core

# Console application; no application bundle.
CONFIG += console
CONFIG -= app_bundle

HEADERS += \
    snewscrawler.h \
    sreplygetmanage.h \
    srunnable.h \
    data.h \
    scrawlerdata.h

SOURCES += \
    snewscrawler.cpp \
    main.cpp \
    sreplygetmanage.cpp \
    srunnable.cpp \
    scrawlerdata.cpp
|
||||
35
AjaxCrawlerProcess/data.h
Normal file
35
AjaxCrawlerProcess/data.h
Normal file
@@ -0,0 +1,35 @@
|
||||
#ifndef DATA
#define DATA
#include <QString>
#include <QStringList> // SReplyData uses QStringList; keep this header self-contained

// Index of each field that is read from one comment object of the reply JSON.
// The order must match g_strJsonReplyHead below, which is indexed by this enum.
enum E_REPLY
{
    E_REPLY_USER_ID = 0,
    E_REPLY_USER_NICKNAME,
    E_REPLY_DATE,
    E_REPLY_CONTENT,
    E_REPLY_COUNT_GOOD,
    E_REPLY_COUNT_BAD,
    E_REPLY_COUNT_LIKE,
    E_REPLY_MAX,    // number of fields; not a field itself
};

// JSON key for every E_REPLY field (same order as the enum).
const QString g_strJsonReplyHead[E_REPLY_MAX] = {
    "maskUserId",
    "userNickname",
    "sRegDate",
    "content",
    "goodCount",
    "badCount",
    "likeCount",
};

// One comment plus the text lines collected for its nested replies.
struct SReplyData
{
    // Start with a defined count so entries that are never filled in do not
    // carry an indeterminate value.
    SReplyData() : m_nReplyReplyCount(0) {}

    int m_nReplyReplyCount;                 // value of the "replyCount" JSON field
    QString m_strReplyData[E_REPLY_MAX];    // field values, indexed by E_REPLY
    QStringList m_strReplyReply;            // lines appended by SRunnable for nested replies
};

#endif // DATA
|
||||
|
||||
52
AjaxCrawlerProcess/main.cpp
Normal file
52
AjaxCrawlerProcess/main.cpp
Normal file
@@ -0,0 +1,52 @@
|
||||
#include <QApplication>
|
||||
#include "snewscrawler.h"
|
||||
#include <QSqlDatabase>
|
||||
#include <QWebSettings>
|
||||
#include <iostream>
|
||||
#include <QFile>
|
||||
#include <time.h>
|
||||
#include <QTextStream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Appends _strData as text to the file named _strFilename.
// Does nothing when the file cannot be opened.
void Debug(QString _strFilename,QString _strData)
{
    QFile logFile(_strFilename);
    const bool bOpened = logFile.open(QIODevice::WriteOnly | QIODevice::Text | QIODevice::Append);
    if (bOpened)
    {
        QTextStream writer(&logFile);
        writer << _strData;
        logFile.close();
    }
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
srand(time(0));
|
||||
QApplication a(argc, argv);
|
||||
a.setApplicationName(QString("Chrome"));
|
||||
a.setApplicationVersion(QString("39.0.2171.95"));
|
||||
|
||||
QSqlDatabase db = QSqlDatabase::addDatabase("QMYSQL");
|
||||
db.setHostName("bigbird.iptime.org");
|
||||
db.setUserName("admin");
|
||||
db.setPassword("admin123");
|
||||
db.setDatabaseName("concepters");
|
||||
|
||||
if (db.open() == false)
|
||||
{
|
||||
cout << "error : db open fail...";
|
||||
return 0;
|
||||
}
|
||||
QWebSettings::setObjectCacheCapacities(0,0,0);
|
||||
QWebSettings::clearMemoryCaches();
|
||||
QStringList strArgv;
|
||||
for (int i = 1; i < argc ; i++)
|
||||
strArgv.push_back(argv[i]);
|
||||
|
||||
SNewsCrawler *process = new SNewsCrawler;
|
||||
QObject::connect(process, SIGNAL(finished()), QApplication::instance(), SLOT(quit()));
|
||||
process->load(strArgv);
|
||||
|
||||
return a.exec();
|
||||
|
||||
}
|
||||
171
AjaxCrawlerProcess/scrawlerdata.cpp
Normal file
171
AjaxCrawlerProcess/scrawlerdata.cpp
Normal file
@@ -0,0 +1,171 @@
|
||||
#include "scrawlerdata.h"
|
||||
#include <QSqlQuery>
|
||||
#include <iostream>
|
||||
#include <QVariant>
|
||||
#include <QSqlDatabase>
|
||||
#include <QSqlError>
|
||||
#include <QDateTime>
|
||||
#include <QDebug>
|
||||
using namespace std;
|
||||
// Fills the column-name table; each E_COLUMN index maps to the database
// column name used when building SQL statements.
SCrawlerData::SCrawlerData()
{
    // Platform columns.
    m_strColumn[PLATFORM_NAME]      = "platform_name";
    m_strColumn[PLATFORM_FORM]      = "platform_form";
    m_strColumn[PLATFORM_TITLE]     = "platform_title";
    m_strColumn[PLATFORM_ID]        = "platform_id";

    // Article columns.
    m_strColumn[ARTICLE_FORM]       = "article_form";
    m_strColumn[ARTICLE_PARENT]     = "article_parent";
    m_strColumn[ARTICLE_ID]         = "article_id";
    m_strColumn[ARTICLE_NICKNAME]   = "article_nickname";
    m_strColumn[ARTICLE_TITLE]      = "article_title";
    m_strColumn[ARTICLE_DATA]       = "article_data";
    m_strColumn[ARTICLE_URL]        = "article_url";
    m_strColumn[ARTICLE_HIT]        = "article_hit";
    m_strColumn[ARTICLE_DATE]       = "article_date";
    m_strColumn[ARTICLE_ORDER]      = "article_order";
    m_strColumn[ARTICLE_PROFILE]    = "article_profile";
    m_strColumn[ARTICLE_PROFILEURL] = "article_profileurl";

    // Remaining columns.
    m_strColumn[KEYWORD_ID]         = "keyword_id";
    m_strColumn[REPLY_URL]          = "reply_url";
    m_strColumn[ETC]                = "etc";
}
|
||||
|
||||
// Empties every stored value and every column name.
SCrawlerData::~SCrawlerData()
{
    clear();

    int nColumn = 0;
    while (nColumn < TOTAL_COUNT)
    {
        m_strColumn[nColumn].clear();
        ++nColumn;
    }
}
|
||||
|
||||
void SCrawlerData::clear()
|
||||
{
|
||||
for(int i = 0; i < TOTAL_COUNT; i++)
|
||||
{
|
||||
m_strData[i].clear();
|
||||
}
|
||||
}
|
||||
|
||||
void SCrawlerData::clear(int _num)
|
||||
{
|
||||
m_strData[_num].clear();
|
||||
}
|
||||
|
||||
// Returns the stored value of one column.
// _num is an E_COLUMN index; an index outside [0, TOTAL_COUNT) yields an
// empty string instead of reading past the array.
QString SCrawlerData::getData(int _num)
{
    if (_num < 0 || _num >= TOTAL_COUNT)
        return QString();
    return m_strData[_num];
}
|
||||
|
||||
// Remembers the name of the table that sendDB() and deleteDB() operate on.
// The name is concatenated into the SQL text as-is, so callers must pass a
// trusted identifier.
void SCrawlerData::setTable(QString _str)
{
    this->m_strTable = _str;
}
|
||||
|
||||
// Stores _str as the value of one column.
// _num is an E_COLUMN index; values outside [0, TOTAL_COUNT) are ignored
// instead of writing past the array.
void SCrawlerData::setData(QString _str, int _num)
{
    if (_num < 0 || _num >= TOTAL_COUNT)
        return;
    m_strData[_num] = _str;
}
|
||||
|
||||
bool SCrawlerData::sendDB()
|
||||
{
|
||||
QSqlQuery query;
|
||||
|
||||
QString strQuery;
|
||||
strQuery = "insert into " + m_strTable + "(";
|
||||
|
||||
for(int i = 0; i < TOTAL_COUNT; i++)
|
||||
{
|
||||
strQuery += (m_strColumn[i] + ",");
|
||||
}
|
||||
|
||||
strQuery = strQuery.left(strQuery.size() - 1);
|
||||
strQuery += ") VALUES (";
|
||||
|
||||
for(int i = 0; i < TOTAL_COUNT; i++)
|
||||
{
|
||||
strQuery += (":" + m_strColumn[i] + ",");
|
||||
}
|
||||
|
||||
strQuery = strQuery.left(strQuery.size() - 1);
|
||||
strQuery += ")";
|
||||
|
||||
query.prepare(strQuery.toUtf8());
|
||||
|
||||
for(int i = 0; i < TOTAL_COUNT; i++)
|
||||
{
|
||||
if(i == ARTICLE_ORDER)
|
||||
query.bindValue(QString(":" + m_strColumn[i]), m_strData[i].trimmed().toInt());
|
||||
else
|
||||
query.bindValue(QString(":" + m_strColumn[i]), m_strData[i].trimmed().toUtf8());
|
||||
}
|
||||
|
||||
if (query.exec()==false)
|
||||
{
|
||||
cout << "error : " << query.lastError().text().toStdString();
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns a copy of _strData that keeps only the characters for which
// QChar::isPrint() or QChar::isSpace() is true; everything else is dropped.
QString SCrawlerData::GetSafeUtf(QString _strData)
{
    QString strFiltered;
    const int nLength = _strData.length();

    for (int pos = 0; pos < nLength; ++pos)
    {
        const QChar ch = _strData.at(pos);
        if (!ch.isPrint() && !ch.isSpace())
            continue;   // neither printable nor whitespace: skip
        strFiltered += ch;
    }
    return strFiltered;
}
|
||||
|
||||
// Returns _str with every single quote (') replaced by backslash + quote (\').
// No other character is escaped.
QString SCrawlerData::SqlString(QString _str)
{
    return _str.replace("'","\\'");
}
|
||||
|
||||
// Deletes every row of m_strTable whose column _num equals _str.
//
// _str  value to match; sent as a bound parameter so quotes in it cannot
//       alter the statement.
// _num  E_COLUMN index of the column to compare.
//
// Returns true when the statement ran, false on an invalid column index or a
// database error (the error is printed to stdout). The original version had
// no return statement at all, which is undefined behavior for a bool function.
bool SCrawlerData::deleteDB(QString _str, int _num)
{
    if (_num < 0 || _num >= TOTAL_COUNT)
    {
        cout << "error : deleteDB column index out of range";
        return false;
    }

    // Table and column names come from this class; only the value is external.
    const QString strQuery = "delete from " + m_strTable
                           + " where " + m_strColumn[_num] + " = :value";

    QSqlQuery sql;
    if (sql.prepare(strQuery) == false)
    {
        cout << "error " << sql.lastError().text().toStdString();
        cout << strQuery.toStdString();
        return false;
    }
    sql.bindValue(":value", _str);

    if (sql.exec() == false)
    {
        cout << "error " << sql.lastError().text().toStdString();
        cout << strQuery.toStdString();
        return false;
    }
    return true;
}
|
||||
|
||||
// Converts a date written with a Korean AM/PM marker ("오전" = AM, "오후" = PM),
// e.g. "2015.06.18 오후 3:45", to 24-hour time.
//
// - PM input is returned as "yyyy.MM.dd HH:mm" with 12 hours added, except
//   for 12:xx PM, which already is 12:xx in 24-hour time (the original added
//   12 hours there and rolled over to the next day).
// - AM input is returned with the marker removed; 12:xx AM becomes 00:xx
//   (the original left it as 12:xx).
// - Input without a marker is returned unchanged (the original returned an
//   empty string).
// - PM input that does not parse as "yyyy.MM.dd h:mm" yields an empty string,
//   as before.
QString SCrawlerData::GetDate(QString _strDate)
{
    const bool bAm = _strDate.contains("오전");
    const bool bPm = _strDate.contains("오후");
    if (!bAm && !bPm)
        return _strDate;

    QString strOut = _strDate;
    if (bAm)
        strOut.replace(" 오전","");
    if (bPm)
        strOut.replace(" 오후","");

    QDateTime dateTime = QDateTime::fromString(strOut,"yyyy.MM.dd h:mm");

    if (bPm)
    {
        if (dateTime.isValid() && dateTime.time().hour() < 12)
        {
            // Set the time directly rather than adding seconds so the date
            // part can never change.
            const QTime time = dateTime.time();
            dateTime.setTime(QTime(time.hour() + 12, time.minute()));
        }
        return dateTime.toString("yyyy.MM.dd HH:mm");
    }

    // AM: only midnight (12:xx AM) needs rewriting.
    if (dateTime.isValid() && dateTime.time().hour() == 12)
    {
        dateTime.setTime(QTime(0, dateTime.time().minute()));
        return dateTime.toString("yyyy.MM.dd HH:mm");
    }
    return strOut;
}
|
||||
|
||||
|
||||
55
AjaxCrawlerProcess/scrawlerdata.h
Normal file
55
AjaxCrawlerProcess/scrawlerdata.h
Normal file
@@ -0,0 +1,55 @@
|
||||
#ifndef SCRAWLERDATA
#define SCRAWLERDATA

#include <QString>
#include <QStringList>

// Holds one database row as strings (one value per E_COLUMN entry) together
// with the column names and the target table, and writes/deletes rows through
// the default QSqlDatabase connection.
//
// The include guard now encloses the whole header; previously the #endif
// directly followed the #define, so a second inclusion redefined the class.
class SCrawlerData
{
public:
    // Index of every column handled by this class. TOTAL_COUNT is the number
    // of columns, not a column itself.
    enum E_COLUMN
    {
        PLATFORM_NAME = 0,
        PLATFORM_FORM,
        PLATFORM_TITLE,
        ARTICLE_FORM,
        ARTICLE_PARENT,
        ARTICLE_ID,
        ARTICLE_NICKNAME,
        ARTICLE_TITLE,
        ARTICLE_DATA,
        ARTICLE_URL,
        ARTICLE_HIT,
        ARTICLE_DATE,
        ARTICLE_ORDER,
        ARTICLE_PROFILE,
        ARTICLE_PROFILEURL,
        PLATFORM_ID,
        KEYWORD_ID,
        REPLY_URL,
        ETC,
        TOTAL_COUNT,
    };

private:
    QString m_strData[TOTAL_COUNT];     // current value of each column
    QString m_strColumn[TOTAL_COUNT];   // database column name of each column
    QString m_strTable;                 // table used by sendDB()/deleteDB()

public:
    SCrawlerData();
    ~SCrawlerData();

    // Value currently stored for column _num (an E_COLUMN index).
    QString getData(int _num);
    // Returns _str with every ' replaced by \'.
    QString SqlString(QString _str);
    // Returns _strData without non-printable, non-whitespace characters.
    QString GetSafeUtf(QString _strData);
    // NOTE(review): no definition is visible in scrawlerdata.cpp; calling this
    // would fail at link time unless it is defined elsewhere.
    QString GetTable();
    // Converts a date carrying a Korean AM/PM marker to 24-hour time.
    QString GetDate(QString _strDate);
    // Stores _str as the value of column _num (an E_COLUMN index).
    void setData(QString _str, int _num);
    // Empties all stored values.
    void clear();
    // Empties the stored value of column _num.
    void clear(int _num);
    // Inserts the stored values as one row; true on success.
    bool sendDB();
    // Deletes rows whose column _num equals _str.
    bool deleteDB(QString _str, int _num);
    // Sets the table name used by sendDB()/deleteDB().
    void setTable(QString _str);
};

#endif // SCRAWLERDATA
|
||||
156
AjaxCrawlerProcess/snewscrawler.cpp
Normal file
156
AjaxCrawlerProcess/snewscrawler.cpp
Normal file
@@ -0,0 +1,156 @@
|
||||
#include "snewscrawler.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <QNetworkRequest>
|
||||
#include <QWebFrame>
|
||||
#include <QWebElement>
|
||||
#include <QWebElementCollection>
|
||||
|
||||
using namespace std;
|
||||
|
||||
#include <QFile>
|
||||
#include <QTextStream>
|
||||
|
||||
// Appends _strData as text to the file named _strFilename.
// Does nothing when the file cannot be opened.
void SNewsCrawler::Debug(QString _strFilename,QString _strData)
{
    QFile dumpFile(_strFilename);
    const bool bOpened = dumpFile.open(QIODevice::WriteOnly | QIODevice::Text | QIODevice::Append);
    if (bOpened)
    {
        QTextStream writer(&dumpFile);
        writer << _strData;
        dumpFile.close();
    }
}
|
||||
|
||||
// Creates the web page used for loading and routes its loadFinished(bool)
// signal to saveResult(bool).
SNewsCrawler::SNewsCrawler(QObject *parent) : QObject(parent) , m_bUse(false)
{
    // Parent the page to this object so it is destroyed together with the
    // crawler; it used to be created without a parent and was never deleted.
    m_page = new QWebPage(this);
    connect(m_page, SIGNAL(loadFinished(bool)), this, SLOT(saveResult(bool)));
}
|
||||
|
||||
// Performs no explicit cleanup.
SNewsCrawler::~SNewsCrawler() {}
|
||||
|
||||
// Starts loading the article page.
//
// _strlistArgv must hold at least three entries:
//   [0] article URL ("http" is assumed when no scheme is given)
//   [1] table suffix; rows go to the table "data_<suffix>"
//   [2] keyword id stored with every row
//
// With fewer entries an error is printed and finished() is emitted once the
// event loop runs, instead of reading past the end of the list.
void SNewsCrawler::load(QStringList _strlistArgv)
{
    if (_strlistArgv.size() < 3)
    {
        cout << "error : load() needs <url> <table suffix> <keyword id>" << endl;
        // Queued so the signal is delivered after the event loop has started;
        // emitting directly here would reach quit() before exec() is running.
        QMetaObject::invokeMethod(this, "finished", Qt::QueuedConnection);
        return;
    }

    m_strUrl = _strlistArgv.at(0);
    cout << m_strUrl.toStdString() << endl;
    QUrl url = QUrl(m_strUrl);
    if (url.scheme().isEmpty())
        url.setScheme("http");

    m_page->settings()->setAttribute(QWebSettings::AutoLoadImages,false);

    // Stack object: the request used to be allocated with new and never freed.
    QNetworkRequest request;
    request.setUrl(url);

    m_data.setData(m_strUrl, SCrawlerData::ARTICLE_URL);
    m_data.setTable("data_"+_strlistArgv.at(1));
    m_data.setData(_strlistArgv.at(2), SCrawlerData::KEYWORD_ID);
    /*
    request.setRawHeader("Cache-Control","max-age=0, no-cache");
    request.setRawHeader("Pragma","no-cache");
    request.setRawHeader("Expires","Thu, 01 Jan 1970 16:00:00 GMT");
    if (m_strReper.isEmpty() == false && m_nSelect == E_NAVER_CAFE_DATA)
        request.setRawHeader("Referer",m_strReper.toLocal8Bit());
    */
    request.setRawHeader("Accept-Language","ko-KR,ko;q=0.8,en-US;q=0.6,en;q=0.4,zh-CN;q=0.2,zh;q=0.2");
    m_page->mainFrame()->load(request);
}
|
||||
|
||||
// Returns the first element below _FindElement that matches the selector
// _strElement and whose attribute _strAttrib equals _strFind.
// Returns a default-constructed QWebElement when nothing matches.
QWebElement SNewsCrawler::Find(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib="",const QString _strFind="")
{
    const QWebElementCollection candidates = _FindElement.findAll(_strElement);
    const int nCandidates = candidates.count();

    for (int idx = 0; idx < nCandidates; ++idx)
    {
        const QWebElement candidate = candidates.at(idx);
        if (candidate.attribute(_strAttrib) != _strFind)
            continue;
        return candidate;
    }
    return QWebElement();
}
|
||||
|
||||
void SNewsCrawler::saveResult(bool ok)
|
||||
{
|
||||
if (m_bUse) return;
|
||||
if (!ok)
|
||||
cout << "Failed loading";
|
||||
else
|
||||
{
|
||||
QString strTitle,strDate,strData,strPlatID,strPlatTitle,strlike;
|
||||
{
|
||||
QWebElement element = Find(m_page->mainFrame()->documentElement(),"div","class","article_info");
|
||||
{
|
||||
strTitle = Find(element,"h3","id","articleTitle").toPlainText(); // Title;
|
||||
strDate = Find(element,"span","class","t11").toPlainText(); // Date
|
||||
}
|
||||
strData = Find(m_page->mainFrame()->documentElement(),"div","id","articleBodyContents").toPlainText();
|
||||
strlike = Find(m_page->mainFrame()->documentElement(),"div","class","u_likeit_module").toPlainText();
|
||||
//entertainment
|
||||
if (strTitle.isEmpty())
|
||||
{
|
||||
QWebElement elementTitle = Find(m_page->mainFrame()->documentElement(),"div","class","end_ct_area");
|
||||
strTitle = Find(elementTitle,"p","class","end_tit").toPlainText();
|
||||
}
|
||||
//entertainment
|
||||
if (strDate.isEmpty()) strDate = Find(element,"em").toPlainText();
|
||||
if (strData.isEmpty()) strData = Find(m_page->mainFrame()->documentElement(),"div","id","articeBody").toPlainText();
|
||||
|
||||
if (strlike.isEmpty() == false) strData += "\r\nlike(" + QString::number(strlike.toInt()) + ")";
|
||||
else
|
||||
{
|
||||
Debug("out.html",m_page->mainFrame()->toHtml());
|
||||
}
|
||||
|
||||
element = Find(m_page->mainFrame()->documentElement(),"div","class","press_logo");
|
||||
{
|
||||
strPlatID = Find(element,"a").attribute("href");
|
||||
strPlatTitle = Find(element,"img").attribute("alt");
|
||||
QStringList strlistPlat = strPlatID.split(".");
|
||||
if(strlistPlat.size() > 2)
|
||||
{
|
||||
if (strlistPlat.at(0) == QString("http://www"))
|
||||
strPlatID = strlistPlat.at(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//platform_title,platform_id
|
||||
|
||||
m_data.deleteDB(m_strUrl,SCrawlerData::ARTICLE_URL);
|
||||
m_data.setData(m_data.GetSafeUtf(strTitle), SCrawlerData::ARTICLE_TITLE);
|
||||
m_data.setData(m_data.GetSafeUtf(strData), SCrawlerData::ARTICLE_DATA);
|
||||
m_data.setData(strPlatID,SCrawlerData::PLATFORM_ID);
|
||||
m_data.setData(strPlatTitle,SCrawlerData::PLATFORM_TITLE);
|
||||
m_data.setData(strDate, SCrawlerData::ARTICLE_DATE);
|
||||
m_data.setData("naver", SCrawlerData::PLATFORM_NAME);
|
||||
m_data.setData("news", SCrawlerData::PLATFORM_FORM);
|
||||
m_data.setData("body", SCrawlerData::ARTICLE_FORM);
|
||||
m_data.sendDB();
|
||||
saveFrame(m_page->mainFrame());
|
||||
m_strUrl.split("&");
|
||||
m_reply.SetUrl(m_strUrl);
|
||||
m_reply.Start(&m_data);
|
||||
if (m_bUse)
|
||||
cout << "ok";
|
||||
else
|
||||
cout << "fail";
|
||||
emit finished();
|
||||
}
|
||||
}
|
||||
|
||||
// Walks the frame tree looking for the frame named "ifrMemo". When found,
// the text of its "_totalcount" element (commas removed) is passed to
// m_reply.SetTotal() and m_bUse is set, which stops any further processing.
void SNewsCrawler::saveFrame(QWebFrame *frame)
{
    if (m_bUse) return;

    if (frame->frameName() == "ifrMemo")
    {
        QString strTotal = Find(frame->documentElement(),"strong","class","_totalcount").toPlainText().trimmed();
        strTotal.replace(",","");
        m_reply.SetTotal(strTotal.toInt());
        m_bUse = true;
    }

    const QList<QWebFrame*> children = frame->childFrames();
    for (int idx = 0; idx < children.size(); ++idx)
        saveFrame(children.at(idx));
}
|
||||
|
||||
33
AjaxCrawlerProcess/snewscrawler.h
Normal file
33
AjaxCrawlerProcess/snewscrawler.h
Normal file
@@ -0,0 +1,33 @@
|
||||
#ifndef SNEWSCRAWLER_H
|
||||
#define SNEWSCRAWLER_H
|
||||
|
||||
#include <QWebPage>
|
||||
#include <QObject>
|
||||
#include <QThreadPool>
|
||||
|
||||
#include "sreplygetmanage.h"
|
||||
|
||||
class SNewsCrawler : public QObject
|
||||
{
|
||||
Q_OBJECT
|
||||
public:
|
||||
explicit SNewsCrawler(QObject *parent = 0);
|
||||
~SNewsCrawler();
|
||||
void load(QStringList _strlistArgv);
|
||||
void Debug(QString _strFilename,QString _strData);
|
||||
signals:
|
||||
void finished();
|
||||
private slots:
|
||||
void saveResult(bool ok);
|
||||
private:
|
||||
QWebPage *m_page;
|
||||
QString m_strUrl;
|
||||
SReplyGetManage m_reply;
|
||||
SCrawlerData m_data;
|
||||
bool m_bUse;
|
||||
private:
|
||||
QWebElement Find(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind);
|
||||
void saveFrame(QWebFrame *frame);
|
||||
};
|
||||
|
||||
#endif // SNEWSCRAWLER_H
|
||||
149
AjaxCrawlerProcess/sreplygetmanage.cpp
Normal file
149
AjaxCrawlerProcess/sreplygetmanage.cpp
Normal file
@@ -0,0 +1,149 @@
|
||||
#include <QJsonDocument>
|
||||
#include <QJsonObject>
|
||||
#include <QJsonValue>
|
||||
#include <QJsonArray>
|
||||
|
||||
#include "sreplygetmanage.h"
|
||||
#include "srunnable.h"
|
||||
#include "data.h"
|
||||
|
||||
extern void Debug(QString _strFilename,QString _strData);
|
||||
|
||||
// Creates the thread pool used by Start(); the reply total starts at -1
// until SetTotal() is called.
SReplyGetManage::SReplyGetManage()
    : m_pool(new QThreadPool)
    , m_nTotal(-1)
{
}
|
||||
|
||||
// Releases the thread pool allocated in the constructor (it was never freed).
// NOTE(review): the class has an implicit copy constructor; copying an
// instance would share m_pool, so instances must not be copied.
SReplyGetManage::~SReplyGetManage()
{
    delete m_pool;
    m_pool = 0;
}
|
||||
|
||||
// Extracts the "oid" and "aid" query parameters from the article URL and
// stores "news<oid>%2C<aid>" in m_strGno, which Start() sends as the gno
// request parameter.
//
// Only the query part is parsed: everything up to the first '?' and anything
// after a '#' is dropped first. Previously the whole URL was split on '&',
// so a parameter directly after the '?' (e.g. "...read.nhn?oid=1&aid=2") was
// never recognised and a trailing "#fragment" became part of the value.
void SReplyGetManage::SetUrl(QString _strUrl)
{
    QString strQuery = _strUrl;

    const int nFragment = strQuery.indexOf(QLatin1Char('#'));
    if (nFragment >= 0)
        strQuery.truncate(nFragment);

    const int nQueryStart = strQuery.indexOf(QLatin1Char('?'));
    if (nQueryStart >= 0)
        strQuery = strQuery.mid(nQueryStart + 1);

    QString strOid,strAid;
    foreach (const QString &strPair, strQuery.split("&"))
    {
        const QStringList strListData = strPair.split("=");
        if (strListData.size() != 2)
            continue;   // not a plain key=value pair

        if (strListData.at(0) == "oid")
            strOid = strListData.at(1);
        if (strListData.at(0) == "aid")
            strAid = strListData.at(1);
    }
    m_strGno = "news" + strOid + "%2C" + strAid;
}
|
||||
|
||||
// Downloads the comment list of the article identified by m_strGno, fetches
// nested replies in parallel through SRunnable tasks, and inserts one row per
// comment through _pData.
//
// _pData  row buffer; its ARTICLE_FORM is set to "reply" (even when there is
//         nothing to fetch) and the date, content, nickname, id and etc
//         columns are overwritten for every comment before sendDB().
//
// Changes compared with the original:
//  - returns when the connection cannot be established (it used to continue
//    and then insert m_nTotal empty rows);
//  - stops filling the array after m_nTotal comments (more items in the
//    response used to write past the end of pReply);
//  - only the comments actually received are stored (slots that were never
//    filled used to be inserted as empty rows).
void SReplyGetManage::Start(SCrawlerData *_pData)
{
    _pData->setData("reply", SCrawlerData::ARTICLE_FORM);
    if (m_nTotal <= 0) return;

    QTcpSocket socket;
    socket.connectToHost("125.209.226.173",80);
    if(!socket.waitForConnected())
    {
        qDebug() << "Error: " << socket.errorString();
        return;
    }

    // Ask for all comments in a single page.
    QString strTotal = QString::number(m_nTotal);
    QString strParam = "pageSize="+strTotal+"&gno=" + m_strGno + "&serviceId=news&page=1";
    socket.write(QString("POST /api/comment/list.json HTTP/1.1\r\n"
                         "Host: comment.news.naver.com\r\n"
                         "Connection: keep-alive\r\n"
                         "Content-Length: "+QString::number(strParam.size())+"\r\n"
                         "charset: utf-8\r\n"
                         "Origin: http://comment.news.naver.com\r\n"
                         "User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.124 Safari/537.36\r\n"
                         "Content-Type: application/x-www-form-urlencoded; charset=UTF-8\r\n"
                         "Accept: */*\r\n"
                         "Accept-Encoding: deflate\r\n"
                         "Accept-Language: ko-KR,ko;q=0.8,en-US;q=0.6,en;q=0.4,zh-CN;q=0.2,zh;q=0.2\r\n\r\n"+strParam).toUtf8());

    // Read until no more data arrives within the default wait timeout.
    QByteArray byArray;
    while (socket.waitForReadyRead())
    {
        byArray += socket.readAll();
    }

    // Cut the response down to the JSON text.
    // NOTE(review): the 8188/8192 stepping drops 4 bytes after every 8188;
    // presumably this removes chunk markers of the HTTP response - confirm.
    // Converting each slice to QString separately may also split a multi-byte
    // UTF-8 sequence at a slice boundary.
    int index = byArray.indexOf("{");
    byArray=byArray.mid(index-2);
    bool bFlag = true;
    QString strOut;
    while(bFlag)
    {
        strOut += byArray.left(8188);
        byArray=byArray.mid(8192);
        if (byArray.size() <= 8192)
        {
            bFlag = false;
            strOut += byArray;
        }
    }
    strOut = strOut.replace("\r\n","").replace("\n","");

    QJsonParseError error;
    QJsonDocument d = QJsonDocument::fromJson(strOut.toUtf8(),&error);
    if (error.error != 0)
    {
        // The document is empty in this case, so the loop below does nothing.
        qDebug() << error.errorString();
    }

    m_pool->setMaxThreadCount(4);
    SReplyData *pReply = new SReplyData[m_nTotal];
    int nCount = 0;

    const QJsonArray replies = d.object().value("message").toObject().value("result").toObject().value("commentReplies").toArray();
    foreach(QJsonValue value, replies)
    {
        // pReply has exactly m_nTotal slots; ignore anything beyond that.
        if (nCount >= m_nTotal)
            break;

        QJsonObject obj = value.toObject();
        SReplyData &reply = pReply[nCount];
        reply.m_nReplyReplyCount = obj["replyCount"].toInt();

        // Fields up to E_REPLY_CONTENT are strings, the counters are numbers.
        for (int i = E_REPLY_USER_ID; i < E_REPLY_MAX; i++)
        {
            if (i <= E_REPLY_CONTENT)
                reply.m_strReplyData[i] = obj[g_strJsonReplyHead[i]].toString();
            else
                reply.m_strReplyData[i] = QString::number(obj[g_strJsonReplyHead[i]].toInt());
        }

        // Append the counters to the content as "(name,value)" lines.
        reply.m_strReplyData[E_REPLY_CONTENT] += "\r\n";
        for (int i = E_REPLY_COUNT_GOOD; i < E_REPLY_MAX ; i++)
        {
            reply.m_strReplyData[E_REPLY_CONTENT] += "(";
            reply.m_strReplyData[E_REPLY_CONTENT] += g_strJsonReplyHead[i];
            reply.m_strReplyData[E_REPLY_CONTENT] += ",";
            reply.m_strReplyData[E_REPLY_CONTENT] += reply.m_strReplyData[i];
            reply.m_strReplyData[E_REPLY_CONTENT] += ")\r\n";
        }

        // Comments with nested replies get a pool task that fills
        // m_strReplyReply; each task writes to its own array element.
        if (reply.m_nReplyReplyCount > 0 )
        {
            SRunnable *pRun = new SRunnable();
            QString strReplyParam = "commentNo=";
            strReplyParam += QString::number(obj["commentReplyNo"].toInt());
            strReplyParam += "&pageSize=100&gno=";
            strReplyParam += m_strGno;
            strReplyParam += "&serviceId=news";
            pRun->SetParam(strReplyParam,&reply.m_strReplyReply);
            pRun->m_strID = QString::number(obj["commentReplyNo"].toInt());
            pRun->setAutoDelete(true);
            m_pool->start(pRun);
        }
        nCount++;
    }

    // All tasks must be done before their output is read and the array freed.
    m_pool->waitForDone();

    for (int i = 0; i < nCount ; i++)
    {
        _pData->setData(_pData->GetDate(pReply[i].m_strReplyData[E_REPLY_DATE]), SCrawlerData::ARTICLE_DATE);
        _pData->setData(_pData->SqlString(_pData->GetSafeUtf(pReply[i].m_strReplyData[E_REPLY_CONTENT])), SCrawlerData::ARTICLE_DATA);
        _pData->setData(_pData->SqlString(_pData->GetSafeUtf(pReply[i].m_strReplyData[E_REPLY_USER_NICKNAME])), SCrawlerData::ARTICLE_NICKNAME);
        _pData->setData(_pData->SqlString(_pData->GetSafeUtf(pReply[i].m_strReplyData[E_REPLY_USER_ID])), SCrawlerData::ARTICLE_ID);
        _pData->setData(_pData->SqlString(_pData->GetSafeUtf(pReply[i].m_strReplyReply.join("\r\n"))), SCrawlerData::ETC);
        _pData->sendDB();
    }
    delete [] pReply;
}
|
||||
|
||||
21
AjaxCrawlerProcess/sreplygetmanage.h
Normal file
21
AjaxCrawlerProcess/sreplygetmanage.h
Normal file
@@ -0,0 +1,21 @@
|
||||
#ifndef SREPLYGETMANAGE_H
|
||||
#define SREPLYGETMANAGE_H
|
||||
|
||||
#include <QThreadPool>
|
||||
#include "scrawlerdata.h"
|
||||
|
||||
class SReplyGetManage
|
||||
{
|
||||
public:
|
||||
SReplyGetManage();
|
||||
~SReplyGetManage();
|
||||
void SetTotal(int _nTotal) { m_nTotal = _nTotal;}
|
||||
void SetUrl(QString _strUrl);
|
||||
void Start(SCrawlerData *_pData);
|
||||
private:
|
||||
QThreadPool *m_pool;
|
||||
int m_nTotal;
|
||||
QString m_strGno;
|
||||
};
|
||||
|
||||
#endif // SREPLYGETMANAGE_H
|
||||
96
AjaxCrawlerProcess/srunnable.cpp
Normal file
96
AjaxCrawlerProcess/srunnable.cpp
Normal file
@@ -0,0 +1,96 @@
|
||||
#include <QJsonDocument>
|
||||
#include <QJsonObject>
|
||||
#include <QJsonValue>
|
||||
#include <QJsonArray>
|
||||
|
||||
#include "srunnable.h"
|
||||
#include "data.h"
|
||||
|
||||
extern void Debug(QString _strFilename,QString _strData);
|
||||
|
||||
// Starts without an output list; SetParam() supplies it.
SRunnable::SRunnable()
    : m_pstrOut(0)
{
}
|
||||
|
||||
// Performs no explicit cleanup; the output list is not owned by this object.
SRunnable::~SRunnable() {}
|
||||
|
||||
void SRunnable::run()
|
||||
{
|
||||
QTcpSocket socket;
|
||||
socket.connectToHost("202.179.179.16",80);
|
||||
if(!socket.waitForConnected())
|
||||
{
|
||||
qDebug() << "Error: " << socket.errorString();
|
||||
}
|
||||
socket.write(QString("POST /api/reply/list.json HTTP/1.1\r\n"
|
||||
"Host: comment.news.naver.com\r\n"
|
||||
"User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0\r\n"
|
||||
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n"
|
||||
"Accept-Language: ko-KR,ko;q=0.8,en-US;q=0.5,en;q=0.3\r\n"
|
||||
"Accept-Encoding: deflate\r\n"
|
||||
"Content-Type: application/x-www-form-urlencoded; charset=utf-8\r\n"
|
||||
"charset: utf-8\r\n"
|
||||
"Content-Length: " + QString::number(m_strParam.length()) + "\r\n"
|
||||
"Connection: keep-alive\r\n"
|
||||
"Pragma: no-cache\r\n"
|
||||
"Cache-Control: no-cache\r\n\r\n" + m_strParam).toUtf8());
|
||||
QByteArray byArray;
|
||||
while (socket.waitForReadyRead())
|
||||
{
|
||||
byArray += socket.readAll();
|
||||
}
|
||||
|
||||
QJsonDocument d;
|
||||
{
|
||||
QJsonParseError error;
|
||||
int index = byArray.indexOf("{");
|
||||
byArray=byArray.mid(index-2);
|
||||
bool bFlag = true;
|
||||
QString strOut;
|
||||
while(bFlag)
|
||||
{
|
||||
strOut += byArray.left(8188);
|
||||
byArray=byArray.mid(8192);
|
||||
if (byArray.size() <= 8192)
|
||||
{
|
||||
bFlag = false;
|
||||
strOut += byArray;
|
||||
}
|
||||
}
|
||||
strOut = strOut.replace("\r\n","").replace("\n","");
|
||||
if (strOut.length() <= 0 ) return;
|
||||
d = QJsonDocument::fromJson(strOut.trimmed().toUtf8(),&error);
|
||||
if (error.error != 0)
|
||||
{
|
||||
qDebug() << error.errorString();
|
||||
Debug("reply.json",strOut);
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
QString astrOut[E_REPLY_MAX];
|
||||
foreach(QJsonValue value ,d.object().value("message").toObject().value("result").toObject().value("commentReplies").toArray())
|
||||
{
|
||||
QJsonObject obj = value.toObject();
|
||||
int i= E_REPLY_USER_ID;
|
||||
while (i < E_REPLY_MAX)
|
||||
{
|
||||
if (i <= E_REPLY_CONTENT)
|
||||
astrOut[i] = obj[g_strJsonReplyHead[i]].toString();
|
||||
else
|
||||
astrOut[i] = QString::number(obj[g_strJsonReplyHead[i]].toInt());
|
||||
i++;
|
||||
}
|
||||
for (i = 0; i < E_REPLY_MAX;i++)
|
||||
{
|
||||
QString strOut = g_strJsonReplyHead[i] + " : " + astrOut[i];
|
||||
m_pstrOut->push_back(strOut);
|
||||
}
|
||||
m_pstrOut->push_back("");
|
||||
}
|
||||
socket.close();
|
||||
}
|
||||
20
AjaxCrawlerProcess/srunnable.h
Normal file
20
AjaxCrawlerProcess/srunnable.h
Normal file
@@ -0,0 +1,20 @@
|
||||
#ifndef SRUNNABLE_H
|
||||
#define SRUNNABLE_H
|
||||
|
||||
#include <QRunnable>
|
||||
#include <QTcpSocket>
|
||||
|
||||
class SRunnable : public QRunnable
|
||||
{
|
||||
public:
|
||||
SRunnable();
|
||||
~SRunnable();
|
||||
void SetParam(QString _strParam,QStringList *_pstrOut) { m_strParam = _strParam;m_pstrOut = _pstrOut;}
|
||||
QString m_strID;
|
||||
QStringList *m_pstrOut;
|
||||
protected:
|
||||
void run();
|
||||
private:
|
||||
QString m_strParam;
|
||||
};
|
||||
#endif // SRUNNABLE_H
|
||||
Reference in New Issue
Block a user