From 0a6c66099d05d1effd0230eebde79f9b488458d4 Mon Sep 17 00:00:00 2001
From: admin
Date: Thu, 18 Jun 2015 08:49:08 +0000
Subject: [PATCH] =?UTF-8?q?=EB=89=B4=EC=8A=A4=20=ED=81=AC=EB=A1=A4?=
 =?UTF-8?q?=EB=9F=AC=20=EA=B8=B0=EB=8A=A5=20=EC=B6=94=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

git-svn-id: svn://192.168.0.12/source@151 8346c931-da38-4b9b-9d4c-e48b93cbd075
---
 AjaxCrawlerProcess/AjaxCrawlerProcess.pro |  22 +++
 AjaxCrawlerProcess/data.h                 |  38 +++++
 AjaxCrawlerProcess/main.cpp               |  62 ++++++++
 AjaxCrawlerProcess/scrawlerdata.cpp       | 185 ++++++++++++++++++++++
 AjaxCrawlerProcess/scrawlerdata.h         |  57 +++++++
 AjaxCrawlerProcess/snewscrawler.cpp       | 156 +++++++++++++++++++
 AjaxCrawlerProcess/snewscrawler.h         |  34 ++++
 AjaxCrawlerProcess/sreplygetmanage.cpp    | 156 +++++++++++++++++++
 AjaxCrawlerProcess/sreplygetmanage.h      |  22 +++
 AjaxCrawlerProcess/srunnable.cpp          | 100 ++++++++++++
 AjaxCrawlerProcess/srunnable.h            |  21 +++
 11 files changed, 853 insertions(+)
 create mode 100644 AjaxCrawlerProcess/AjaxCrawlerProcess.pro
 create mode 100644 AjaxCrawlerProcess/data.h
 create mode 100644 AjaxCrawlerProcess/main.cpp
 create mode 100644 AjaxCrawlerProcess/scrawlerdata.cpp
 create mode 100644 AjaxCrawlerProcess/scrawlerdata.h
 create mode 100644 AjaxCrawlerProcess/snewscrawler.cpp
 create mode 100644 AjaxCrawlerProcess/snewscrawler.h
 create mode 100644 AjaxCrawlerProcess/sreplygetmanage.cpp
 create mode 100644 AjaxCrawlerProcess/sreplygetmanage.h
 create mode 100644 AjaxCrawlerProcess/srunnable.cpp
 create mode 100644 AjaxCrawlerProcess/srunnable.h

diff --git a/AjaxCrawlerProcess/AjaxCrawlerProcess.pro b/AjaxCrawlerProcess/AjaxCrawlerProcess.pro
new file mode 100644
index 0000000..a473f24
--- /dev/null
+++ b/AjaxCrawlerProcess/AjaxCrawlerProcess.pro
@@ -0,0 +1,22 @@
+QT += webkitwidgets network widgets sql core
+
+TARGET = AjaxCrawlerProcess
+
+CONFIG += console
+CONFIG -= app_bundle
+
+TEMPLATE = app
+
+HEADERS += \
+    snewscrawler.h \
+    sreplygetmanage.h \
+    srunnable.h \
+    data.h \
+    scrawlerdata.h
+
+SOURCES += \
+    snewscrawler.cpp \
+    main.cpp \
+    sreplygetmanage.cpp \
+    srunnable.cpp \
+    scrawlerdata.cpp
diff --git a/AjaxCrawlerProcess/data.h b/AjaxCrawlerProcess/data.h
new file mode 100644
index 0000000..e4f724b
--- /dev/null
+++ b/AjaxCrawlerProcess/data.h
@@ -0,0 +1,38 @@
+#ifndef DATA
+#define DATA
+#include <QStringList>
+
+// Indices of the per-comment fields kept in SReplyData::m_strReplyData.
+enum E_REPLY
+{
+    E_REPLY_USER_ID = 0,
+    E_REPLY_USER_NICKNAME,
+    E_REPLY_DATE,
+    E_REPLY_CONTENT,
+    E_REPLY_COUNT_GOOD,
+    E_REPLY_COUNT_BAD,
+    E_REPLY_COUNT_LIKE,
+    E_REPLY_MAX,
+};
+
+// JSON key read for each E_REPLY field; indexed by E_REPLY.
+const QString g_strJsonReplyHead[E_REPLY_MAX] = {
+    "maskUserId",
+    "userNickname",
+    "sRegDate",
+    "content",
+    "goodCount",
+    "badCount",
+    "likeCount",
+};
+
+// One top-level comment: its fields, its reply count and the text lines collected for its replies.
+struct SReplyData
+{
+    int m_nReplyReplyCount;
+    QString m_strReplyData[E_REPLY_MAX];
+    QStringList m_strReplyReply;
+};
+
+#endif // DATA
+
diff --git a/AjaxCrawlerProcess/main.cpp b/AjaxCrawlerProcess/main.cpp
new file mode 100644
index 0000000..72785e7
--- /dev/null
+++ b/AjaxCrawlerProcess/main.cpp
@@ -0,0 +1,62 @@
+#include <QApplication>
+#include "snewscrawler.h"
+#include <QSqlDatabase>
+#include <QWebSettings>
+#include <QFile>
+#include <QTextStream>
+#include <iostream>
+#include <ctime>
+
+using namespace std;
+
+// Appends _strData to the text file _strFilename; silently does nothing when the file cannot be opened.
+void Debug(QString _strFilename,QString _strData)
+{
+    QFile file(_strFilename);
+    if (!file.open(QIODevice::WriteOnly | QIODevice::Text | QIODevice::Append))
+        return;
+    QTextStream out(&file);
+    out << _strData;
+    file.close();
+}
+
+// Entry point: opens the MySQL connection, then crawls the article named on the command line.
+// Arguments: <article url> <table suffix> <keyword id> (consumed by SNewsCrawler::load).
+int main(int argc, char *argv[])
+{
+    srand(time(0));
+    QApplication a(argc, argv);
+    a.setApplicationName(QString("Chrome"));
+    a.setApplicationVersion(QString("39.0.2171.95"));
+
+    QSqlDatabase db = QSqlDatabase::addDatabase("QMYSQL");
+    // NOTE(review): connection settings and credentials are hard-coded; move them to configuration.
+    db.setHostName("bigbird.iptime.org");
+    db.setUserName("admin");
+    db.setPassword("admin123");
+    db.setDatabaseName("concepters");
+
+    if (db.open() == false)
+    {
+        cout << "error : db open fail...";
+        return 0;
+    }
+    QWebSettings::setObjectCacheCapacities(0,0,0);
+    QWebSettings::clearMemoryCaches();
+    QStringList strArgv;
+    for (int i = 1; i < argc ; i++)
+        strArgv.push_back(argv[i]);
+    // SNewsCrawler::load() indexes elements 0..2, so refuse to start without them.
+    if (strArgv.size() < 3)
+    {
+        cout << "error : usage AjaxCrawlerProcess <url> <table suffix> <keyword id>";
+        return 1;
+    }
+
+    SNewsCrawler *process = new SNewsCrawler;
+    QObject::connect(process, SIGNAL(finished()), QApplication::instance(), SLOT(quit()));
+    process->load(strArgv);
+
+    return a.exec();
+
+}
diff --git a/AjaxCrawlerProcess/scrawlerdata.cpp b/AjaxCrawlerProcess/scrawlerdata.cpp
new file mode 100644
index 0000000..3081b35
--- /dev/null
+++ b/AjaxCrawlerProcess/scrawlerdata.cpp
@@ -0,0 +1,185 @@
+#include "scrawlerdata.h"
+#include <QSqlQuery>
+#include <QSqlError>
+#include <QVariant>
+#include <QDateTime>
+#include <QString>
+#include <QChar>
+#include <iostream>
+using namespace std;
+// Fills the column-name table used to build the INSERT/DELETE statements.
+SCrawlerData::SCrawlerData()
+{
+    m_strColumn[ARTICLE_DATA] = "article_data";
+    m_strColumn[ARTICLE_DATE] = "article_date";
+    m_strColumn[ARTICLE_FORM] = "article_form";
+    m_strColumn[ARTICLE_HIT] = "article_hit";
+    m_strColumn[ARTICLE_ID] = "article_id";
+    m_strColumn[ARTICLE_NICKNAME] = "article_nickname";
+    m_strColumn[ARTICLE_ORDER] = "article_order";
+    m_strColumn[ARTICLE_PARENT] = "article_parent";
+    m_strColumn[ARTICLE_PROFILE] = "article_profile";
+    m_strColumn[ARTICLE_PROFILEURL] = "article_profileurl";
+    m_strColumn[ARTICLE_TITLE] = "article_title";
+    m_strColumn[ARTICLE_URL] = "article_url";
+    m_strColumn[KEYWORD_ID] = "keyword_id";
+    m_strColumn[PLATFORM_FORM] = "platform_form";
+    m_strColumn[PLATFORM_ID] = "platform_id";
+    m_strColumn[PLATFORM_NAME] = "platform_name";
+    m_strColumn[PLATFORM_TITLE] = "platform_title";
+    m_strColumn[REPLY_URL] = "reply_url";
+    m_strColumn[ETC] = "etc";
+}
+
+SCrawlerData::~SCrawlerData()
+{
+    clear();
+    for(int i = 0; i < TOTAL_COUNT; i++)
+    {
+        m_strColumn[i].clear();
+    }
+}
+
+void SCrawlerData::clear()
+{
+    for(int i = 0; i < TOTAL_COUNT; i++)
+    {
+        m_strData[i].clear();
+    }
+}
+
+void SCrawlerData::clear(int _num)
+{
+    m_strData[_num].clear();
+}
+
+QString SCrawlerData::getData(int _num)
+{
+    return m_strData[_num];
+}
+
+void SCrawlerData::setTable(QString _str)
+{
+    m_strTable = _str;
+}
+
+void SCrawlerData::setData(QString _str, int _num)
+{
+    m_strData[_num] = _str;
+}
+
+// Inserts the current values of all columns as one row of m_strTable; returns false when the query fails.
+bool SCrawlerData::sendDB()
+{
+    QSqlQuery query;
+
+    QString strQuery;
+    strQuery = "insert into " + m_strTable + "(";
+
+    for(int i = 0; i < TOTAL_COUNT; i++)
+    {
+        strQuery += (m_strColumn[i] + ",");
+    }
+
+    strQuery = strQuery.left(strQuery.size() - 1);
+    strQuery += ") VALUES (";
+
+    for(int i = 0; i < TOTAL_COUNT; i++)
+    {
+        strQuery += (":" + m_strColumn[i] + ",");
+    }
+
+    strQuery = strQuery.left(strQuery.size() - 1);
+    strQuery += ")";
+
+    query.prepare(strQuery.toUtf8());
+
+    for(int i = 0; i < TOTAL_COUNT; i++)
+    {
+        if(i == ARTICLE_ORDER)
+            query.bindValue(QString(":" + m_strColumn[i]), m_strData[i].trimmed().toInt());
+        else
+            query.bindValue(QString(":" + m_strColumn[i]), m_strData[i].trimmed().toUtf8());
+    }
+
+    if (query.exec()==false)
+    {
+        cout << "error : " << query.lastError().text().toStdString();
+        return false;
+    }
+
+    return true;
+}
+
+// Returns _strData with every character that is neither printable nor whitespace removed.
+QString SCrawlerData::GetSafeUtf(QString _strData)
+{
+    QString str;
+
+    for (int i = 0; i < _strData.length(); i++)
+    {
+        const QChar ch = _strData.at(i);
+        if (ch.isPrint() || ch.isSpace())
+            str += ch;
+    }
+    return str;
+}
+
+// Backslash-escapes single quotes for text that is spliced into an SQL string literal.
+// Do not apply it to values passed to sendDB()/deleteDB(): those are bound as parameters.
+QString SCrawlerData::SqlString(QString _str)
+{
+    _str = _str.replace("'","\\'");
+    return _str;
+}
+
+// Deletes the rows of m_strTable whose column _num equals _str; returns false when the query fails.
+bool SCrawlerData::deleteDB(QString _str, int _num)
+{
+    QSqlQuery sql;
+
+    // The value is bound rather than concatenated, so quotes inside _str cannot change the statement.
+    QString strQuery = "delete from ";
+    strQuery += m_strTable;
+    strQuery += QString(" where ");
+    strQuery += m_strColumn[_num];
+    strQuery += QString(" = :value");
+    sql.prepare(strQuery);
+    sql.bindValue(":value", _str);
+
+    if (sql.exec() == false)
+    {
+        cout << "error " << sql.lastError().text().toStdString();
+        cout << strQuery.toStdString();
+        return false;
+    }
+
+    return true;
+}
+
+// Converts a "yyyy.MM.dd 오전|오후 h:mm" timestamp to 24-hour time.
+// AM times only lose the marker (12 AM becomes 00:mm); PM times are returned as "yyyy.MM.dd HH:mm".
+// Text without an AM/PM marker, or text that does not parse, is returned without further conversion.
+QString SCrawlerData::GetDate(QString _strDate)
+{
+    const bool bAm = _strDate.contains("오전");
+    const bool bPm = _strDate.contains("오후");
+    if (!bAm && !bPm)
+        return _strDate;
+
+    QString strOut = _strDate.replace(bPm ? " 오후" : " 오전","");
+    QDateTime dateTime = QDateTime::fromString(strOut,"yyyy.MM.dd h:mm");
+    if (!dateTime.isValid())
+        return strOut;
+
+    const int nHour = dateTime.time().hour();
+    if (bPm && nHour < 12)
+        dateTime = dateTime.addSecs(60*60*12);      // 1 PM..11 PM -> 13..23; 12 PM stays 12
+    else if (bAm && nHour == 12)
+        dateTime = dateTime.addSecs(-60*60*12);     // 12 AM is midnight
+    else if (bAm)
+        return strOut;                              // other AM times need no shift
+    return dateTime.toString("yyyy.MM.dd HH:mm");
+}
+
+
diff --git a/AjaxCrawlerProcess/scrawlerdata.h b/AjaxCrawlerProcess/scrawlerdata.h
new file mode 100644
index 0000000..e8172f8
--- /dev/null
+++ b/AjaxCrawlerProcess/scrawlerdata.h
@@ -0,0 +1,57 @@
+#ifndef SCRAWLERDATA
+#define SCRAWLERDATA
+
+#include <QString>
+#include <QStringList>
+
+// Holds one row of crawled data (one value per column) and writes it to / deletes it from a table.
+class SCrawlerData
+{
+public:
+    // Column indices for setData()/getData()/deleteDB(); the order is the column order of the INSERT.
+    enum E_COLUMN
+    {
+        PLATFORM_NAME = 0,
+        PLATFORM_FORM,
+        PLATFORM_TITLE,
+        ARTICLE_FORM,
+        ARTICLE_PARENT,
+        ARTICLE_ID,
+        ARTICLE_NICKNAME,
+        ARTICLE_TITLE,
+        ARTICLE_DATA,
+        ARTICLE_URL,
+        ARTICLE_HIT,
+        ARTICLE_DATE,
+        ARTICLE_ORDER,
+        ARTICLE_PROFILE,
+        ARTICLE_PROFILEURL,
+        PLATFORM_ID,
+        KEYWORD_ID,
+        REPLY_URL,
+        ETC,
+        TOTAL_COUNT,
+    };
+
+private:
+    QString m_strData[TOTAL_COUNT];
+    QString m_strColumn[TOTAL_COUNT];
+    QString m_strTable;
+
+public:
+    SCrawlerData();
+    ~SCrawlerData();
+    QString getData(int _num);
+    QString SqlString(QString _str);
+    QString GetSafeUtf(QString _strData);
+    QString GetTable() { return m_strTable; }
+    QString GetDate(QString _strDate);
+    void setData(QString _str, int _num);
+    void clear();
+    void clear(int _num);
+    bool sendDB();
+    bool deleteDB(QString _str, int _num);
+    void setTable(QString _str);
+};
+
+#endif // SCRAWLERDATA
diff --git a/AjaxCrawlerProcess/snewscrawler.cpp b/AjaxCrawlerProcess/snewscrawler.cpp
new file mode 100644
index 0000000..d22a745
--- /dev/null
+++ b/AjaxCrawlerProcess/snewscrawler.cpp
@@ -0,0 +1,156 @@
+#include "snewscrawler.h"
+
+#include <QWebFrame>
+#include <QWebElement>
+#include <QWebElementCollection>
+#include <QNetworkRequest>
+#include <iostream>
+
+using namespace std;
+
+#include <QFile>
+#include <QTextStream>
+
+// Appends _strData to the text file _strFilename; silently does nothing when the file cannot be opened.
+void SNewsCrawler::Debug(QString _strFilename,QString _strData)
+{
+    QFile file(_strFilename);
+    if (!file.open(QIODevice::WriteOnly | QIODevice::Text | QIODevice::Append))
+        return;
+    QTextStream out(&file);
+    out << _strData;
+    file.close();
+}
+
+SNewsCrawler::SNewsCrawler(QObject *parent) : QObject(parent) , m_bUse(false)
+{
+    m_page = new QWebPage(this);    // parented so the page is destroyed together with the crawler
+    connect(m_page, SIGNAL(loadFinished(bool)), this, SLOT(saveResult(bool)));
+}
+
+SNewsCrawler::~SNewsCrawler()
+{
+
+}
+
+// Starts loading the article. _strlistArgv must hold: [0] article URL, [1] table suffix, [2] keyword id.
+void SNewsCrawler::load(QStringList _strlistArgv)
+{
+    m_strUrl = _strlistArgv.at(0);
+    cout << m_strUrl.toStdString() << endl;
+    QUrl url = QUrl(m_strUrl);
+    if (url.scheme().isEmpty())
+        url.setScheme("http");
+
+    m_page->settings()->setAttribute(QWebSettings::AutoLoadImages,false);
+    QNetworkRequest request;
+    request.setUrl(url);
+
+    m_data.setData(m_strUrl, SCrawlerData::ARTICLE_URL);
+    m_data.setTable("data_"+_strlistArgv.at(1));
+    m_data.setData(_strlistArgv.at(2), SCrawlerData::KEYWORD_ID);
+    request.setRawHeader("Accept-Language","ko-KR,ko;q=0.8,en-US;q=0.6,en;q=0.4,zh-CN;q=0.2,zh;q=0.2");
+    m_page->mainFrame()->load(request);
+
+}
+
+// Returns the first _strElement descendant whose attribute _strAttrib equals _strFind, or a null element.
+QWebElement SNewsCrawler::Find(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib="",const QString _strFind="")
+{
+    QWebElementCollection elements = _FindElement.findAll(_strElement);
+    foreach (QWebElement element, elements)
+    {
+        if (element.attribute(_strAttrib) == _strFind)
+        {
+            return element;
+        }
+    }
+    QWebElement element;
+    return element;
+}
+
+// loadFinished handler: extracts the article, stores it, fetches its comments, then emits finished().
+void SNewsCrawler::saveResult(bool ok)
+{
+    if (m_bUse) return;
+    if (!ok)
+    {
+        cout << "Failed loading";
+        emit finished();    // without this the event loop never quits after a failed load
+    }
+    else
+    {
+        QString strTitle,strDate,strData,strPlatID,strPlatTitle,strlike;
+        {
+            QWebElement element = Find(m_page->mainFrame()->documentElement(),"div","class","article_info");
+            {
+                strTitle = Find(element,"h3","id","articleTitle").toPlainText(); // Title;
+                strDate = Find(element,"span","class","t11").toPlainText(); // Date
+            }
+            strData = Find(m_page->mainFrame()->documentElement(),"div","id","articleBodyContents").toPlainText();
+            strlike = Find(m_page->mainFrame()->documentElement(),"div","class","u_likeit_module").toPlainText();
+            //entertainment
+            if (strTitle.isEmpty())
+            {
+                QWebElement elementTitle = Find(m_page->mainFrame()->documentElement(),"div","class","end_ct_area");
+                strTitle = Find(elementTitle,"p","class","end_tit").toPlainText();
+            }
+            //entertainment
+            if (strDate.isEmpty()) strDate = Find(element,"em").toPlainText();
+            if (strData.isEmpty()) strData = Find(m_page->mainFrame()->documentElement(),"div","id","articeBody").toPlainText();
+
+            if (strlike.isEmpty() == false) strData += "\r\nlike(" + QString::number(strlike.toInt()) + ")";
+            else
+            {
+                Debug("out.html",m_page->mainFrame()->toHtml());
+            }
+
+            element = Find(m_page->mainFrame()->documentElement(),"div","class","press_logo");
+            {
+                strPlatID = Find(element,"a").attribute("href");
+                strPlatTitle = Find(element,"img").attribute("alt");
+                QStringList strlistPlat = strPlatID.split(".");
+                if(strlistPlat.size() > 2)
+                {
+                    if (strlistPlat.at(0) == QString("http://www"))
+                        strPlatID = strlistPlat.at(1);
+                }
+            }
+        }
+
+        //platform_title,platform_id
+
+        m_data.deleteDB(m_strUrl,SCrawlerData::ARTICLE_URL);
+        m_data.setData(m_data.GetSafeUtf(strTitle), SCrawlerData::ARTICLE_TITLE);
+        m_data.setData(m_data.GetSafeUtf(strData), SCrawlerData::ARTICLE_DATA);
+        m_data.setData(strPlatID,SCrawlerData::PLATFORM_ID);
+        m_data.setData(strPlatTitle,SCrawlerData::PLATFORM_TITLE);
+        m_data.setData(strDate, SCrawlerData::ARTICLE_DATE);
+        m_data.setData("naver", SCrawlerData::PLATFORM_NAME);
+        m_data.setData("news", SCrawlerData::PLATFORM_FORM);
+        m_data.setData("body", SCrawlerData::ARTICLE_FORM);
+        m_data.sendDB();
+        saveFrame(m_page->mainFrame());
+        m_reply.SetUrl(m_strUrl);
+        m_reply.Start(&m_data);
+        if (m_bUse)
+            cout << "ok";
+        else
+            cout << "fail";
+        emit finished();
+    }
+}
+
+// Searches the frame tree for the "ifrMemo" frame and hands its comment total to m_reply.
+void SNewsCrawler::saveFrame(QWebFrame *frame)
+{
+    if (m_bUse) return;
+    if (frame->frameName() == "ifrMemo")
+    {
+        m_reply.SetTotal(Find(frame->documentElement(),"strong","class","_totalcount").toPlainText().trimmed().replace(",","").toInt());
+        m_bUse = true;
+    }
+    foreach(QWebFrame *childFrame, frame->childFrames())
+        saveFrame(childFrame);
+}
+
diff --git a/AjaxCrawlerProcess/snewscrawler.h b/AjaxCrawlerProcess/snewscrawler.h
new file mode 100644
index 0000000..c7dc4d1
--- /dev/null
+++ b/AjaxCrawlerProcess/snewscrawler.h
@@ -0,0 +1,34 @@
+#ifndef SNEWSCRAWLER_H
+#define SNEWSCRAWLER_H
+
+#include <QObject>
+#include <QWebPage>
+#include <QWebElement>
+
+#include "sreplygetmanage.h"
+
+// Loads one news article in a QWebPage, stores its body and comments, then emits finished().
+class SNewsCrawler : public QObject
+{
+    Q_OBJECT
+public:
+    explicit SNewsCrawler(QObject *parent = 0);
+    ~SNewsCrawler();
+    void load(QStringList _strlistArgv);
+    void Debug(QString _strFilename,QString _strData);
+signals:
+    void finished();
+private slots:
+    void saveResult(bool ok);
+private:
+    QWebPage *m_page;
+    QString m_strUrl;
+    SReplyGetManage m_reply;
+    SCrawlerData m_data;
+    bool m_bUse;
+private:
+    QWebElement Find(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind);
+    void saveFrame(QWebFrame *frame);
+};
+
+#endif // SNEWSCRAWLER_H
diff --git a/AjaxCrawlerProcess/sreplygetmanage.cpp b/AjaxCrawlerProcess/sreplygetmanage.cpp
new file mode 100644
index 0000000..4399ac8
--- /dev/null
+++ b/AjaxCrawlerProcess/sreplygetmanage.cpp
@@ -0,0 +1,156 @@
+#include <QTcpSocket>
+#include <QJsonDocument>
+#include <QJsonObject>
+#include <QJsonArray>
+
+#include "sreplygetmanage.h"
+#include "srunnable.h"
+#include "data.h"
+
+extern void Debug(QString _strFilename,QString _strData);
+
+SReplyGetManage::SReplyGetManage() : m_nTotal(-1)
+{
+}
+
+SReplyGetManage::~SReplyGetManage()
+{
+
+}
+
+// Builds the "gno" request parameter from the oid= and aid= query parameters of the article URL.
+void SReplyGetManage::SetUrl(QString _strUrl)
+{
+    QStringList strList = _strUrl.split("&");
+    QString strOid,strAid;
+    {
+        foreach(QString str ,strList)
+        {
+            QStringList strListData = str.split("=");
+            if (strListData.size() == 2 )
+            {
+                if (strListData.at(0) == "oid")
+                    strOid = strListData.at(1);
+                if (strListData.at(0) == "aid")
+                    strAid = strListData.at(1);
+            }
+        }
+    }
+    m_strGno = "news" + strOid + "%2C" + strAid;
+}
+
+// Requests up to m_nTotal top-level comments for m_strGno, collects the replies of each comment on
+// the thread pool, and stores one row per comment through _pData. Does nothing when m_nTotal <= 0.
+void SReplyGetManage::Start(SCrawlerData *_pData)
+{
+    _pData->setData("reply", SCrawlerData::ARTICLE_FORM);
+    if (m_nTotal <= 0) return;
+
+    QTcpSocket socket;
+    socket.connectToHost("125.209.226.173",80);
+    if(!socket.waitForConnected())
+    {
+        qDebug() << "Error: " << socket.errorString();
+        return;
+    }
+    //m_nTotal = 1;
+    QString strTotal = QString::number(m_nTotal);
+    QString strParam = "pageSize="+strTotal+"&gno=" + m_strGno + "&serviceId=news&page=1";
+    socket.write(QString("POST /api/comment/list.json HTTP/1.1\r\n"
+                         "Host: comment.news.naver.com\r\n"
+                         "Connection: keep-alive\r\n"
+                         "Content-Length: "+QString::number(strParam.size())+"\r\n"
+                         "charset: utf-8\r\n"
+                         "Origin: http://comment.news.naver.com\r\n"
+                         "User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.124 Safari/537.36\r\n"
+                         "Content-Type: application/x-www-form-urlencoded; charset=UTF-8\r\n"
+                         "Accept: */*\r\n"
+                         "Accept-Encoding: deflate\r\n"
+                         "Accept-Language: ko-KR,ko;q=0.8,en-US;q=0.6,en;q=0.4,zh-CN;q=0.2,zh;q=0.2\r\n\r\n"+strParam).toUtf8());
+    QByteArray byArray;
+    while (socket.waitForReadyRead())
+    {
+        byArray += socket.readAll();
+    }
+
+    // NOTE(review): presumably undoes chunked transfer framing with a fixed 8188+4 byte stride - confirm.
+    int index = byArray.indexOf("{");
+    byArray=byArray.mid(index-2);
+    bool bFlag = true;
+    QString strOut;
+    while(bFlag)
+    {
+        strOut += byArray.left(8188);
+        byArray=byArray.mid(8192);
+        if (byArray.size() <= 8192)
+        {
+            bFlag = false;
+            strOut += byArray;
+        }
+    }
+    strOut = strOut.replace("\r\n","").replace("\n","");
+    QJsonParseError error;
+    QJsonDocument d = QJsonDocument::fromJson(strOut.toUtf8(),&error);
+    if (error.error != 0)
+    {
+        qDebug() << error.errorString();
+    }
+    m_pool.setMaxThreadCount(4);
+    SReplyData *pReply = new SReplyData[m_nTotal];
+    int nCount = 0;
+    foreach(QJsonValue value ,d.object().value("message").toObject().value("result").toObject().value("commentReplies").toArray())
+    {
+        if (nCount >= m_nTotal) break;    // pReply has m_nTotal slots; never write past them
+        QJsonObject obj = value.toObject();
+        pReply[nCount].m_nReplyReplyCount = obj["replyCount"].toInt();
+        int i= E_REPLY_USER_ID;
+        while (i < E_REPLY_MAX)
+        {
+            if (i <= E_REPLY_CONTENT)
+                pReply[nCount].m_strReplyData[i] = obj[g_strJsonReplyHead[i]].toString();
+            else
+            {
+                pReply[nCount].m_strReplyData[i] = QString::number(obj[g_strJsonReplyHead[i]].toInt());
+            }
+            i++;
+        }
+
+        pReply[nCount].m_strReplyData[E_REPLY_CONTENT] += "\r\n";
+        for (int i = E_REPLY_COUNT_GOOD; i < E_REPLY_MAX ; i++)
+        {
+            pReply[nCount].m_strReplyData[E_REPLY_CONTENT] += "(";
+            pReply[nCount].m_strReplyData[E_REPLY_CONTENT] += g_strJsonReplyHead[i];
+            pReply[nCount].m_strReplyData[E_REPLY_CONTENT] += ",";
+            pReply[nCount].m_strReplyData[E_REPLY_CONTENT] += pReply[nCount].m_strReplyData[i];
+            pReply[nCount].m_strReplyData[E_REPLY_CONTENT] += ")\r\n";
+        }
+        if (pReply[nCount].m_nReplyReplyCount > 0 )
+        {
+            SRunnable *pRun = new SRunnable();
+            QString strParam = "commentNo=";
+            strParam += QString::number(obj["commentReplyNo"].toInt());
+            strParam += "&pageSize=100&gno=";
+            strParam += m_strGno;
+            strParam += "&serviceId=news";
+            pRun->SetParam(strParam,&pReply[nCount].m_strReplyReply);
+            pRun->m_strID = QString::number(obj["commentReplyNo"].toInt());
+            pRun->setAutoDelete(true);
+            m_pool.start(pRun);
+        }
+        nCount++;
+    }
+    m_pool.waitForDone();
+    // Only the first nCount entries were filled; the response may hold fewer comments than m_nTotal.
+    for (int i = 0; i < nCount ; i++)
+    {
+        // sendDB() binds these values as parameters, so they must not be quote-escaped here.
+        _pData->setData(_pData->GetDate(pReply[i].m_strReplyData[E_REPLY_DATE]), SCrawlerData::ARTICLE_DATE);
+        _pData->setData(_pData->GetSafeUtf(pReply[i].m_strReplyData[E_REPLY_CONTENT]), SCrawlerData::ARTICLE_DATA);
+        _pData->setData(_pData->GetSafeUtf(pReply[i].m_strReplyData[E_REPLY_USER_NICKNAME]), SCrawlerData::ARTICLE_NICKNAME);
+        _pData->setData(_pData->GetSafeUtf(pReply[i].m_strReplyData[E_REPLY_USER_ID]), SCrawlerData::ARTICLE_ID);
+        _pData->setData(_pData->GetSafeUtf(pReply[i].m_strReplyReply.join("\r\n")), SCrawlerData::ETC);
+        _pData->sendDB();
+    }
+    delete [] pReply;
+}
+
diff --git a/AjaxCrawlerProcess/sreplygetmanage.h b/AjaxCrawlerProcess/sreplygetmanage.h
new file mode 100644
index 0000000..cc5fac0
--- /dev/null
+++ b/AjaxCrawlerProcess/sreplygetmanage.h
@@ -0,0 +1,22 @@
+#ifndef SREPLYGETMANAGE_H
+#define SREPLYGETMANAGE_H
+
+#include <QThreadPool>
+#include "scrawlerdata.h"
+
+// Downloads the comments of one article and stores them through an SCrawlerData.
+class SReplyGetManage
+{
+public:
+    SReplyGetManage();
+    ~SReplyGetManage();
+    void SetTotal(int _nTotal) { m_nTotal = _nTotal;}
+    void SetUrl(QString _strUrl);
+    void Start(SCrawlerData *_pData);
+private:
+    QThreadPool m_pool;
+    int m_nTotal;
+    QString m_strGno;
+};
+
+#endif // SREPLYGETMANAGE_H
diff --git a/AjaxCrawlerProcess/srunnable.cpp b/AjaxCrawlerProcess/srunnable.cpp
new file mode 100644
index 0000000..72d6dd1
--- /dev/null
+++ b/AjaxCrawlerProcess/srunnable.cpp
@@ -0,0 +1,100 @@
+#include <QTcpSocket>
+#include <QJsonDocument>
+#include <QJsonObject>
+#include <QJsonArray>
+
+#include "srunnable.h"
+#include "data.h"
+
+extern void Debug(QString _strFilename,QString _strData);
+
+SRunnable::SRunnable()
+{
+    m_pstrOut = 0;
+}
+
+SRunnable::~SRunnable()
+{
+
+}
+
+// Posts m_strParam to the reply-list API and appends "key : value" lines for every reply to *m_pstrOut.
+void SRunnable::run()
+{
+    if (m_pstrOut == 0) return;    // SetParam() was never called: nowhere to store the result
+    QTcpSocket socket;
+    socket.connectToHost("202.179.179.16",80);
+    if(!socket.waitForConnected())
+    {
+        qDebug() << "Error: " << socket.errorString();
+        return;
+    }
+    socket.write(QString("POST /api/reply/list.json HTTP/1.1\r\n"
+                         "Host: comment.news.naver.com\r\n"
+                         "User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0\r\n"
+                         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n"
+                         "Accept-Language: ko-KR,ko;q=0.8,en-US;q=0.5,en;q=0.3\r\n"
+                         "Accept-Encoding: deflate\r\n"
+                         "Content-Type: application/x-www-form-urlencoded; charset=utf-8\r\n"
+                         "charset: utf-8\r\n"
+                         "Content-Length: " + QString::number(m_strParam.length()) + "\r\n"
+                         "Connection: keep-alive\r\n"
+                         "Pragma: no-cache\r\n"
+                         "Cache-Control: no-cache\r\n\r\n" + m_strParam).toUtf8());
+    QByteArray byArray;
+    while (socket.waitForReadyRead())
+    {
+        byArray += socket.readAll();
+    }
+
+    QJsonDocument d;
+    {
+        QJsonParseError error;
+        // NOTE(review): presumably undoes chunked transfer framing with a fixed 8188+4 byte stride - confirm.
+        int index = byArray.indexOf("{");
+        byArray=byArray.mid(index-2);
+        bool bFlag = true;
+        QString strOut;
+        while(bFlag)
+        {
+            strOut += byArray.left(8188);
+            byArray=byArray.mid(8192);
+            if (byArray.size() <= 8192)
+            {
+                bFlag = false;
+                strOut += byArray;
+            }
+        }
+        strOut = strOut.replace("\r\n","").replace("\n","");
+        if (strOut.length() <= 0 ) return;
+        d = QJsonDocument::fromJson(strOut.trimmed().toUtf8(),&error);
+        if (error.error != 0)
+        {
+            qDebug() << error.errorString();
+            Debug("reply.json",strOut);
+            return;    // give up on this comment only; exit() here would kill the whole crawl from a worker thread
+        }
+    }
+
+    QString astrOut[E_REPLY_MAX];
+    foreach(QJsonValue value ,d.object().value("message").toObject().value("result").toObject().value("commentReplies").toArray())
+    {
+        QJsonObject obj = value.toObject();
+        int i= E_REPLY_USER_ID;
+        while (i < E_REPLY_MAX)
+        {
+            if (i <= E_REPLY_CONTENT)
+                astrOut[i] = obj[g_strJsonReplyHead[i]].toString();
+            else
+                astrOut[i] = QString::number(obj[g_strJsonReplyHead[i]].toInt());
+            i++;
+        }
+        for (i = 0; i < E_REPLY_MAX;i++)
+        {
+            QString strOut = g_strJsonReplyHead[i] + " : " + astrOut[i];
+            m_pstrOut->push_back(strOut);
+        }
+        m_pstrOut->push_back("");
+    }
+    socket.close();
+}
diff --git a/AjaxCrawlerProcess/srunnable.h b/AjaxCrawlerProcess/srunnable.h
new file mode 100644
index 0000000..bbcf836
--- /dev/null
+++ b/AjaxCrawlerProcess/srunnable.h
@@ -0,0 +1,21 @@
+#ifndef SRUNNABLE_H
+#define SRUNNABLE_H
+
+#include <QRunnable>
+#include <QStringList>
+
+// Thread-pool task that downloads the replies of one comment into a caller-owned QStringList.
+class SRunnable : public QRunnable
+{
+public:
+    SRunnable();
+    ~SRunnable();
+    void SetParam(QString _strParam,QStringList *_pstrOut) { m_strParam = _strParam;m_pstrOut = _pstrOut;}
+    QString m_strID;
+    QStringList *m_pstrOut;
+protected:
+    void run();
+private:
+    QString m_strParam;
+};
+#endif // SRUNNABLE_H