update -> insert 방식으로 고침
git-svn-id: svn://192.168.0.12/source@104 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
@@ -42,6 +42,7 @@ void SCrawler::load(QStringList _strlistArgv)
|
||||
m_strUrl = _strlistArgv[2];
|
||||
m_nSelect = E_NAVER_CAFE_DATA;
|
||||
m_strReper = _strlistArgv[4];
|
||||
m_strKeywordID = _strlistArgv[5];
|
||||
}
|
||||
|
||||
if (_strlistArgv[1] == "blog_list")
|
||||
@@ -56,6 +57,7 @@ void SCrawler::load(QStringList _strlistArgv)
|
||||
{
|
||||
m_strUrl = _strlistArgv[2];
|
||||
m_nSelect = E_NAVER_BLOG_BODY;
|
||||
m_strKeywordID = _strlistArgv[4];
|
||||
m_bUse = true;
|
||||
}
|
||||
|
||||
@@ -83,6 +85,7 @@ void SCrawler::load(QStringList _strlistArgv)
|
||||
m_strUrl = _strlistArgv[2];
|
||||
m_nSelect = E_DAUM_CAFE_DATA;
|
||||
m_strReper = _strlistArgv[4];
|
||||
m_strKeywordID = _strlistArgv[5];
|
||||
}
|
||||
|
||||
if (_strlistArgv[1] == "blog_list")
|
||||
@@ -159,14 +162,34 @@ void SCrawler::saveResult(bool ok)
|
||||
switch(m_nSelect)
|
||||
{
|
||||
case E_NAVER_CAFE_LIST:saveFrameCafeList(m_page->mainFrame());break;
|
||||
case E_NAVER_CAFE_DATA:saveFrameCafeUrl(m_page->mainFrame());break;
|
||||
case E_NAVER_CAFE_DATA:
|
||||
{
|
||||
saveFrameCafeUrl(m_page->mainFrame());
|
||||
bodydata.sendDB();
|
||||
break;
|
||||
}
|
||||
case E_NAVER_BLOG_LIST:saveFrameList(m_page->mainFrame());break;
|
||||
case E_NAVER_BLOG_BODY:saveFrameUrl(m_page->mainFrame());break;
|
||||
case E_NAVER_BLOG_BODY:
|
||||
{
|
||||
saveFrameUrl(m_page->mainFrame());
|
||||
bodydata.sendDB();
|
||||
break;
|
||||
}
|
||||
case E_NAVER_BLOG_REPLY:saveFrameComment(m_page->mainFrame());break;
|
||||
case E_DAUM_CAFE_LIST:saveFrameDaumCafeList(m_page->mainFrame());break;
|
||||
case E_DAUM_CAFE_DATA:saveFrameDaumCafeUrl(m_page->mainFrame());break;
|
||||
case E_DAUM_CAFE_DATA:
|
||||
{
|
||||
saveFrameDaumCafeUrl(m_page->mainFrame());
|
||||
bodydata.sendDB();
|
||||
break;
|
||||
}
|
||||
case E_DAUM_BLOG_LIST:saveFrameDaumBlogList(m_page->mainFrame());break;
|
||||
case E_DAUM_BLOG_BODY:saveFrameDaumBlogUrl(m_page->mainFrame());break;
|
||||
case E_DAUM_BLOG_BODY:
|
||||
{
|
||||
saveFrameDaumBlogUrl(m_page->mainFrame());
|
||||
bodydata.sendDB();
|
||||
break;
|
||||
}
|
||||
case E_DAUM_BLOG_REPLY:saveFrameDaumBlogComment(m_page->mainFrame());break;
|
||||
}
|
||||
|
||||
@@ -263,7 +286,10 @@ void SCrawler::saveFrameList(QWebFrame *frame)
|
||||
|
||||
QWebElement notFound = Find(frame->documentElement(),"div","id","notfound");
|
||||
if(notFound.isNull() == false)
|
||||
{
|
||||
m_bLast = true;
|
||||
return;
|
||||
}
|
||||
|
||||
QWebElement eleMain = Find(frame->documentElement(),"div","class","blog section _blogBase");
|
||||
QSqlQuery sql;
|
||||
@@ -383,6 +409,7 @@ void SCrawler::saveFrameList(QWebFrame *frame)
|
||||
else
|
||||
strPlatformId = strUrl.split("/").at(0).split(".").at(0);
|
||||
|
||||
/*
|
||||
QString strQuery = QString("insert into ");
|
||||
strQuery += m_strTable;
|
||||
strQuery += QString(" set platform_name='naver',platform_form='blog',article_form='body',article_url='%1',platform_id='%2',platform_title='%3',keyword_id='%4'").arg("http://"+strUrl).arg(strPlatformId).arg(str).arg(m_strKeywordID);
|
||||
@@ -390,6 +417,7 @@ void SCrawler::saveFrameList(QWebFrame *frame)
|
||||
if (sql.exec(strUtf8) == false)
|
||||
cout << "error : " << sql.lastError().text().toStdString();
|
||||
else
|
||||
*/
|
||||
cout << "o ";
|
||||
}
|
||||
//else
|
||||
@@ -399,6 +427,7 @@ void SCrawler::saveFrameList(QWebFrame *frame)
|
||||
}
|
||||
|
||||
{
|
||||
|
||||
QWebElement total = Find(eleMain,"span","class","title_num");
|
||||
if (total.toPlainText().isEmpty()) {m_bError = true; return;}
|
||||
int nTotal = GetNumber(total.toPlainText().split("/").at(1));
|
||||
@@ -432,6 +461,9 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
|
||||
QString str = profile.toPlainText().split("\n").at(0);
|
||||
if (str.isEmpty() == false)
|
||||
{
|
||||
bodydata.setData(str, bodydata.ARTICLE_NICKNAME);
|
||||
bodydata.setData(m_strUrl, bodydata.ARTICLE_URL);
|
||||
/*
|
||||
QString strQuery = "update " + m_strTable + " set article_nickname = '";
|
||||
strQuery += str;
|
||||
strQuery += "'";
|
||||
@@ -445,6 +477,7 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
|
||||
UpdateError("Error code 1");
|
||||
m_bUse = false;
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
/*
|
||||
@@ -585,6 +618,36 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//QString strHead[E_DATA_MAX] = {"article_nickname","article_id","article_title","article_date","article_data","platform_title"};
|
||||
bodydata.setData(str[0].trimmed(), bodydata.ARTICLE_NICKNAME);
|
||||
bodydata.setData(str[1].trimmed(), bodydata.ARTICLE_ID);
|
||||
bodydata.setData(str[2].trimmed(), bodydata.ARTICLE_TITLE);
|
||||
bodydata.setData(str[3].trimmed(), bodydata.ARTICLE_DATE);
|
||||
bodydata.setData(str[4].trimmed(), bodydata.ARTICLE_DATA);
|
||||
bodydata.setData(str[5].trimmed(), bodydata.PLATFORM_TITLE);
|
||||
if(image.attribute("src").trimmed().length() != 0)
|
||||
{
|
||||
bodydata.setData(image.attribute("src").trimmed(), bodydata.ARTICLE_PROFILEURL);
|
||||
}
|
||||
strProfile = GetSafeUtf(strProfile);
|
||||
if(strProfile.length() > 0)
|
||||
{
|
||||
bodydata.setData(strProfile, bodydata.ARTICLE_PROFILE);
|
||||
}
|
||||
|
||||
bodydata.setData(m_strUrl, bodydata.ARTICLE_URL);
|
||||
bodydata.setTable(m_strTable);
|
||||
|
||||
bodydata.setData("naver", bodydata.PLATFORM_NAME);
|
||||
bodydata.setData("blog", bodydata.PLATFORM_FORM);
|
||||
bodydata.setData("body", bodydata.ARTICLE_FORM);
|
||||
bodydata.setData(m_strUrl.split("/").at(3), bodydata.PLATFORM_ID);
|
||||
bodydata.setData(m_strKeywordID, bodydata.KEYWORD_ID);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
QString strQuery = "update " + m_strTable + " set ";
|
||||
for(int i = 0; i < E_DATA_MAX ; i++)
|
||||
{
|
||||
@@ -623,6 +686,7 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
|
||||
UpdateError("Error code 5");
|
||||
m_bUse = false;
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
foreach(QWebFrame *childFrame, frame->childFrames())
|
||||
@@ -655,6 +719,17 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
|
||||
strId = strCommUrl.split("/").at(3).trimmed();
|
||||
if(strCommUrl.right(QString("blog.me").length()).compare("blog.me") == 0)
|
||||
strId = strCommUrl.split("/").at(2).split(".").at(0).trimmed();
|
||||
if(strCommUrl.left(1) == "/")
|
||||
{
|
||||
QStringList strList = strCommUrl.split("&");
|
||||
foreach(QString str, strList)
|
||||
{
|
||||
if(str.left(3) == "id=")
|
||||
{
|
||||
strId = str.right(str.length() - 3);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
strComm = GetSafeUtf(strComm);
|
||||
if (strComm.isEmpty()== false)
|
||||
@@ -707,7 +782,16 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
|
||||
if(strCommUrl.right(QString("blog.me").length()).compare("blog.me") == 0)
|
||||
strId = strCommUrl.split("/").at(2).split(".").at(0).trimmed();
|
||||
if(strCommUrl.left(1) == "/")
|
||||
strId = strUrl.split("/").at(3).trimmed();
|
||||
{
|
||||
QStringList strList = strCommUrl.split("&");
|
||||
foreach(QString str, strList)
|
||||
{
|
||||
if(str.left(3) == "id=")
|
||||
{
|
||||
strId = str.right(str.length() - 3);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(subNick.isEmpty() == false)
|
||||
{
|
||||
@@ -833,10 +917,11 @@ void SCrawler::saveFrameCafeList(QWebFrame *frame)
|
||||
|
||||
if (strUrl.split("/").at(2) == "cafe.naver.com")
|
||||
{
|
||||
QSqlQuery sql;
|
||||
//QSqlQuery sql;
|
||||
|
||||
//if (sql.size() == 0 || sql.size() == -1)
|
||||
{
|
||||
/*
|
||||
QString strQuery = QString("insert into ");
|
||||
strQuery += m_strTable;
|
||||
strQuery += QString(" set platform_name='naver',platform_form='cafe',article_form='body',article_url='%1',platform_id='%2',article_title='%3',keyword_id='%4'").arg(strUrl).arg(strUrl.split("/").at(3)).arg(strTitle).arg(m_strKeywordID);
|
||||
@@ -844,6 +929,7 @@ void SCrawler::saveFrameCafeList(QWebFrame *frame)
|
||||
if (sql.exec(strUtf8) == false)
|
||||
cout << "x " << sql.lastError().text().toStdString();
|
||||
else
|
||||
*/
|
||||
{
|
||||
cout << "o " << strUrl.toStdString() << endl;
|
||||
}
|
||||
@@ -875,23 +961,14 @@ void SCrawler::saveFrameCafeUrl(QWebFrame *frame)
|
||||
QWebElement other = Find(frame->documentElement(),"h1","class","d-none");
|
||||
if (other.toPlainText().isEmpty() == false)
|
||||
{
|
||||
QString strQuery = "update ";
|
||||
strQuery += m_strTable;
|
||||
strQuery += " set ";
|
||||
strQuery += "platform_title = '" + SqlString(GetSafeUtf(other.toPlainText())) + "'";
|
||||
strQuery += "where article_url='";
|
||||
strQuery += m_strUrl;
|
||||
strQuery += "'";
|
||||
QString strUtf8(strQuery.toUtf8());
|
||||
QSqlQuery sql;
|
||||
if (sql.exec(strUtf8) == false)
|
||||
cout << "error : " << sql.lastError().text().toStdString();
|
||||
bodydata.setData(SqlString(GetSafeUtf(other.toPlainText())), bodydata.PLATFORM_TITLE);
|
||||
}
|
||||
|
||||
|
||||
if (frame->frameName() == "cafe_main")
|
||||
{
|
||||
{
|
||||
QString strData,strDate,strNick,strID,strHits;
|
||||
QString strData,strDate,strNick,strID,strHits,strTitle;
|
||||
{
|
||||
QWebElement group = Find(frame->documentElement(),"div","class","tbody m-tcol-c");
|
||||
strData = SqlString(group.toPlainText().trimmed());
|
||||
@@ -909,6 +986,10 @@ void SCrawler::saveFrameCafeUrl(QWebFrame *frame)
|
||||
else
|
||||
strDate += ":00";
|
||||
}
|
||||
{
|
||||
QWebElement group = Find(frame->documentElement(),"span","class","b m-tcol-c");
|
||||
strTitle = SqlString(group.toPlainText().trimmed());
|
||||
}
|
||||
|
||||
{
|
||||
QWebElement group = Find(Find(frame->documentElement(),"div","class","etc-box"),"td","class","p-nick");
|
||||
@@ -941,6 +1022,21 @@ void SCrawler::saveFrameCafeUrl(QWebFrame *frame)
|
||||
strHits = Find(frame->documentElement(),"span","class","kin_count m-tcol-c _rosReadcount").toPlainText();
|
||||
}
|
||||
{
|
||||
|
||||
bodydata.setTable(m_strTable);
|
||||
bodydata.setData(strData, bodydata.ARTICLE_DATA);
|
||||
bodydata.setData(strDate, bodydata.ARTICLE_DATE);
|
||||
bodydata.setData(strNick, bodydata.ARTICLE_NICKNAME);
|
||||
bodydata.setData(strID, bodydata.ARTICLE_ID);
|
||||
bodydata.setData(strHits, bodydata.ARTICLE_HIT);
|
||||
bodydata.setData(m_strUrl, bodydata.ARTICLE_URL);
|
||||
bodydata.setData("naver", bodydata.PLATFORM_NAME);
|
||||
bodydata.setData("cafe", bodydata.PLATFORM_FORM);
|
||||
bodydata.setData("body", bodydata.ARTICLE_FORM);
|
||||
bodydata.setData(m_strUrl.split("/").at(3), bodydata.PLATFORM_ID);
|
||||
bodydata.setData(m_strKeywordID, bodydata.KEYWORD_ID);
|
||||
bodydata.setData(strTitle, bodydata.ARTICLE_TITLE);
|
||||
/*
|
||||
QSqlQuery sql;
|
||||
QString strQuery = "update ";
|
||||
strQuery += m_strTable;
|
||||
@@ -956,6 +1052,8 @@ void SCrawler::saveFrameCafeUrl(QWebFrame *frame)
|
||||
QString strUtf8(strQuery.toUtf8());
|
||||
if (sql.exec(strUtf8) == false)
|
||||
cout << "error : " << sql.lastError().text().toStdString();
|
||||
*/
|
||||
|
||||
}
|
||||
}
|
||||
// Comment
|
||||
@@ -1075,7 +1173,7 @@ void SCrawler::saveFrameDaumCafeList(QWebFrame *frame)
|
||||
QString strQuery = "delete from ";
|
||||
strQuery += m_strTable;
|
||||
strQuery += QString(" where article_url in %1").arg(strUrlList);
|
||||
qDebug() << strQuery;
|
||||
// qDebug() << strQuery;
|
||||
|
||||
if (sql.exec(strQuery.toUtf8()) == false)
|
||||
{
|
||||
@@ -1103,9 +1201,10 @@ void SCrawler::saveFrameDaumCafeList(QWebFrame *frame)
|
||||
|
||||
if (strUrl.split("/").at(2) == "cafe.daum.net")
|
||||
{
|
||||
QSqlQuery sql;
|
||||
//QSqlQuery sql;
|
||||
|
||||
{
|
||||
/*
|
||||
QString strQuery = QString("insert into ");
|
||||
strQuery += m_strTable;
|
||||
strQuery += QString(" set platform_name='daum',platform_form='cafe',article_form='body',article_url='%1',platform_id='%2',article_title='%3',keyword_id='%4'").arg(strUrl).arg(strUrl.split("/").at(3)).arg(strTitle).arg(m_strKeywordID);
|
||||
@@ -1113,6 +1212,7 @@ void SCrawler::saveFrameDaumCafeList(QWebFrame *frame)
|
||||
if (sql.exec(strUtf8) == false)
|
||||
cout << "x " << sql.lastError().text().toStdString();
|
||||
else
|
||||
*/
|
||||
cout << "o " << strUrl.toStdString() << endl;
|
||||
}
|
||||
//else
|
||||
@@ -1133,18 +1233,43 @@ void SCrawler::saveFrameDaumCafeList(QWebFrame *frame)
|
||||
m_bLast = true;
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
QWebElement noResult = Find(frame->documentElement(),"div","id","noResult");
|
||||
if(!noResult.isNull())
|
||||
{
|
||||
m_bLast = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
bool b_last = false;
|
||||
|
||||
b_last = Find(frame->documentElement(), "div", "class", "result_message mg_cont hide").isNull();
|
||||
b_last = b_last | !(Find(frame->documentElement(), "div", "class", "result_message mg_cont").isNull());
|
||||
|
||||
QWebElement total = Find(eleMain,"span","class","f_nb f_l");
|
||||
if (total.toPlainText().isEmpty()) {m_bError = true; return;}
|
||||
total.toPlainText().split("/").size();
|
||||
|
||||
QString strTotal = total.toPlainText().split("/").at(1);
|
||||
strTotal = strTotal.replace(",","");
|
||||
QRegExp rx("(\\d+)");
|
||||
int pos = 0;
|
||||
QList<QString> list;
|
||||
while ((pos = rx.indexIn(strTotal, pos)) != -1)
|
||||
{
|
||||
list << rx.cap(1);
|
||||
pos += rx.matchedLength();
|
||||
}
|
||||
|
||||
int nTotal = list.at(0).toInt();
|
||||
|
||||
QStringList strList = total.toPlainText().split("/").at(0).trimmed().split("-");
|
||||
int nNow = GetNumber(strList.at(strList.size() - 1));
|
||||
int nNowFirst = GetNumber(strList.at(strList.size() - 2));
|
||||
if (nNow >= 1000 || (nNow - nNowFirst) < 9 || b_last)
|
||||
if (nNow >= 1000 || nNow >= nTotal || (nNow - nNowFirst) < 9 || b_last)
|
||||
m_bLast = true;
|
||||
//cout << "nNow : " << nNow << endl << "nNow - nNowFirst: " << (nNow - nNowFirst) << endl << "b_last : " << b_last << endl;
|
||||
}
|
||||
@@ -1161,6 +1286,11 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
|
||||
|
||||
if (strTitle.isEmpty() == false)
|
||||
{
|
||||
|
||||
bodydata.setTable(m_strTable);
|
||||
bodydata.setData(m_strUrl, bodydata.ARTICLE_URL);
|
||||
bodydata.setData(SqlString(GetSafeUtf(strTitle)), bodydata.PLATFORM_TITLE);
|
||||
/*
|
||||
QString strQuery = "update ";
|
||||
strQuery += m_strTable;
|
||||
strQuery += " set ";
|
||||
@@ -1172,6 +1302,7 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
|
||||
QSqlQuery sql;
|
||||
if (sql.exec(strUtf8) == false)
|
||||
cout << "error : " << sql.lastError().text().toStdString();
|
||||
*/
|
||||
}
|
||||
|
||||
if (frame->frameName() == "down")
|
||||
@@ -1179,7 +1310,7 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
|
||||
QString strHits;
|
||||
{
|
||||
//QString strData,strDate,strNick,strID,strHits;
|
||||
QString strData,strDate,strNick,strID;
|
||||
QString strData,strDate,strNick,strID,strTitle;
|
||||
{
|
||||
QWebElement group = Find(frame->documentElement(),"div","class","bbs_contents");
|
||||
strData = SqlString(group.toPlainText().trimmed());
|
||||
@@ -1199,6 +1330,11 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
|
||||
else
|
||||
strDate += ":00";
|
||||
}
|
||||
{
|
||||
QWebElement group = Find(frame->documentElement(),"div","class","subject");
|
||||
QWebElement group2 = Find(group,"span","class","b");
|
||||
strTitle = SqlString(group2.toPlainText().trimmed());
|
||||
}
|
||||
|
||||
{
|
||||
QWebElement group = Find(Find(frame->documentElement(),"div","class","article_writer"),"a","href","#");
|
||||
@@ -1231,6 +1367,22 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
|
||||
}
|
||||
*/
|
||||
{
|
||||
bodydata.setTable(m_strTable);
|
||||
bodydata.setData(strData, bodydata.ARTICLE_DATA);
|
||||
bodydata.setData(strDate, bodydata.ARTICLE_DATE);
|
||||
bodydata.setData(strNick, bodydata.ARTICLE_NICKNAME);
|
||||
if(!strID.isEmpty())
|
||||
bodydata.setData(strID, bodydata.ARTICLE_ID);
|
||||
bodydata.setData(strHits, bodydata.ARTICLE_HIT);
|
||||
bodydata.setData(m_strUrl, bodydata.ARTICLE_URL);
|
||||
bodydata.setData("daum", bodydata.PLATFORM_NAME);
|
||||
bodydata.setData("cafe", bodydata.PLATFORM_FORM);
|
||||
bodydata.setData("body", bodydata.ARTICLE_FORM);
|
||||
bodydata.setData(m_strUrl.split("/").at(3), bodydata.PLATFORM_ID);
|
||||
bodydata.setData(m_strKeywordID, bodydata.KEYWORD_ID);
|
||||
bodydata.setData(strTitle, bodydata.ARTICLE_TITLE);
|
||||
|
||||
/*
|
||||
QSqlQuery sql;
|
||||
QString strQuery = "update ";
|
||||
strQuery += m_strTable;
|
||||
@@ -1247,6 +1399,7 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
|
||||
QString strUtf8(strQuery.toUtf8());
|
||||
if (sql.exec(strUtf8) == false)
|
||||
cout << "error : " << sql.lastError().text().toStdString();
|
||||
*/
|
||||
}
|
||||
}
|
||||
// Comment
|
||||
@@ -1498,7 +1651,7 @@ void SCrawler::setProxy()
|
||||
cout << "p : " << strList.at(0).toStdString() << endl;
|
||||
|
||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
|
||||
//QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,"101.69.199.99",80)));
|
||||
//QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,"196.201.216.172",8088)));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#define SCRAWLER_H
|
||||
|
||||
#include <QtWebKitWidgets>
|
||||
|
||||
#include "scrawlerdata.h"
|
||||
class SCrawler : public QObject
|
||||
{
|
||||
Q_OBJECT
|
||||
@@ -36,6 +36,8 @@ private:
|
||||
QString m_strReper;
|
||||
QString m_strKeywordID;
|
||||
|
||||
SCrawlerData bodydata;
|
||||
|
||||
QWebPage *m_page;
|
||||
QString m_strFile;
|
||||
QString m_strUrl;
|
||||
|
||||
126
CrawlerProcess/scrawlerdata.cpp
Normal file
126
CrawlerProcess/scrawlerdata.cpp
Normal file
@@ -0,0 +1,126 @@
|
||||
#include "scrawlerdata.h"
|
||||
#include <QSqlQuery>
|
||||
#include <iostream>
|
||||
#include <QVariant>
|
||||
#include <QSqlDatabase>
|
||||
#include <QSqlError>
|
||||
using namespace std;
|
||||
/**
 * Fills m_strColumn so that every E_COLUMN enumerator maps to the name of the
 * database column it stands for. The names are later used by sendDB() both as
 * the column list and (prefixed with ':') as the bind placeholders.
 */
SCrawlerData::SCrawlerData()
{
    // One entry per enumerator; the order of the entries does not matter
    // because each one is stored at its own enumerator index.
    static const struct { E_COLUMN index; const char *name; } kColumnNames[] = {
        { PLATFORM_NAME,      "platform_name" },
        { PLATFORM_FORM,      "platform_form" },
        { PLATFORM_TITLE,     "platform_title" },
        { ARTICLE_FORM,       "article_form" },
        { ARTICLE_PARENT,     "article_parent" },
        { ARTICLE_ID,         "article_id" },
        { ARTICLE_NICKNAME,   "article_nickname" },
        { ARTICLE_TITLE,      "article_title" },
        { ARTICLE_DATA,       "article_data" },
        { ARTICLE_URL,        "article_url" },
        { ARTICLE_HIT,        "article_hit" },
        { ARTICLE_DATE,       "article_date" },
        { ARTICLE_ORDER,      "article_order" },
        { ARTICLE_PROFILE,    "article_profile" },
        { ARTICLE_PROFILEURL, "article_profileurl" },
        { PLATFORM_ID,        "platform_id" },
        { KEYWORD_ID,         "keyword_id" },
        { REPLY_URL,          "reply_url" }
    };

    const int entryCount = static_cast<int>(sizeof(kColumnNames) / sizeof(kColumnNames[0]));
    for (int entry = 0; entry < entryCount; ++entry)
    {
        m_strColumn[kColumnNames[entry].index] = kColumnNames[entry].name;
    }
}
|
||||
|
||||
/**
 * Empties every stored value and every column name before the object goes away.
 */
SCrawlerData::~SCrawlerData()
{
    // Values first (via clear()), then the column-name table.
    clear();

    QString *const columnsEnd = m_strColumn + TOTAL_COUNT;
    for (QString *column = m_strColumn; column != columnsEnd; ++column)
    {
        column->clear();
    }
}
|
||||
|
||||
void SCrawlerData::clear()
|
||||
{
|
||||
for(int i = 0; i < TOTAL_COUNT; i++)
|
||||
{
|
||||
m_strData[i].clear();
|
||||
}
|
||||
}
|
||||
|
||||
void SCrawlerData::clear(int _num)
|
||||
{
|
||||
m_strData[_num].clear();
|
||||
}
|
||||
|
||||
/**
 * Returns the value currently stored for one column.
 *
 * @param _num  Column index, expected to be an E_COLUMN value in
 *              [0, TOTAL_COUNT).
 * @return The stored string, or a null QString when _num is out of range
 *         (rather than reading outside m_strData).
 */
QString SCrawlerData::getData(int _num)
{
    if (_num < 0 || _num >= TOTAL_COUNT)
        return QString();

    return m_strData[_num];
}
|
||||
|
||||
/**
 * Sets the name of the table that sendDB() inserts into.
 *
 * @param _str  Table name; it is concatenated verbatim into the INSERT
 *              statement built by sendDB().
 */
void SCrawlerData::setTable(QString _str)
{
    // _str is already a private copy (passed by value), so exchanging contents
    // with it leaves m_strTable holding the new name.
    m_strTable.swap(_str);
}
|
||||
|
||||
/**
 * Stores the value for one column, replacing whatever was there before.
 *
 * @param _str  Value to store.
 * @param _num  Column index, expected to be an E_COLUMN value in
 *              [0, TOTAL_COUNT). Indexes outside that range are ignored
 *              instead of writing outside m_strData.
 */
void SCrawlerData::setData(QString _str, int _num)
{
    if (_num < 0 || _num >= TOTAL_COUNT)
        return;

    m_strData[_num] = _str;
}
|
||||
|
||||
bool SCrawlerData::sendDB()
|
||||
{
|
||||
QSqlQuery query;
|
||||
|
||||
QString strQuery;
|
||||
strQuery = "insert into " + m_strTable + "(";
|
||||
|
||||
for(int i = 0; i < TOTAL_COUNT; i++)
|
||||
{
|
||||
strQuery += (m_strColumn[i] + ",");
|
||||
}
|
||||
|
||||
strQuery = strQuery.left(strQuery.size() - 1);
|
||||
strQuery += ") VALUES (";
|
||||
|
||||
for(int i = 0; i < TOTAL_COUNT; i++)
|
||||
{
|
||||
strQuery += (":" + m_strColumn[i] + ",");
|
||||
}
|
||||
|
||||
strQuery = strQuery.left(strQuery.size() - 1);
|
||||
strQuery += ")";
|
||||
|
||||
query.prepare(strQuery.toUtf8());
|
||||
|
||||
for(int i = 0; i < TOTAL_COUNT; i++)
|
||||
{
|
||||
if(i == ARTICLE_ORDER)
|
||||
query.bindValue(QString(":" + m_strColumn[i]), m_strData[i].trimmed().toInt());
|
||||
query.bindValue(QString(":" + m_strColumn[i]), m_strData[i].trimmed().toUtf8());
|
||||
}
|
||||
|
||||
if (query.exec()==false)
|
||||
{
|
||||
cout << "error : " << query.lastError().text().toStdString();
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Returns a copy of the input that keeps only characters passing one of three
 * tests and drops everything else:
 *   - code points 12593..12622 (U+3131..U+314E),
 *   - code points 44032..55203 (U+AC00..U+D7A3, the Hangul syllables range),
 *   - anything QChar classifies as digit, number, whitespace, lower-case,
 *     upper-case or symbol.
 *
 * @param _strData  Text to filter.
 * @return The filtered text, characters in their original order.
 */
QString SCrawlerData::GetSafeUtf(QString _strData)
{
    QString filtered;
    const int total = _strData.length();

    for (int pos = 0; pos < total; ++pos)
    {
        const QChar ch = _strData.at(pos);
        const unsigned short code = ch.unicode();

        const bool inFirstRange  = (code >= 12593 && code <= 12622);
        const bool inSecondRange = (code >= 44032 && code <= 55203);
        const bool isPlainClass  = ch.isDigit() || ch.isNumber() || ch.isSpace()
                                || ch.isLower() || ch.isUpper() || ch.isSymbol();

        // The three tests are evaluated independently, one append per test
        // that passes.
        if (inFirstRange)
            filtered += ch;
        if (inSecondRange)
            filtered += ch;
        if (isPlainClass)
            filtered += ch;
    }

    return filtered;
}
|
||||
|
||||
|
||||
54
CrawlerProcess/scrawlerdata.h
Normal file
54
CrawlerProcess/scrawlerdata.h
Normal file
@@ -0,0 +1,54 @@
|
||||
#ifndef SCRAWLERDATA
#define SCRAWLERDATA

#include <QString>
#include <QStringList>

/**
 * Holds the column values of one crawled article row and writes them to the
 * database with a single INSERT (see sendDB()).
 *
 * Values are addressed by E_COLUMN index through setData()/getData().
 */
class SCrawlerData
{
public:
    /// Index of each database column; TOTAL_COUNT is the number of columns.
    enum E_COLUMN
    {
        PLATFORM_NAME = 0,
        PLATFORM_FORM,
        PLATFORM_TITLE,
        ARTICLE_FORM,
        ARTICLE_PARENT,
        ARTICLE_ID,
        ARTICLE_NICKNAME,
        ARTICLE_TITLE,
        ARTICLE_DATA,
        ARTICLE_URL,
        ARTICLE_HIT,
        ARTICLE_DATE,
        ARTICLE_ORDER,
        ARTICLE_PROFILE,
        ARTICLE_PROFILEURL,
        PLATFORM_ID,
        KEYWORD_ID,
        REPLY_URL,
        TOTAL_COUNT
    };

private:
    QString m_strData[TOTAL_COUNT];    ///< Current value per column, indexed by E_COLUMN.
    QString m_strColumn[TOTAL_COUNT];  ///< Database column name per E_COLUMN index.
    QString m_strTable;                ///< Target table name used by sendDB().

private:
    /// Returns a copy of _strData with characters outside the accepted classes removed.
    QString GetSafeUtf(QString _strData);
    // NOTE(review): no definition of getTable() is visible in scrawlerdata.cpp —
    // confirm one exists before calling it.
    QString getTable();

public:
    SCrawlerData();
    ~SCrawlerData();

    /// Returns the value stored for column _num (an E_COLUMN index).
    QString getData(int _num);
    /// Stores _str as the value of column _num (an E_COLUMN index).
    void setData(QString _str, int _num);
    /// Empties the values of all columns.
    void clear();
    /// Empties the value of column _num (an E_COLUMN index).
    void clear(int _num);
    /// Inserts the stored values as one row; returns false if the query fails.
    bool sendDB();
    /// Sets the table that sendDB() inserts into.
    void setTable(QString _str);
};

#endif // SCRAWLERDATA
|
||||
Reference in New Issue
Block a user