git-svn-id: svn://192.168.0.12/source@26 8346c931-da38-4b9b-9d4c-e48b93cbd075

This commit is contained in:
admin
2015-02-25 08:32:21 +00:00
parent dc504721a1
commit fc20a5cffc
3 changed files with 60 additions and 35 deletions

View File

@@ -12,7 +12,8 @@ CONFIG -= app_bundle
TEMPLATE = app
SOURCES += main.cpp \
scrawler.cpp
scrawler.cpp \
scrawler_backup.cpp
HEADERS += \
scrawler.h

View File

@@ -75,6 +75,7 @@ void SCrawler::load(QStringList _strlistArgv)
m_strUrl = _strlistArgv[2];
m_nSelect = E_NAVER_BLOG_LIST;
m_strKeywordID = _strlistArgv[4];
//cout << "ok";
}
if (_strlistArgv[1] == "blog_url")
@@ -245,12 +246,12 @@ void SCrawler::saveFrameList(QWebFrame *frame)
QStringList strList = strUrl.split('/');
if (strList.at(0).compare("blog.naver.com") != 0 ) { cout << " not" << endl; continue; };
QString strQuery = "select URL from ";
QString strQuery = "select article_url from ";
strQuery += m_strTable;
strQuery += QString(" where URL = '%1'").arg(strUrl);
strQuery += QString(" where article_url = '%1'").arg(strUrl);
sql.exec(strQuery);
if (sql.size() == 0)
if (sql.size() == -1)
{
QString str = Find(sub,"a","class","txt84").toPlainText();
str = GetSafeUtf(str);
@@ -260,7 +261,7 @@ void SCrawler::saveFrameList(QWebFrame *frame)
QString strQuery = QString("insert into ");
strQuery += m_strTable;
strQuery += QString(" set Url='%1',platform_id='%2',platform_title='%3',keyword_id='%4'").arg("http://"+strUrl).arg(strUrl.split("/").at(1)).arg(str).arg(m_strKeywordID);
strQuery += QString(" set article_url='%1',platform_id='%2',platform_title='%3',keyword_id='%4'").arg("http://"+strUrl).arg(strUrl.split("/").at(1)).arg(str).arg(m_strKeywordID);
QString strUtf8(strQuery.toUtf8());
if (sql.exec(strUtf8) == false)
cout << "error : " << sql.lastError().text().toStdString();
@@ -308,10 +309,10 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
QString str = profile.toPlainText().split("\n").at(0);
if (str.isEmpty() == false)
{
QString strQuery = "update " + m_strTable + " set NICKNAME = '";
QString strQuery = "update " + m_strTable + " set article_nickname = '";
strQuery += str;
strQuery += "'";
strQuery += " where URL='";
strQuery += " where article_url='";
strQuery += m_strUrl;
strQuery += "'";
QString strUtf8(strQuery.toUtf8());
@@ -327,9 +328,10 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
if (frame->frameName().compare(QString("mainFrame")) == 0)
{
QString str[E_DATA_MAX];
QString strHead[E_DATA_MAX] = {"NickName","ArticleID","ArticleTitle","Date","Data","PlatformTitle"};
//QString strHead[E_DATA_MAX] = {"article_nickname","article_id","article_title","article_date","article_data","platform_title"};
QString strHead[E_DATA_MAX] = {"article_nickname","article_id","article_title","article_date","article_data"};
QWebElement proTitle = Find(frame->documentElement(),"td","id","blogTitleText");
str[E_DATA_PLATFORM_TITLE] = proTitle.toPlainText().trimmed();
// str[E_DATA_PLATFORM_TITLE] = proTitle.toPlainText().trimmed();
QWebElement profile = Find(frame->documentElement(),"div","id","blog-profile");
{
@@ -389,8 +391,12 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
{
QWebElement date = Find(post_top,"p","class","date fil5 pcol2 _postAddDate");
if (date.toPlainText().isEmpty() == false)
str[E_DATA_DATE] = date.toPlainText();
str[E_DATA_DATE] = date.toPlainText().trimmed().replace("/","-");
if ( str[E_DATA_DATE].isEmpty() == false)
{
str[E_DATA_DATE] += ":00";
cout << "str[E_DATA_DATE] = " << str[E_DATA_DATE].toStdString() << endl;
}
else
{
UpdateError("Error code 4");
@@ -403,28 +409,28 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
if (body.toPlainText().isEmpty()==false)
{
str[E_DATA_DATA] = body.toPlainText();
if (str[E_DATA_DATA].size() >= 18430)
str[E_DATA_DATA] = str[E_DATA_DATA].left(18430);
str[E_DATA_DATA] = GetSafeUtf(str[E_DATA_DATA]);
}
}
}
QString strQuery = "update " + m_strTable + " set ";
for(int i = 0; i < E_DATA_MAX ; i++)
for(int i = 0; i < E_DATA_MAX - 1 ; i++)
{
strQuery += strHead[i];
strQuery += "='";
strQuery += GetSafeUtf(SqlString(str[i].trimmed()));
//strQuery += GetSafeUtf(SqlString(str[i].trimmed()));
strQuery += str[i].trimmed();
strQuery += "'";
if( i != (E_DATA_MAX - 1) )
if( i != (E_DATA_MAX - 2) )
strQuery += ",";
}
strQuery += " where URL='";
strQuery += " where article_url='";
strQuery += m_strUrl;
strQuery += "'";
QString strUtf8(strQuery.toUtf8());
//qDebug() << strQuery;
//cout << "Query : " << strQuery.toStdString() << endl;
if (sql.exec(strUtf8)==false)
{
cout << "error : " << sql.lastError().text().toStdString();
@@ -455,25 +461,31 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
strNick = strParent = Find(element,"a","class","nick pcol2").toPlainText();
strDate = Find(element,"span","class","date fil5 pcol2").toPlainText();
strComm = Find(element,"dd","class","comm pcol2").toPlainText();
strComm = GetSafeUtf(strComm);
//strComm = GetSafeUtf(strComm);
if (strComm.isEmpty()== false)
{
strComm.replace("'","\\'");
strComm.replace("\"","\\\"");
strComm = strComm.trimmed();
//cout << strComm.toStdString() << endl;
QSqlQuery query;
query.prepare(QString("insert into " + m_strTable + " (Url,Nickname,Data,Date,Parent,UrlReply,RowNum) VALUES (:URL,:NICK,:DATA,:DATE,:PARENT,:URLREPLY,:ROWNUM)").toUtf8());
query.bindValue(":URL", strUrl.toUtf8());
query.bindValue(":NICK",strNick.toUtf8());
query.bindValue(":DATA",strComm.toUtf8());
query.bindValue(":DATE",strDate.toUtf8());
query.bindValue(":PARENT",QString("").toUtf8());
query.bindValue(":URLREPLY",m_strUrl.toUtf8());
query.bindValue(":ROWNUM",QString::number(nCount++).toUtf8());
query.prepare(QString("insert into " + m_strTable + " (article_url,article_nickname,article_data,article_date,article_parent,reply_url,article_order) VALUES (:URL,:NICK,:DATA,:DATE,:PARENT,:URLREPLY,:ROWNUM)").toUtf8());
cout << "m_strTable = " << m_strTable.toStdString() << endl;
cout << "url = " << strUrl.toStdString() << endl << "nickname = " << strNick.toStdString() << endl;
cout << "data = " << strComm.toStdString() << endl;
cout << "date = " << strDate.toStdString() << endl;
cout << "urlReply = " << m_strUrl.toStdString() << endl;
cout << "ronum = " << nCount << endl;
query.bindValue(":URL", strUrl.toUtf8());
query.bindValue(":NICK",strNick.toUtf8());
query.bindValue(":DATA",strComm.toUtf8());
query.bindValue(":DATE",strDate.toUtf8());
query.bindValue(":PARENT",QString("").toUtf8());
query.bindValue(":URLREPLY",m_strUrl.toUtf8());
query.bindValue(":ROWNUM",(nCount++));
if (query.exec()==false)
cout << "error : " << query.lastError().text().toStdString();
if (query.exec()==false)
cout << "error : " << query.lastError().text().toStdString();
}
}
if (element.attribute("class") == "reply _countableComment ")
@@ -491,19 +503,26 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
if (strComm.isEmpty() == false)
{
strComm = GetSafeUtf(strComm);
// strComm = GetSafeUtf(strComm);
strComm.replace("'","\\'");
strComm.replace("\"","\\\"");
strComm = strComm.trimmed();
QSqlQuery query;
query.prepare(QString("insert into " + m_strTable + " (Url,Nickname,Data,Date,Parent,UrlReply,RowNum) VALUES (:URL,:NICK,:DATA,:DATE,:PARENT,:URLREPLY,:ROWNUM)").toUtf8());
cout << "m_strTable = " << m_strTable.toStdString() << endl;
cout << "url = " << strUrl.toStdString() << endl << "nickname = " << strNick.toStdString() << endl;
cout << "data = " << strComm.toStdString() << endl;
cout << "date = " << strDate.toStdString() << endl;
cout << "urlReply = " << m_strUrl.toStdString() << endl;
cout << "ronum = " << nCount << endl;
cout << "parent = " << strParent.toStdString() << endl;
query.prepare(QString("insert into " + m_strTable + " (article_url,article_nickname,article_data,article_date,article_parent,reply_url,article_order) VALUES (:URL,:NICK,:DATA,:DATE,:PARENT,:URLREPLY,:ROWNUM)").toUtf8());
query.bindValue(":URL",strUrl.toUtf8());
query.bindValue(":NICK",strNick.toUtf8());
query.bindValue(":DATA",strComm.toUtf8());
query.bindValue(":DATE",strDate.toUtf8());
query.bindValue(":PARENT",strParent.toUtf8());
query.bindValue(":URLREPLY",m_strUrl.toUtf8());
query.bindValue(":ROWNUM",QString::number(nCount++).toUtf8());
query.bindValue(":ROWNUM",(nCount++));
if (query.exec()==false)
{
cout << "error : " << query.lastError().text().toStdString();

View File

@@ -17,9 +17,14 @@ QString SNaverBlogManage::makeGetListQuery(QString _str,QDate _date)
//http://cafeblog.search.naver.com/search.naver?where=post&sm=tab_pge&query=%EC%95%84%EC%9D%B4%ED%8F%B0&st=date&date_option=6&date_from=20131103&date_to=20131103&dup_remove=1&post_blogurl=&post_blogurl_without=&srchby=all&nso=so%3Add%2Cp%3Afrom20131103to20131103&ie=utf8&start=11
str = "http://cafeblog.search.naver.com/search.naver?where=post&sm=tab_pge&query=";
str += EncodetoUtf8(_str,true);
str += "&st=date&date_option=6&date_from=" + strDate + "&date_to=" + strDate ;
//str += "&st=date&date_option=6&date_from=" + strDate + "&date_to=" + strDate ;
str += "&st=date&date_option=6&date_from=";
str += strDate;
str += "&date_to=";
str += strDate ;
str += "&dup_remove=1&post_blogurl=&post_blogurl_without=&srchby=all&nso=so%3Add%2Cp%3Afrom";
str += strDate + "to" + strDate +"&ie=utf8&start=" + QString::number(m_ncList);
str += strDate + "to" + strDate +"&ie=utf8&start=";
str += QString::number(m_ncList);
return str;
}
@@ -54,7 +59,7 @@ void SNaverBlogManage::processFinished(QProcess *pPro,QString _strOut)
m_bLast = true;
m_strListURL.clear();
QSqlQuery query;
if(query.exec("SELECT URL FROM " + C_TABLE_URL + QString::number(m_nUrlTable) + " where ERROR is null"))
if(query.exec("SELECT ARTICLE_URL FROM " + C_TABLE_URL + QString::number(m_nUrlTable) + " where ERROR is null"))
{
m_pMain->InsertLog(m_nID,query.lastError().text());
}