소스 정리

git-svn-id: svn://192.168.0.12/source@158 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
admin
2015-07-08 08:09:34 +00:00
parent b324a32819
commit 3626f1ffa9

View File

@@ -484,52 +484,11 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
QWebElement profile = Find(frame->documentElement(),"div","class","profile_name");
QString str = profile.toPlainText().split("\n").at(0);
if (str.isEmpty() == false)
{
//bodydata.setData(str, bodydata.ARTICLE_NICKNAME);
bodydata.setData(m_strUrl, bodydata.ARTICLE_URL);
/*
QString strQuery = "update " + m_strTable + " set article_nickname = '";
strQuery += str;
strQuery += "'";
strQuery += " where article_url='";
strQuery += m_strUrl;
strQuery += "'";
QString strUtf8(strQuery.toUtf8());
if (sql.exec(strUtf8)==false)
{
cout << "error : " << sql.lastError().text().toStdString();
UpdateError("Error code 1");
m_bUse = false;
}
*/
}
}
/*
QString strHtml2 = frame->toHtml();
QString strFind2 = "blogpfthumb";
int start = strHtml2.indexOf(strFind2);
cout << "start = " << start << endl;
QString str222 = strHtml2.mid(start,30);
if(start != -1)
cout << "start String = " << str222.toStdString() << endl;
QWebElement image2 = Find(frame->documentElement(),"div","id","blog-profile");
cout << "p class image = " << image2.toInnerXml().toStdString() << endl;
image2 = Find(image2,"a","href","#");
image2 = Find(image2,"img","alt","프로필 이미지");
cout << "outer image profile = " << image2.attribute("src").toStdString() << endl;
image2 = FindMid(frame->documentElement(),"img","src","http://blogpfthumb",0,18);
cout << "outer image findmid = " << image2.attribute("src").toStdString() << endl;
*/
if (frame->frameName().compare(QString("mainFrame")) == 0)
{
QString str[E_DATA_MAX];
QString strHead[E_DATA_MAX] = {"article_nickname","article_id","article_title","article_date","article_data","platform_title"};
QString strSympathy;
QString strProfile;
QWebElement proTitle = Find(frame->documentElement(),"meta","property","og:article:author");
str[E_DATA_PLATFORM_TITLE] = proTitle.attribute("content").split("|").at(1).trimmed();
@@ -573,16 +532,7 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
str[E_DATA_ID] = str[E_DATA_NICK];
}
else
{
/*
QWebElement id = Find(profile,"span","class","itemfont col");
if (id.toPlainText().isEmpty()==false)
{
str[E_DATA_ID] = id.toPlainText();
str[E_DATA_ID] = str[E_DATA_ID].replace("(","").replace(")","");
}
*/
{
if (str[E_DATA_ID].isEmpty())
{
if((m_strUrl.split("/").at(2).compare("blog.naver.com") == 0))
@@ -594,24 +544,12 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
if(str[E_DATA_NICK].length() == 0)
str[E_DATA_NICK] = str[E_DATA_ID];
//qDebug() << profile.toInnerXml();
image = Find(profile,"img","alt","프로필 이미지");
strProfile = Find(profile, "p", "class", "caption align").toPlainText().trimmed();
//strSympathy = FindLeft(Find(frame->documentElement(),"p","class","postre"),"a","class","pcol2 _symList").toPlainText().split(" ").at(1);
/*
cout << "inner image = " << image.attribute("src").toStdString() << endl;
image = FindMid(profile,"img","src","http://blogpfthumb",0,18);
cout << "inner image FindMid = " << image.attribute("src").toStdString() << endl;
cout << "str[E_DATA_ID] = " << str[E_DATA_ID].toStdString() << ", str[E_DATA_NICK] = " << str[E_DATA_NICK].toStdString() << endl;
*/
}
{
QWebElement post = Find(frame->documentElement(),"div","id","postListBody");
QWebElement post_top = Find(post,"table","class","post-top");
{
QWebElement title = Find(post_top,"span","class","pcol1 itemSubjectBoldfont");
if (title.toPlainText().isEmpty()==false)
@@ -672,46 +610,6 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
bodydata.setData(m_strUrl.split("/").at(3), bodydata.PLATFORM_ID);
bodydata.setData(m_strKeywordID, bodydata.KEYWORD_ID);
/*
QString strQuery = "update " + m_strTable + " set ";
for(int i = 0; i < E_DATA_MAX ; i++)
{
strQuery += strHead[i];
strQuery += "='";
//strQuery += GetSafeUtf(SqlString(str[i].trimmed()));
strQuery += str[i].trimmed();
strQuery += "'";
if( i != (E_DATA_MAX - 1) )
strQuery += ",";
}
if(image.attribute("src").trimmed().length() != 0)
{
strQuery += ", ";
strQuery += "article_profileurl='";
strQuery += image.attribute("src").trimmed();
strQuery += "'";
}
strProfile = GetSafeUtf(strProfile);
if(strProfile.length() > 0)
{
strQuery += ", ";
strQuery += "article_profile='";
strQuery += strProfile;
strQuery += "'";
}
strQuery += " where article_url='";
strQuery += m_strUrl;
strQuery += "'";
//cout << "strQuery = " << strQuery.toStdString() << endl;
QString strUtf8(strQuery.toUtf8());
if (sql.exec(strUtf8)==false)
{
cout << "error : " << sql.lastError().text().toStdString();
UpdateError("Error code 5");
m_bUse = false;
}
*/
}
foreach(QWebFrame *childFrame, frame->childFrames())
@@ -762,7 +660,6 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
strComm.replace("'","\\'");
strComm.replace("\"","\\\"");
strComm = strComm.trimmed();
//cout << strComm.toStdString() << endl;
QSqlQuery query;
if(strId.length() > 0)
{
@@ -771,21 +668,12 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
}
else
query.prepare(QString("insert into " + m_strTable + " (article_url,platform_id,article_nickname,article_data,article_date,reply_url,article_order,platform_name,platform_form,article_form) VALUES (:URL,:PLATFORMID,:NICK,:DATA,:DATE,:URLREPLY,:ROWNUM,'naver','blog','reply')").toUtf8());
/*
cout << "m_strTable = " << m_strTable.toStdString() << endl;
cout << "url = " << strUrl.toStdString() << endl << "nickname = " << strNick.toStdString() << endl;
cout << "data = " << strComm.toStdString() << endl;
cout << "date = " << strDate.toStdString() << endl;
cout << "urlReply = " << m_strUrl.toStdString() << endl;
cout << "ronum = " << nCount << endl;
*/
query.bindValue(":URL", strUrl.toUtf8());
query.bindValue(":PLATFORMID",strUrl.split("/").at(3).toUtf8());
query.bindValue(":NICK",strNick.toUtf8());
query.bindValue(":DATA",strComm.toUtf8());
query.bindValue(":DATE",strDate.toUtf8());
//query.bindValue(":PARENT",QString("NULL").toUtf8());
query.bindValue(":URLREPLY",m_strUrl.toUtf8());
query.bindValue(":ROWNUM",(nCount++));
@@ -819,10 +707,7 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
}
if(subNick.isEmpty() == false)
{
//strNick = strParent;
strComm = strComm.right(strComm.size()-subNick.size()-1);
}
if (strComm.isEmpty() == false)
{
@@ -831,15 +716,6 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
strComm.replace("\"","\\\"");
strComm = strComm.trimmed();
QSqlQuery query;
/*
cout << "m_strTable = " << m_strTable.toStdString() << endl;
cout << "url = " << strUrl.toStdString() << endl << "nickname = " << strNick.toStdString() << endl;
cout << "data = " << strComm.toStdString() << endl;
cout << "date = " << strDate.toStdString() << endl;
cout << "urlReply = " << m_strUrl.toStdString() << endl;
cout << "ronum = " << nCount << endl;
cout << "parent = " << strParent.toStdString() << endl;
*/
if(strId.length() > 0)
{
query.prepare(QString("insert into " + m_strTable + " (article_id,article_url,platform_id,article_nickname,article_data,article_date,article_parent,reply_url,article_order,platform_name,platform_form,article_form) VALUES (:ID,:URL,:PLATFORMID,:NICK,:DATA,:DATE,:PARENT,:URLREPLY,:ROWNUM,'naver','blog','reply')").toUtf8());
@@ -943,27 +819,7 @@ void SCrawler::saveFrameCafeList(QWebFrame *frame)
}
if (strUrl.split("/").at(2) == "cafe.naver.com")
{
//QSqlQuery sql;
//if (sql.size() == 0 || sql.size() == -1)
{
/*
QString strQuery = QString("insert into ");
strQuery += m_strTable;
strQuery += QString(" set platform_name='naver',platform_form='cafe',article_form='body',article_url='%1',platform_id='%2',article_title='%3',keyword_id='%4'").arg(strUrl).arg(strUrl.split("/").at(3)).arg(strTitle).arg(m_strKeywordID);
QString strUtf8(strQuery.toUtf8());
if (sql.exec(strUtf8) == false)
cout << "x " << sql.lastError().text().toStdString();
else
*/
{
cout << "o " << strUrl.toStdString() << endl;
}
}
//else
// cout << "v " << strUrl.toStdString() << endl;
}
cout << "o " << strUrl.toStdString() << endl;
m_bUse = true;
}
}
@@ -1308,7 +1164,6 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
{
if (m_bUse) return;
QWebElement other = frame->documentElement().findFirst("title");
QString strTitle = other.toPlainText().trimmed().split("|").at(0).trimmed();
QString strUrl_;
@@ -1328,26 +1183,12 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
strUrl_ = m_strUrl;
}
bodydata.setData(SqlString(GetSafeUtf(strTitle)), bodydata.PLATFORM_TITLE);
/*
QString strQuery = "update ";
strQuery += m_strTable;
strQuery += " set ";
strQuery += "platform_title = '" + SqlString(GetSafeUtf(strTitle)) + "'";
strQuery += "where article_url='";
strQuery += m_strUrl;
strQuery += "'";
QString strUtf8(strQuery.toUtf8());
QSqlQuery sql;
if (sql.exec(strUtf8) == false)
cout << "error : " << sql.lastError().text().toStdString();
*/
}
if (frame->frameName() == "down")
{
QString strHits;
{
//QString strData,strDate,strNick,strID,strHits;
QString strData,strDate,strNick,strID,strTitle;
{
QWebElement group = Find(frame->documentElement(),"div","class","bbs_contents");
@@ -1359,12 +1200,7 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
strDate = group.toPlainText().trimmed().replace(".","-");
strDate = strDate.replace("- "," ");
if (strDate.isEmpty() == true)
{
//QWebElement subgroup = Find(frame->documentElement(),"input","name","PLAIN_REGDT").attribute("value");
//strDate = subgroup.toPlainText().trimmed();
strDate = Find(frame->documentElement(),"input","name","PLAIN_REGDT").attribute("value");
//strDate += " 00:00:00";
}
else
strDate += ":00";
}
@@ -1398,12 +1234,6 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
}
}
}
/*
if (strHits.isEmpty())
{
strHits = Find(frame->documentElement(),"span","class","kin_count m-tcol-c _rosReadcount").toPlainText();
}
*/
{
bodydata.setTable(m_strTable);
bodydata.setData(strData, bodydata.ARTICLE_DATA);
@@ -1428,36 +1258,13 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
bodydata.setData("body", bodydata.ARTICLE_FORM);
bodydata.setData(m_strUrl.split("/").at(3), bodydata.PLATFORM_ID);
bodydata.setData(m_strKeywordID, bodydata.KEYWORD_ID);
bodydata.setData(strTitle, bodydata.ARTICLE_TITLE);
/*
QSqlQuery sql;
QString strQuery = "update ";
strQuery += m_strTable;
strQuery += " set ";
strQuery += "article_data = '" + strData + "',";
strQuery += "article_date = '" + strDate + "',";
strQuery += "article_nickname = '" + strNick + "',";
if(!strID.isEmpty())
strQuery += "article_id = '" + strID + "',";
strQuery += "article_hit = '" + strHits + "'";
strQuery += "where article_url='";
strQuery += m_strUrl;
strQuery += "'";
QString strUtf8(strQuery.toUtf8());
if (sql.exec(strUtf8) == false)
cout << "error : " << sql.lastError().text().toStdString();
*/
bodydata.setData(strTitle, bodydata.ARTICLE_TITLE);
}
}
// Comment
{
QWebElement group = Find(frame->documentElement(),"div","class","commentDiv bg_sub");
QList<QWebElement> elements = FindAllMid(group,"div","id","_cmt-",0,5);
/*
* foreach(QWebElement element, elements)
cout << "element = " << element.toPlainText().toStdString() << endl;
*/
QString commHidden = "comment_hidden";
QString commPos = "comment_pos";
QString commReComm = "recomment_pos";
@@ -1508,8 +1315,7 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
query.bindValue(":ID",strID.toUtf8());
query.bindValue(":NICK",strNick.toUtf8());
query.bindValue(":DATA",strData.toUtf8());
query.bindValue(":DATE",strDate.toUtf8());
//query.bindValue(":URLREPLY",m_strReper.toUtf8());
query.bindValue(":DATE",strDate.toUtf8());
query.bindValue(":ROWNUM",nCount++);
query.bindValue(":PLATFORMID",m_strUrl.split("/").at(3).toUtf8());
query.bindValue(":HITS",strHits.toUtf8());
@@ -1588,6 +1394,12 @@ void SCrawler::saveFrameDaumBlogList(QWebFrame *frame){}
void SCrawler::saveFrameNewsList(QWebFrame *frame)
{
if (m_bUse == true) return;
QWebElement notFound = Find(frame->documentElement(),"div","class","no_content");
if(notFound.isNull() == false)
{
m_bLast = true;
return;
}
QWebElement eleMain = Find(frame->documentElement(),"div","class","srch_result_area headline");
foreach(QWebElement eleSub,eleMain.findAll("div"))
{
@@ -1596,7 +1408,7 @@ void SCrawler::saveFrameNewsList(QWebFrame *frame)
QString str = Find(eleSub,"a","class","go_naver").attribute("href");
if (str.trimmed().isEmpty()) continue;
if (str.contains("http://sports")) continue;
m_bNothing = true;
m_bNothing = true;
cout << "o " << str.toStdString() << endl;
}
}
@@ -1605,16 +1417,13 @@ void SCrawler::saveFrameNewsList(QWebFrame *frame)
foreach(QString str,strTotal)
{
if (str.trimmed().isEmpty() == false)
{
vecTotal.push_back(str.toInt());
}
}
if (vecTotal.size() == 3)
{
if (vecTotal[0] >= vecTotal[1])
m_bLast = true;
if (vecTotal[1] == vecTotal[2])
m_bLast = true;
if (vecTotal[0] >= vecTotal[1]) m_bLast = true;
if (vecTotal[1] == vecTotal[2]) m_bLast = true;
}
else
m_bError = true;