소스 정리
git-svn-id: svn://192.168.0.12/source@158 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
@@ -484,52 +484,11 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
|
||||
QWebElement profile = Find(frame->documentElement(),"div","class","profile_name");
|
||||
QString str = profile.toPlainText().split("\n").at(0);
|
||||
if (str.isEmpty() == false)
|
||||
{
|
||||
//bodydata.setData(str, bodydata.ARTICLE_NICKNAME);
|
||||
bodydata.setData(m_strUrl, bodydata.ARTICLE_URL);
|
||||
/*
|
||||
QString strQuery = "update " + m_strTable + " set article_nickname = '";
|
||||
strQuery += str;
|
||||
strQuery += "'";
|
||||
strQuery += " where article_url='";
|
||||
strQuery += m_strUrl;
|
||||
strQuery += "'";
|
||||
QString strUtf8(strQuery.toUtf8());
|
||||
if (sql.exec(strUtf8)==false)
|
||||
{
|
||||
cout << "error : " << sql.lastError().text().toStdString();
|
||||
UpdateError("Error code 1");
|
||||
m_bUse = false;
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
/*
|
||||
QString strHtml2 = frame->toHtml();
|
||||
QString strFind2 = "blogpfthumb";
|
||||
int start = strHtml2.indexOf(strFind2);
|
||||
cout << "start = " << start << endl;
|
||||
|
||||
QString str222 = strHtml2.mid(start,30);
|
||||
|
||||
if(start != -1)
|
||||
cout << "start String = " << str222.toStdString() << endl;
|
||||
|
||||
|
||||
QWebElement image2 = Find(frame->documentElement(),"div","id","blog-profile");
|
||||
cout << "p class image = " << image2.toInnerXml().toStdString() << endl;
|
||||
image2 = Find(image2,"a","href","#");
|
||||
image2 = Find(image2,"img","alt","프로필 이미지");
|
||||
cout << "outer image profile = " << image2.attribute("src").toStdString() << endl;
|
||||
image2 = FindMid(frame->documentElement(),"img","src","http://blogpfthumb",0,18);
|
||||
cout << "outer image findmid = " << image2.attribute("src").toStdString() << endl;
|
||||
*/
|
||||
|
||||
if (frame->frameName().compare(QString("mainFrame")) == 0)
|
||||
{
|
||||
QString str[E_DATA_MAX];
|
||||
QString strHead[E_DATA_MAX] = {"article_nickname","article_id","article_title","article_date","article_data","platform_title"};
|
||||
QString strSympathy;
|
||||
QString strProfile;
|
||||
QWebElement proTitle = Find(frame->documentElement(),"meta","property","og:article:author");
|
||||
str[E_DATA_PLATFORM_TITLE] = proTitle.attribute("content").split("|").at(1).trimmed();
|
||||
@@ -573,16 +532,7 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
|
||||
str[E_DATA_ID] = str[E_DATA_NICK];
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
QWebElement id = Find(profile,"span","class","itemfont col");
|
||||
if (id.toPlainText().isEmpty()==false)
|
||||
{
|
||||
str[E_DATA_ID] = id.toPlainText();
|
||||
str[E_DATA_ID] = str[E_DATA_ID].replace("(","").replace(")","");
|
||||
}
|
||||
*/
|
||||
|
||||
{
|
||||
if (str[E_DATA_ID].isEmpty())
|
||||
{
|
||||
if((m_strUrl.split("/").at(2).compare("blog.naver.com") == 0))
|
||||
@@ -594,24 +544,12 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
|
||||
if(str[E_DATA_NICK].length() == 0)
|
||||
str[E_DATA_NICK] = str[E_DATA_ID];
|
||||
|
||||
//qDebug() << profile.toInnerXml();
|
||||
|
||||
image = Find(profile,"img","alt","프로필 이미지");
|
||||
strProfile = Find(profile, "p", "class", "caption align").toPlainText().trimmed();
|
||||
//strSympathy = FindLeft(Find(frame->documentElement(),"p","class","postre"),"a","class","pcol2 _symList").toPlainText().split(" ").at(1);
|
||||
|
||||
/*
|
||||
cout << "inner image = " << image.attribute("src").toStdString() << endl;
|
||||
image = FindMid(profile,"img","src","http://blogpfthumb",0,18);
|
||||
cout << "inner image FindMid = " << image.attribute("src").toStdString() << endl;
|
||||
cout << "str[E_DATA_ID] = " << str[E_DATA_ID].toStdString() << ", str[E_DATA_NICK] = " << str[E_DATA_NICK].toStdString() << endl;
|
||||
*/
|
||||
|
||||
}
|
||||
{
|
||||
QWebElement post = Find(frame->documentElement(),"div","id","postListBody");
|
||||
QWebElement post_top = Find(post,"table","class","post-top");
|
||||
|
||||
{
|
||||
QWebElement title = Find(post_top,"span","class","pcol1 itemSubjectBoldfont");
|
||||
if (title.toPlainText().isEmpty()==false)
|
||||
@@ -672,46 +610,6 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
|
||||
bodydata.setData(m_strUrl.split("/").at(3), bodydata.PLATFORM_ID);
|
||||
bodydata.setData(m_strKeywordID, bodydata.KEYWORD_ID);
|
||||
|
||||
/*
|
||||
QString strQuery = "update " + m_strTable + " set ";
|
||||
for(int i = 0; i < E_DATA_MAX ; i++)
|
||||
{
|
||||
strQuery += strHead[i];
|
||||
strQuery += "='";
|
||||
//strQuery += GetSafeUtf(SqlString(str[i].trimmed()));
|
||||
strQuery += str[i].trimmed();
|
||||
strQuery += "'";
|
||||
if( i != (E_DATA_MAX - 1) )
|
||||
strQuery += ",";
|
||||
}
|
||||
if(image.attribute("src").trimmed().length() != 0)
|
||||
{
|
||||
strQuery += ", ";
|
||||
strQuery += "article_profileurl='";
|
||||
strQuery += image.attribute("src").trimmed();
|
||||
strQuery += "'";
|
||||
}
|
||||
strProfile = GetSafeUtf(strProfile);
|
||||
if(strProfile.length() > 0)
|
||||
{
|
||||
strQuery += ", ";
|
||||
strQuery += "article_profile='";
|
||||
strQuery += strProfile;
|
||||
strQuery += "'";
|
||||
}
|
||||
strQuery += " where article_url='";
|
||||
strQuery += m_strUrl;
|
||||
strQuery += "'";
|
||||
//cout << "strQuery = " << strQuery.toStdString() << endl;
|
||||
QString strUtf8(strQuery.toUtf8());
|
||||
|
||||
if (sql.exec(strUtf8)==false)
|
||||
{
|
||||
cout << "error : " << sql.lastError().text().toStdString();
|
||||
UpdateError("Error code 5");
|
||||
m_bUse = false;
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
foreach(QWebFrame *childFrame, frame->childFrames())
|
||||
@@ -762,7 +660,6 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
|
||||
strComm.replace("'","\\'");
|
||||
strComm.replace("\"","\\\"");
|
||||
strComm = strComm.trimmed();
|
||||
//cout << strComm.toStdString() << endl;
|
||||
QSqlQuery query;
|
||||
if(strId.length() > 0)
|
||||
{
|
||||
@@ -771,21 +668,12 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
|
||||
}
|
||||
else
|
||||
query.prepare(QString("insert into " + m_strTable + " (article_url,platform_id,article_nickname,article_data,article_date,reply_url,article_order,platform_name,platform_form,article_form) VALUES (:URL,:PLATFORMID,:NICK,:DATA,:DATE,:URLREPLY,:ROWNUM,'naver','blog','reply')").toUtf8());
|
||||
/*
|
||||
cout << "m_strTable = " << m_strTable.toStdString() << endl;
|
||||
cout << "url = " << strUrl.toStdString() << endl << "nickname = " << strNick.toStdString() << endl;
|
||||
cout << "data = " << strComm.toStdString() << endl;
|
||||
cout << "date = " << strDate.toStdString() << endl;
|
||||
cout << "urlReply = " << m_strUrl.toStdString() << endl;
|
||||
cout << "ronum = " << nCount << endl;
|
||||
*/
|
||||
|
||||
query.bindValue(":URL", strUrl.toUtf8());
|
||||
query.bindValue(":PLATFORMID",strUrl.split("/").at(3).toUtf8());
|
||||
query.bindValue(":NICK",strNick.toUtf8());
|
||||
query.bindValue(":DATA",strComm.toUtf8());
|
||||
query.bindValue(":DATE",strDate.toUtf8());
|
||||
//query.bindValue(":PARENT",QString("NULL").toUtf8());
|
||||
query.bindValue(":URLREPLY",m_strUrl.toUtf8());
|
||||
query.bindValue(":ROWNUM",(nCount++));
|
||||
|
||||
@@ -819,10 +707,7 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
|
||||
}
|
||||
|
||||
if(subNick.isEmpty() == false)
|
||||
{
|
||||
//strNick = strParent;
|
||||
strComm = strComm.right(strComm.size()-subNick.size()-1);
|
||||
}
|
||||
|
||||
if (strComm.isEmpty() == false)
|
||||
{
|
||||
@@ -831,15 +716,6 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
|
||||
strComm.replace("\"","\\\"");
|
||||
strComm = strComm.trimmed();
|
||||
QSqlQuery query;
|
||||
/*
|
||||
cout << "m_strTable = " << m_strTable.toStdString() << endl;
|
||||
cout << "url = " << strUrl.toStdString() << endl << "nickname = " << strNick.toStdString() << endl;
|
||||
cout << "data = " << strComm.toStdString() << endl;
|
||||
cout << "date = " << strDate.toStdString() << endl;
|
||||
cout << "urlReply = " << m_strUrl.toStdString() << endl;
|
||||
cout << "ronum = " << nCount << endl;
|
||||
cout << "parent = " << strParent.toStdString() << endl;
|
||||
*/
|
||||
if(strId.length() > 0)
|
||||
{
|
||||
query.prepare(QString("insert into " + m_strTable + " (article_id,article_url,platform_id,article_nickname,article_data,article_date,article_parent,reply_url,article_order,platform_name,platform_form,article_form) VALUES (:ID,:URL,:PLATFORMID,:NICK,:DATA,:DATE,:PARENT,:URLREPLY,:ROWNUM,'naver','blog','reply')").toUtf8());
|
||||
@@ -943,27 +819,7 @@ void SCrawler::saveFrameCafeList(QWebFrame *frame)
|
||||
}
|
||||
|
||||
if (strUrl.split("/").at(2) == "cafe.naver.com")
|
||||
{
|
||||
//QSqlQuery sql;
|
||||
|
||||
//if (sql.size() == 0 || sql.size() == -1)
|
||||
{
|
||||
/*
|
||||
QString strQuery = QString("insert into ");
|
||||
strQuery += m_strTable;
|
||||
strQuery += QString(" set platform_name='naver',platform_form='cafe',article_form='body',article_url='%1',platform_id='%2',article_title='%3',keyword_id='%4'").arg(strUrl).arg(strUrl.split("/").at(3)).arg(strTitle).arg(m_strKeywordID);
|
||||
QString strUtf8(strQuery.toUtf8());
|
||||
if (sql.exec(strUtf8) == false)
|
||||
cout << "x " << sql.lastError().text().toStdString();
|
||||
else
|
||||
*/
|
||||
{
|
||||
cout << "o " << strUrl.toStdString() << endl;
|
||||
}
|
||||
}
|
||||
//else
|
||||
// cout << "v " << strUrl.toStdString() << endl;
|
||||
}
|
||||
cout << "o " << strUrl.toStdString() << endl;
|
||||
m_bUse = true;
|
||||
}
|
||||
}
|
||||
@@ -1308,7 +1164,6 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
|
||||
{
|
||||
if (m_bUse) return;
|
||||
|
||||
|
||||
QWebElement other = frame->documentElement().findFirst("title");
|
||||
QString strTitle = other.toPlainText().trimmed().split("|").at(0).trimmed();
|
||||
QString strUrl_;
|
||||
@@ -1328,26 +1183,12 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
|
||||
strUrl_ = m_strUrl;
|
||||
}
|
||||
bodydata.setData(SqlString(GetSafeUtf(strTitle)), bodydata.PLATFORM_TITLE);
|
||||
/*
|
||||
QString strQuery = "update ";
|
||||
strQuery += m_strTable;
|
||||
strQuery += " set ";
|
||||
strQuery += "platform_title = '" + SqlString(GetSafeUtf(strTitle)) + "'";
|
||||
strQuery += "where article_url='";
|
||||
strQuery += m_strUrl;
|
||||
strQuery += "'";
|
||||
QString strUtf8(strQuery.toUtf8());
|
||||
QSqlQuery sql;
|
||||
if (sql.exec(strUtf8) == false)
|
||||
cout << "error : " << sql.lastError().text().toStdString();
|
||||
*/
|
||||
}
|
||||
|
||||
if (frame->frameName() == "down")
|
||||
{
|
||||
QString strHits;
|
||||
{
|
||||
//QString strData,strDate,strNick,strID,strHits;
|
||||
QString strData,strDate,strNick,strID,strTitle;
|
||||
{
|
||||
QWebElement group = Find(frame->documentElement(),"div","class","bbs_contents");
|
||||
@@ -1359,12 +1200,7 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
|
||||
strDate = group.toPlainText().trimmed().replace(".","-");
|
||||
strDate = strDate.replace("- "," ");
|
||||
if (strDate.isEmpty() == true)
|
||||
{
|
||||
//QWebElement subgroup = Find(frame->documentElement(),"input","name","PLAIN_REGDT").attribute("value");
|
||||
//strDate = subgroup.toPlainText().trimmed();
|
||||
strDate = Find(frame->documentElement(),"input","name","PLAIN_REGDT").attribute("value");
|
||||
//strDate += " 00:00:00";
|
||||
}
|
||||
else
|
||||
strDate += ":00";
|
||||
}
|
||||
@@ -1398,12 +1234,6 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
if (strHits.isEmpty())
|
||||
{
|
||||
strHits = Find(frame->documentElement(),"span","class","kin_count m-tcol-c _rosReadcount").toPlainText();
|
||||
}
|
||||
*/
|
||||
{
|
||||
bodydata.setTable(m_strTable);
|
||||
bodydata.setData(strData, bodydata.ARTICLE_DATA);
|
||||
@@ -1428,36 +1258,13 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
|
||||
bodydata.setData("body", bodydata.ARTICLE_FORM);
|
||||
bodydata.setData(m_strUrl.split("/").at(3), bodydata.PLATFORM_ID);
|
||||
bodydata.setData(m_strKeywordID, bodydata.KEYWORD_ID);
|
||||
bodydata.setData(strTitle, bodydata.ARTICLE_TITLE);
|
||||
|
||||
/*
|
||||
QSqlQuery sql;
|
||||
QString strQuery = "update ";
|
||||
strQuery += m_strTable;
|
||||
strQuery += " set ";
|
||||
strQuery += "article_data = '" + strData + "',";
|
||||
strQuery += "article_date = '" + strDate + "',";
|
||||
strQuery += "article_nickname = '" + strNick + "',";
|
||||
if(!strID.isEmpty())
|
||||
strQuery += "article_id = '" + strID + "',";
|
||||
strQuery += "article_hit = '" + strHits + "'";
|
||||
strQuery += "where article_url='";
|
||||
strQuery += m_strUrl;
|
||||
strQuery += "'";
|
||||
QString strUtf8(strQuery.toUtf8());
|
||||
if (sql.exec(strUtf8) == false)
|
||||
cout << "error : " << sql.lastError().text().toStdString();
|
||||
*/
|
||||
bodydata.setData(strTitle, bodydata.ARTICLE_TITLE);
|
||||
}
|
||||
}
|
||||
// Comment
|
||||
{
|
||||
QWebElement group = Find(frame->documentElement(),"div","class","commentDiv bg_sub");
|
||||
QList<QWebElement> elements = FindAllMid(group,"div","id","_cmt-",0,5);
|
||||
/*
|
||||
* foreach(QWebElement element, elements)
|
||||
cout << "element = " << element.toPlainText().toStdString() << endl;
|
||||
*/
|
||||
QString commHidden = "comment_hidden";
|
||||
QString commPos = "comment_pos";
|
||||
QString commReComm = "recomment_pos";
|
||||
@@ -1508,8 +1315,7 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
|
||||
query.bindValue(":ID",strID.toUtf8());
|
||||
query.bindValue(":NICK",strNick.toUtf8());
|
||||
query.bindValue(":DATA",strData.toUtf8());
|
||||
query.bindValue(":DATE",strDate.toUtf8());
|
||||
//query.bindValue(":URLREPLY",m_strReper.toUtf8());
|
||||
query.bindValue(":DATE",strDate.toUtf8());
|
||||
query.bindValue(":ROWNUM",nCount++);
|
||||
query.bindValue(":PLATFORMID",m_strUrl.split("/").at(3).toUtf8());
|
||||
query.bindValue(":HITS",strHits.toUtf8());
|
||||
@@ -1588,6 +1394,12 @@ void SCrawler::saveFrameDaumBlogList(QWebFrame *frame){}
|
||||
void SCrawler::saveFrameNewsList(QWebFrame *frame)
|
||||
{
|
||||
if (m_bUse == true) return;
|
||||
QWebElement notFound = Find(frame->documentElement(),"div","class","no_content");
|
||||
if(notFound.isNull() == false)
|
||||
{
|
||||
m_bLast = true;
|
||||
return;
|
||||
}
|
||||
QWebElement eleMain = Find(frame->documentElement(),"div","class","srch_result_area headline");
|
||||
foreach(QWebElement eleSub,eleMain.findAll("div"))
|
||||
{
|
||||
@@ -1596,7 +1408,7 @@ void SCrawler::saveFrameNewsList(QWebFrame *frame)
|
||||
QString str = Find(eleSub,"a","class","go_naver").attribute("href");
|
||||
if (str.trimmed().isEmpty()) continue;
|
||||
if (str.contains("http://sports")) continue;
|
||||
m_bNothing = true;
|
||||
m_bNothing = true;
|
||||
cout << "o " << str.toStdString() << endl;
|
||||
}
|
||||
}
|
||||
@@ -1605,16 +1417,13 @@ void SCrawler::saveFrameNewsList(QWebFrame *frame)
|
||||
foreach(QString str,strTotal)
|
||||
{
|
||||
if (str.trimmed().isEmpty() == false)
|
||||
{
|
||||
vecTotal.push_back(str.toInt());
|
||||
}
|
||||
}
|
||||
|
||||
if (vecTotal.size() == 3)
|
||||
{
|
||||
if (vecTotal[0] >= vecTotal[1])
|
||||
m_bLast = true;
|
||||
if (vecTotal[1] == vecTotal[2])
|
||||
m_bLast = true;
|
||||
if (vecTotal[0] >= vecTotal[1]) m_bLast = true;
|
||||
if (vecTotal[1] == vecTotal[2]) m_bLast = true;
|
||||
}
|
||||
else
|
||||
m_bError = true;
|
||||
|
||||
Reference in New Issue
Block a user