다음카페 댓글 시간만 표시될때 크롤링 에러나는 부분 임시 수정
git-svn-id: svn://192.168.0.12/source@61 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
@@ -61,35 +61,8 @@ void SCrawler::load(QStringList _strlistArgv)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}*/
|
||||||
QString proxyList;
|
|
||||||
if (getProxyList(proxyList))
|
|
||||||
{
|
|
||||||
QVector <QStringList> vecProxy;
|
|
||||||
QStringList strListProxy = proxyList.split("\n");
|
|
||||||
foreach(QString str, strListProxy)
|
|
||||||
{
|
|
||||||
str = str.trimmed();
|
|
||||||
if (str.isEmpty()) continue;
|
|
||||||
vecProxy.push_back(str.split(","));
|
|
||||||
}
|
|
||||||
if (vecProxy.size() > 0)
|
|
||||||
{
|
|
||||||
QStringList strList = vecProxy.at(rand()%vecProxy.size());
|
|
||||||
switch(strList.size())
|
|
||||||
{
|
|
||||||
case 1:
|
|
||||||
cout << "p : " << strList.at(0).toStdString() << endl;
|
|
||||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0))));
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
cout << "p : " << strList.at(0).toStdString() << ", " << strList.at(1).toStdString() << endl;
|
|
||||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
setProxy();
|
setProxy();
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -106,34 +79,7 @@ void SCrawler::load(QStringList _strlistArgv)
|
|||||||
m_strUrl = _strlistArgv[2];
|
m_strUrl = _strlistArgv[2];
|
||||||
m_nSelect = E_NAVER_BLOG_LIST;
|
m_nSelect = E_NAVER_BLOG_LIST;
|
||||||
m_strKeywordID = _strlistArgv[4];
|
m_strKeywordID = _strlistArgv[4];
|
||||||
//cout << "ok";
|
|
||||||
/*
|
|
||||||
QFile file("proxy.txt");
|
|
||||||
if (file.open(QIODevice::ReadOnly | QIODevice::Text))
|
|
||||||
{
|
|
||||||
QVector <QStringList> vecProxy;
|
|
||||||
while (!file.atEnd())
|
|
||||||
{
|
|
||||||
QString str = QString(file.readLine());
|
|
||||||
if (str.isEmpty()) continue;
|
|
||||||
vecProxy.push_back(str.split(","));
|
|
||||||
}
|
|
||||||
if (vecProxy.size() > 0)
|
|
||||||
{
|
|
||||||
QStringList strList = vecProxy.at(rand()%vecProxy.size());
|
|
||||||
switch(strList.size())
|
|
||||||
{
|
|
||||||
case 1:
|
|
||||||
cout << "p : " << strList.at(0).toStdString() << endl;
|
|
||||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0))));
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
cout << "p : " << strList.at(0).toStdString() << endl;
|
|
||||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}*/
|
|
||||||
setProxy();
|
setProxy();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -162,33 +108,6 @@ void SCrawler::load(QStringList _strlistArgv)
|
|||||||
m_nSelect = E_DAUM_CAFE_LIST;
|
m_nSelect = E_DAUM_CAFE_LIST;
|
||||||
m_strKeywordID = _strlistArgv[4];
|
m_strKeywordID = _strlistArgv[4];
|
||||||
setProxy();
|
setProxy();
|
||||||
/*
|
|
||||||
QFile file("proxy.txt");
|
|
||||||
if (file.open(QIODevice::ReadOnly | QIODevice::Text))
|
|
||||||
{
|
|
||||||
QVector <QStringList> vecProxy;
|
|
||||||
while (!file.atEnd())
|
|
||||||
{
|
|
||||||
QString str = QString(file.readLine());
|
|
||||||
if (str.isEmpty()) continue;
|
|
||||||
vecProxy.push_back(str.split(","));
|
|
||||||
}
|
|
||||||
if (vecProxy.size() > 0)
|
|
||||||
{
|
|
||||||
QStringList strList = vecProxy.at(rand()%vecProxy.size());
|
|
||||||
switch(strList.size())
|
|
||||||
{
|
|
||||||
case 1:
|
|
||||||
cout << "p : " << strList.at(0).toStdString() << endl;
|
|
||||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0))));
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
cout << "p : " << strList.at(0).toStdString() << endl;
|
|
||||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (_strlistArgv[1] == "cafe_data")
|
if (_strlistArgv[1] == "cafe_data")
|
||||||
@@ -204,33 +123,7 @@ void SCrawler::load(QStringList _strlistArgv)
|
|||||||
m_nSelect = E_DAUM_BLOG_LIST;
|
m_nSelect = E_DAUM_BLOG_LIST;
|
||||||
m_strKeywordID = _strlistArgv[4];
|
m_strKeywordID = _strlistArgv[4];
|
||||||
//cout << "ok";
|
//cout << "ok";
|
||||||
|
setProxy();
|
||||||
QFile file("proxy.txt");
|
|
||||||
if (file.open(QIODevice::ReadOnly | QIODevice::Text))
|
|
||||||
{
|
|
||||||
QVector <QStringList> vecProxy;
|
|
||||||
while (!file.atEnd())
|
|
||||||
{
|
|
||||||
QString str = QString(file.readLine());
|
|
||||||
if (str.isEmpty()) continue;
|
|
||||||
vecProxy.push_back(str.split(","));
|
|
||||||
}
|
|
||||||
if (vecProxy.size() > 0)
|
|
||||||
{
|
|
||||||
QStringList strList = vecProxy.at(rand()%vecProxy.size());
|
|
||||||
switch(strList.size())
|
|
||||||
{
|
|
||||||
case 1:
|
|
||||||
cout << "p : " << strList.at(0).toStdString() << endl;
|
|
||||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0))));
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
cout << "p : " << strList.at(0).toStdString() << endl;
|
|
||||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (_strlistArgv[1] == "blog_url")
|
if (_strlistArgv[1] == "blog_url")
|
||||||
@@ -697,9 +590,9 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
|
|||||||
{
|
{
|
||||||
QWebElement group = Find(frame->documentElement(),"ul","id","commentList");
|
QWebElement group = Find(frame->documentElement(),"ul","id","commentList");
|
||||||
QWebElementCollection elements = group.findAll("li");
|
QWebElementCollection elements = group.findAll("li");
|
||||||
QString strParent,strDate,strNick,strComm,strUrl;
|
QString strParent,strDate,strNick,strComm,strUrl,strId;
|
||||||
QStringList strList = m_strUrl.split("/");
|
QStringList strList = m_strUrl.split("/");
|
||||||
|
QString strCommUrl;
|
||||||
for (int i=0; i < 5; i++)
|
for (int i=0; i < 5; i++)
|
||||||
strUrl += strList.at(i) + "/";
|
strUrl += strList.at(i) + "/";
|
||||||
|
|
||||||
@@ -713,6 +606,13 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
|
|||||||
strNick = strParent = GetSafeUtf(Find(element,"a","class","nick pcol2").toPlainText());
|
strNick = strParent = GetSafeUtf(Find(element,"a","class","nick pcol2").toPlainText());
|
||||||
strDate = Find(element,"span","class","date fil5 pcol2").toPlainText();
|
strDate = Find(element,"span","class","date fil5 pcol2").toPlainText();
|
||||||
strComm = Find(element,"dd","class","comm pcol2").toPlainText();
|
strComm = Find(element,"dd","class","comm pcol2").toPlainText();
|
||||||
|
|
||||||
|
strCommUrl = Find(element,"a","class","nick pcol2").attribute("href");
|
||||||
|
if(strCommUrl.left(QString("http://blog.naver.com").length()).compare("http://blog.naver.com") == 0)
|
||||||
|
strId = strCommUrl.split("/").at(3).trimmed();
|
||||||
|
if(strCommUrl.right(QString("blog.me").length()).compare("blog.me") == 0)
|
||||||
|
strId = strCommUrl.split("/").at(2).split(".").at(0).trimmed();
|
||||||
|
|
||||||
strComm = GetSafeUtf(strComm);
|
strComm = GetSafeUtf(strComm);
|
||||||
if (strComm.isEmpty()== false)
|
if (strComm.isEmpty()== false)
|
||||||
{
|
{
|
||||||
@@ -721,7 +621,13 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
|
|||||||
strComm = strComm.trimmed();
|
strComm = strComm.trimmed();
|
||||||
//cout << strComm.toStdString() << endl;
|
//cout << strComm.toStdString() << endl;
|
||||||
QSqlQuery query;
|
QSqlQuery query;
|
||||||
query.prepare(QString("insert into " + m_strTable + " (article_url,article_nickname,article_data,article_date,reply_url,article_order,platform_name,platform_form,article_form) VALUES (:URL,:NICK,:DATA,:DATE,:URLREPLY,:ROWNUM,'naver','blog','reply')").toUtf8());
|
if(strId.length() > 0)
|
||||||
|
{
|
||||||
|
query.prepare(QString("insert into " + m_strTable + " (article_id,article_url,platform_id,article_nickname,article_data,article_date,reply_url,article_order,platform_name,platform_form,article_form) VALUES (:ID,:URL,:PLATFORMID,:NICK,:DATA,:DATE,:URLREPLY,:ROWNUM,'naver','blog','reply')").toUtf8());
|
||||||
|
query.bindValue(":ID", strId.toUtf8());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
query.prepare(QString("insert into " + m_strTable + " (article_url,platform_id,article_nickname,article_data,article_date,reply_url,article_order,platform_name,platform_form,article_form) VALUES (:URL,:PLATFORMID,:NICK,:DATA,:DATE,:URLREPLY,:ROWNUM,'naver','blog','reply')").toUtf8());
|
||||||
/*
|
/*
|
||||||
cout << "m_strTable = " << m_strTable.toStdString() << endl;
|
cout << "m_strTable = " << m_strTable.toStdString() << endl;
|
||||||
cout << "url = " << strUrl.toStdString() << endl << "nickname = " << strNick.toStdString() << endl;
|
cout << "url = " << strUrl.toStdString() << endl << "nickname = " << strNick.toStdString() << endl;
|
||||||
@@ -730,7 +636,9 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
|
|||||||
cout << "urlReply = " << m_strUrl.toStdString() << endl;
|
cout << "urlReply = " << m_strUrl.toStdString() << endl;
|
||||||
cout << "ronum = " << nCount << endl;
|
cout << "ronum = " << nCount << endl;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
query.bindValue(":URL", strUrl.toUtf8());
|
query.bindValue(":URL", strUrl.toUtf8());
|
||||||
|
query.bindValue(":PLATFORMID",strUrl.split("/").at(3).toUtf8());
|
||||||
query.bindValue(":NICK",strNick.toUtf8());
|
query.bindValue(":NICK",strNick.toUtf8());
|
||||||
query.bindValue(":DATA",strComm.toUtf8());
|
query.bindValue(":DATA",strComm.toUtf8());
|
||||||
query.bindValue(":DATE",strDate.toUtf8());
|
query.bindValue(":DATE",strDate.toUtf8());
|
||||||
@@ -748,7 +656,14 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
|
|||||||
strDate = Find(element,"span","class","date fil5 pcol2").toPlainText();
|
strDate = Find(element,"span","class","date fil5 pcol2").toPlainText();
|
||||||
QWebElement subElement = Find(element,"dd","class","comm pcol2");
|
QWebElement subElement = Find(element,"dd","class","comm pcol2");
|
||||||
QString subNick = Find(subElement,"a","class","nick pcol2").toPlainText();
|
QString subNick = Find(subElement,"a","class","nick pcol2").toPlainText();
|
||||||
strComm = subElement.toPlainText();
|
strComm = subElement.toPlainText();
|
||||||
|
|
||||||
|
strCommUrl = Find(element,"a","class","nick pcol2").attribute("href");
|
||||||
|
if(strCommUrl.left(QString("http://blog.naver.com").length()).compare("http://blog.naver.com") == 0)
|
||||||
|
strId = strCommUrl.split("/").at(3).trimmed();
|
||||||
|
if(strCommUrl.right(QString("blog.me").length()).compare("blog.me") == 0)
|
||||||
|
strId = strCommUrl.split("/").at(2).split(".").at(0).trimmed();
|
||||||
|
|
||||||
if(subNick.isEmpty() == false)
|
if(subNick.isEmpty() == false)
|
||||||
{
|
{
|
||||||
strNick = strParent;
|
strNick = strParent;
|
||||||
@@ -771,9 +686,16 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
|
|||||||
cout << "ronum = " << nCount << endl;
|
cout << "ronum = " << nCount << endl;
|
||||||
cout << "parent = " << strParent.toStdString() << endl;
|
cout << "parent = " << strParent.toStdString() << endl;
|
||||||
*/
|
*/
|
||||||
|
if(strId.length() > 0)
|
||||||
|
{
|
||||||
|
query.prepare(QString("insert into " + m_strTable + " (article_id,article_url,platform_id,article_nickname,article_data,article_date,reply_url,article_order,platform_name,platform_form,article_form) VALUES (:ID,:URL,:PLATFORMID,:NICK,:DATA,:DATE,:URLREPLY,:ROWNUM,'naver','blog','reply')").toUtf8());
|
||||||
|
query.bindValue(":ID", strId.toUtf8());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
query.prepare(QString("insert into " + m_strTable + " (article_url,platform_id,article_nickname,article_data,article_date,reply_url,article_order,platform_name,platform_form,article_form) VALUES (:URL,:PLATFORMID,:NICK,:DATA,:DATE,:URLREPLY,:ROWNUM,'naver','blog','reply')").toUtf8());
|
||||||
|
|
||||||
query.prepare(QString("insert into " + m_strTable + " (article_url,article_nickname,article_data,article_date,article_parent,reply_url,article_order,platform_name,platform_form,article_form) VALUES (:URL,:NICK,:DATA,:DATE,:PARENT,:URLREPLY,:ROWNUM,'naver','blog','reply')").toUtf8());
|
|
||||||
query.bindValue(":URL",strUrl.toUtf8());
|
query.bindValue(":URL",strUrl.toUtf8());
|
||||||
|
query.bindValue(":PLATFORMID",strUrl.split("/").at(3).toUtf8());
|
||||||
query.bindValue(":NICK",strNick.toUtf8());
|
query.bindValue(":NICK",strNick.toUtf8());
|
||||||
query.bindValue(":DATA",strComm.toUtf8());
|
query.bindValue(":DATA",strComm.toUtf8());
|
||||||
query.bindValue(":DATE",strDate.toUtf8());
|
query.bindValue(":DATE",strDate.toUtf8());
|
||||||
@@ -818,13 +740,12 @@ void SCrawler::saveFrameCafeList(QWebFrame *frame)
|
|||||||
if (strUrl.split("/").at(2) == "cafe.naver.com")
|
if (strUrl.split("/").at(2) == "cafe.naver.com")
|
||||||
{
|
{
|
||||||
QSqlQuery sql;
|
QSqlQuery sql;
|
||||||
/*
|
|
||||||
QString strQuery = "select URL from ";
|
QString strQuery = "select article_url from ";
|
||||||
strQuery += m_strTableBody;
|
strQuery += m_strTable;
|
||||||
strQuery += QString(" where URL = '%1'").arg(strUrl);
|
strQuery += QString(" where article_url = '%1'").arg(strUrl);
|
||||||
sql.exec(strQuery);
|
sql.exec(strQuery);
|
||||||
if (sql.size() == 0)
|
if (sql.size() == 0 || sql.size() == -1)
|
||||||
*/
|
|
||||||
{
|
{
|
||||||
QString strQuery = QString("insert into ");
|
QString strQuery = QString("insert into ");
|
||||||
strQuery += m_strTable;
|
strQuery += m_strTable;
|
||||||
@@ -833,8 +754,12 @@ void SCrawler::saveFrameCafeList(QWebFrame *frame)
|
|||||||
if (sql.exec(strUtf8) == false)
|
if (sql.exec(strUtf8) == false)
|
||||||
cout << "x " << sql.lastError().text().toStdString();
|
cout << "x " << sql.lastError().text().toStdString();
|
||||||
else
|
else
|
||||||
|
{
|
||||||
cout << "o " << strUrl.toStdString() << endl;
|
cout << "o " << strUrl.toStdString() << endl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
cout << "v " << strUrl.toStdString() << endl;
|
||||||
}
|
}
|
||||||
m_bUse = true;
|
m_bUse = true;
|
||||||
}
|
}
|
||||||
@@ -962,8 +887,9 @@ void SCrawler::saveFrameCafeUrl(QWebFrame *frame)
|
|||||||
if (strDate.isEmpty()) continue;
|
if (strDate.isEmpty()) continue;
|
||||||
|
|
||||||
QSqlQuery query;
|
QSqlQuery query;
|
||||||
query.prepare(QString("insert into " + m_strTable + " (platform_name,platform_form,article_form,article_url,article_id,article_nickname,article_data,article_date,reply_url,article_order) VALUES ('naver','cafe','reply',:URL,:ID,:NICK,:DATA,:DATE,:URLREPLY,:ROWNUM)").toUtf8());
|
query.prepare(QString("insert into " + m_strTable + " (platform_name,platform_form,article_form,article_url,platform_id,article_id,article_nickname,article_data,article_date,reply_url,article_order) VALUES ('naver','cafe','reply',:URL,:PLATFORMID,:ID,:NICK,:DATA,:DATE,:URLREPLY,:ROWNUM)").toUtf8());
|
||||||
query.bindValue(":URL",m_strUrl.toUtf8());
|
query.bindValue(":URL",m_strUrl.toUtf8());
|
||||||
|
query.bindValue(":PLATFORMID",m_strUrl.split("/").at(3).toUtf8());
|
||||||
query.bindValue(":ID",strID.toUtf8());
|
query.bindValue(":ID",strID.toUtf8());
|
||||||
query.bindValue(":NICK",strNick.toUtf8());
|
query.bindValue(":NICK",strNick.toUtf8());
|
||||||
query.bindValue(":DATA",strData.toUtf8());
|
query.bindValue(":DATA",strData.toUtf8());
|
||||||
@@ -986,8 +912,9 @@ void SCrawler::saveFrameCafeUrl(QWebFrame *frame)
|
|||||||
if (eleParent.toPlainText().isEmpty() == false)
|
if (eleParent.toPlainText().isEmpty() == false)
|
||||||
strReParent = eleParent.toPlainText();
|
strReParent = eleParent.toPlainText();
|
||||||
QSqlQuery query;
|
QSqlQuery query;
|
||||||
query.prepare(QString("insert into " + m_strTable + " (platform_name,platform_form,article_form,article_url,article_id,article_nickname,article_data,article_date,article_parent,reply_url,article_order) VALUES ('naver','cafe','reply',:URL,:ID,:NICK,:DATA,:DATE,:PARENT,:URLREPLY,:ROWNUM)").toUtf8());
|
query.prepare(QString("insert into " + m_strTable + " (platform_name,platform_form,article_form,article_url,platform_id,article_id,article_nickname,article_data,article_date,article_parent,reply_url,article_order) VALUES ('naver','cafe','reply',:URL,:PLATFORMID,:ID,:NICK,:DATA,:DATE,:PARENT,:URLREPLY,:ROWNUM)").toUtf8());
|
||||||
query.bindValue(":URL",m_strUrl.toUtf8());
|
query.bindValue(":URL",m_strUrl.toUtf8());
|
||||||
|
query.bindValue(":PLATFORMID",m_strUrl.split("/").at(3).toUtf8());
|
||||||
query.bindValue(":ID",strID.toUtf8());
|
query.bindValue(":ID",strID.toUtf8());
|
||||||
query.bindValue(":NICK",strNick.toUtf8());
|
query.bindValue(":NICK",strNick.toUtf8());
|
||||||
query.bindValue(":DATA",strData.toUtf8());
|
query.bindValue(":DATA",strData.toUtf8());
|
||||||
@@ -1033,13 +960,12 @@ void SCrawler::saveFrameDaumCafeList(QWebFrame *frame)
|
|||||||
if (strUrl.split("/").at(2) == "cafe.daum.net")
|
if (strUrl.split("/").at(2) == "cafe.daum.net")
|
||||||
{
|
{
|
||||||
QSqlQuery sql;
|
QSqlQuery sql;
|
||||||
/*
|
|
||||||
QString strQuery = "select URL from ";
|
QString strQuery = "select article_url from ";
|
||||||
strQuery += m_strTableBody;
|
strQuery += m_strTable;
|
||||||
strQuery += QString(" where URL = '%1'").arg(strUrl);
|
strQuery += QString(" where article_url = '%1'").arg(strUrl);
|
||||||
sql.exec(strQuery);
|
sql.exec(strQuery);
|
||||||
if (sql.size() == 0)
|
if (sql.size() == 0 || sql.size() == -1)
|
||||||
*/
|
|
||||||
{
|
{
|
||||||
QString strQuery = QString("insert into ");
|
QString strQuery = QString("insert into ");
|
||||||
strQuery += m_strTable;
|
strQuery += m_strTable;
|
||||||
@@ -1050,6 +976,8 @@ void SCrawler::saveFrameDaumCafeList(QWebFrame *frame)
|
|||||||
else
|
else
|
||||||
cout << "o " << strUrl.toStdString() << endl;
|
cout << "o " << strUrl.toStdString() << endl;
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
cout << "v " << strUrl.toStdString() << endl;
|
||||||
}
|
}
|
||||||
m_bUse = true;
|
m_bUse = true;
|
||||||
}
|
}
|
||||||
@@ -1126,27 +1054,10 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
|
|||||||
QWebElement group = Find(Find(frame->documentElement(),"div","class","article_writer"),"a","href","#");
|
QWebElement group = Find(Find(frame->documentElement(),"div","class","article_writer"),"a","href","#");
|
||||||
strNick = group.toPlainText().trimmed();
|
strNick = group.toPlainText().trimmed();
|
||||||
|
|
||||||
if (strNick.isEmpty() == false)
|
QWebElement id = Find(frame->documentElement(),"div","class","article_writer");
|
||||||
{
|
QStringList list = FindLeft(id,"a","onclick","showSideView").attribute("onclick").trimmed().split(",");
|
||||||
/*
|
if (list.size() >= 2)
|
||||||
QStringList list = strNick.split("(");
|
strID = list.at(1).trimmed().replace("'","");
|
||||||
if (list.isEmpty() == false)
|
|
||||||
strNick = list.at(0);
|
|
||||||
*/
|
|
||||||
QWebElement id = Find(frame->documentElement(),"td","class","m-tcol-c b nick");
|
|
||||||
QStringList list = Find(id,"a","class","m-tcol-c b").attribute("onclick").trimmed().split(",");
|
|
||||||
if (list.size() >= 2)
|
|
||||||
strID = list.at(1).trimmed().replace("'","");
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
QStringList list = Find(Find(frame->documentElement(),"td","class","m-tcol-c b nick"),"a","class","m-tcol-c b").attribute("onclick").trimmed().split(",");
|
|
||||||
if (list.size() >= 4)
|
|
||||||
{
|
|
||||||
strID = list.at(1).trimmed().replace("'","");
|
|
||||||
strNick = list.at(3).trimmed().replace("'","");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
QStringList strList = Find(frame->documentElement(),"div","class","article_writer").toPlainText().split("|");
|
QStringList strList = Find(frame->documentElement(),"div","class","article_writer").toPlainText().split("|");
|
||||||
@@ -1210,15 +1121,31 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
|
|||||||
QString strData = SqlString(Find(element,"span","class","comment_contents").toPlainText().trimmed());
|
QString strData = SqlString(Find(element,"span","class","comment_contents").toPlainText().trimmed());
|
||||||
if (strData.isEmpty()) continue;
|
if (strData.isEmpty()) continue;
|
||||||
strData = GetSafeUtf(strData);
|
strData = GetSafeUtf(strData);
|
||||||
QString strID = Find(element,"input","name","writerid").attribute("value").trimmed();
|
|
||||||
|
QString strID;
|
||||||
|
QStringList strListID = Find(element,"a","class","b").attribute("onclick").split(",");
|
||||||
|
if(strListID.length() > 2)
|
||||||
|
strID = strListID.at(1).trimmed().replace("'","");
|
||||||
|
|
||||||
QString strNick = Find(element,"a","class","b").toPlainText().trimmed();
|
QString strNick = Find(element,"a","class","b").toPlainText().trimmed();
|
||||||
strParent = strNick;
|
strParent = strNick;
|
||||||
QString strDate = Find(element,"span","class","comment_date txt_sub p11 ls0").toPlainText().trimmed().replace(".","-").replace("- "," ");
|
|
||||||
|
QString strDatetest = Find(element,"span","class","comment_date txt_sub p11 ls0").toPlainText().trimmed();
|
||||||
|
QString strDate;
|
||||||
|
if(strDatetest.count(".") == 0)
|
||||||
|
{
|
||||||
|
strDate = QDateTime::currentDateTime().toString("yyyy-MM-dd");
|
||||||
|
strDate += (" " + strDatetest);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
strDate = Find(element,"span","class","comment_date txt_sub p11 ls0").toPlainText().trimmed().replace(".","-").replace("- "," ");
|
||||||
|
}
|
||||||
|
|
||||||
if (strDate.isEmpty()) continue;
|
if (strDate.isEmpty()) continue;
|
||||||
else strDate += ":00";
|
else strDate += ":00";
|
||||||
QSqlQuery query;
|
QSqlQuery query;
|
||||||
//query.prepare(QString("insert into " + m_strTable + " (platform_name,platform_form,article_form,article_url,article_id,article_nickname,article_data,article_date,reply_url,article_order) VALUES ('daum','cafe','reply',:URL,:ID,:NICK,:DATA,:DATE,:URLREPLY,:ROWNUM)").toUtf8());
|
|
||||||
//query.prepare(QString("insert into " + m_strTable + " (platform_name,platform_form,article_form,article_url,article_id,article_nickname,article_data,article_date,article_order) VALUES ('daum','cafe','reply',:URL,:ID,:NICK,:DATA,:DATE,:ROWNUM)").toUtf8());
|
|
||||||
query.prepare(QString("insert into " + m_strTable + " (platform_name,platform_form,article_form,article_url,article_id,article_nickname,article_data,article_date,article_order,platform_id,article_hit,platform_title) VALUES ('daum','cafe','reply',:URL,:ID,:NICK,:DATA,:DATE,:ROWNUM,:PLATFORMID,:HITS,:TITLE)").toUtf8());
|
query.prepare(QString("insert into " + m_strTable + " (platform_name,platform_form,article_form,article_url,article_id,article_nickname,article_data,article_date,article_order,platform_id,article_hit,platform_title) VALUES ('daum','cafe','reply',:URL,:ID,:NICK,:DATA,:DATE,:ROWNUM,:PLATFORMID,:HITS,:TITLE)").toUtf8());
|
||||||
query.bindValue(":URL",m_strUrl.toUtf8());
|
query.bindValue(":URL",m_strUrl.toUtf8());
|
||||||
query.bindValue(":ID",strID.toUtf8());
|
query.bindValue(":ID",strID.toUtf8());
|
||||||
@@ -1227,7 +1154,7 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
|
|||||||
query.bindValue(":DATE",strDate.toUtf8());
|
query.bindValue(":DATE",strDate.toUtf8());
|
||||||
//query.bindValue(":URLREPLY",m_strReper.toUtf8());
|
//query.bindValue(":URLREPLY",m_strReper.toUtf8());
|
||||||
query.bindValue(":ROWNUM",nCount++);
|
query.bindValue(":ROWNUM",nCount++);
|
||||||
query.bindValue(":PLATFORMID",m_strUrl.split("/").at(1).toUtf8());
|
query.bindValue(":PLATFORMID",m_strUrl.split("/").at(3).toUtf8());
|
||||||
query.bindValue(":HITS",strHits.toUtf8());
|
query.bindValue(":HITS",strHits.toUtf8());
|
||||||
query.bindValue(":TITLE",strTitle.toUtf8());
|
query.bindValue(":TITLE",strTitle.toUtf8());
|
||||||
|
|
||||||
@@ -1243,15 +1170,28 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
|
|||||||
if(strReParent.length() == 0)
|
if(strReParent.length() == 0)
|
||||||
strReParent = strParent;
|
strReParent = strParent;
|
||||||
|
|
||||||
QString strID = Find(element,"input","name","writerid").attribute("value").trimmed();
|
QString strID;
|
||||||
|
QStringList strListID = Find(element,"a","class","b").attribute("onclick").split(",");
|
||||||
|
if(strListID.length() > 2)
|
||||||
|
strID = strListID.at(1).trimmed().replace("'","");
|
||||||
|
|
||||||
QString strNick = Find(element,"a","class","b").toPlainText().trimmed();
|
QString strNick = Find(element,"a","class","b").toPlainText().trimmed();
|
||||||
QString strDate = Find(element,"span","class","comment_date txt_sub p11 ls0").toPlainText().trimmed().replace(".","-").replace("- "," ");
|
QString strDatetest = Find(element,"span","class","comment_date txt_sub p11 ls0").toPlainText().trimmed();
|
||||||
|
QString strDate;
|
||||||
|
if(strDatetest.count(".") == 0)
|
||||||
|
{
|
||||||
|
strDate = QDateTime::currentDateTime().toString("yyyy-MM-dd");
|
||||||
|
strDate += (" " + strDatetest);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
strDate = Find(element,"span","class","comment_date txt_sub p11 ls0").toPlainText().trimmed().replace(".","-").replace("- "," ");
|
||||||
|
}
|
||||||
|
|
||||||
if (strDate.isEmpty()) continue;
|
if (strDate.isEmpty()) continue;
|
||||||
else strDate += ":00";
|
else strDate += ":00";
|
||||||
|
|
||||||
QSqlQuery query;
|
QSqlQuery query;
|
||||||
//query.prepare(QString("insert into " + m_strTable + " (platform_name,platform_form,article_form,article_url,article_id,article_nickname,article_data,article_date,article_parent,reply_url,article_order) VALUES ('daum','cafe','reply',:URL,:ID,:NICK,:DATA,:DATE,:PARENT,:URLREPLY,:ROWNUM)").toUtf8());
|
|
||||||
//query.prepare(QString("insert into " + m_strTable + " (platform_name,platform_form,article_form,article_url,article_id,article_nickname,article_data,article_date,article_parent,article_order) VALUES ('daum','cafe','reply',:URL,:ID,:NICK,:DATA,:DATE,:PARENT,:ROWNUM)").toUtf8());
|
|
||||||
query.prepare(QString("insert into " + m_strTable + " (platform_name,platform_form,article_form,article_url,article_id,article_nickname,article_data,article_date,article_parent,article_order,platform_id,article_hit,platform_title) VALUES ('daum','cafe','reply',:URL,:ID,:NICK,:DATA,:DATE,:PARENT,:ROWNUM,:PLATFORMID,:HITS,:TITLE)").toUtf8());
|
query.prepare(QString("insert into " + m_strTable + " (platform_name,platform_form,article_form,article_url,article_id,article_nickname,article_data,article_date,article_parent,article_order,platform_id,article_hit,platform_title) VALUES ('daum','cafe','reply',:URL,:ID,:NICK,:DATA,:DATE,:PARENT,:ROWNUM,:PLATFORMID,:HITS,:TITLE)").toUtf8());
|
||||||
query.bindValue(":URL",m_strUrl.toUtf8());
|
query.bindValue(":URL",m_strUrl.toUtf8());
|
||||||
query.bindValue(":ID",strID.toUtf8());
|
query.bindValue(":ID",strID.toUtf8());
|
||||||
@@ -1261,7 +1201,7 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame)
|
|||||||
query.bindValue(":PARENT",strReParent.toUtf8());
|
query.bindValue(":PARENT",strReParent.toUtf8());
|
||||||
//query.bindValue(":URLREPLY",m_strReper.toUtf8());
|
//query.bindValue(":URLREPLY",m_strReper.toUtf8());
|
||||||
query.bindValue(":ROWNUM",nCount++);
|
query.bindValue(":ROWNUM",nCount++);
|
||||||
query.bindValue(":PLATFORMID",m_strUrl.split("/").at(1).toUtf8());
|
query.bindValue(":PLATFORMID",m_strUrl.split("/").at(3).toUtf8());
|
||||||
query.bindValue(":HITS",strHits.toUtf8());
|
query.bindValue(":HITS",strHits.toUtf8());
|
||||||
query.bindValue(":TITLE",strTitle.toUtf8());
|
query.bindValue(":TITLE",strTitle.toUtf8());
|
||||||
//QWebView::page()->mainFrame()->evaluateJavaScript("");
|
//QWebView::page()->mainFrame()->evaluateJavaScript("");
|
||||||
|
|||||||
Reference in New Issue
Block a user