git-svn-id: svn://192.168.0.12/source@29 8346c931-da38-4b9b-9d4c-e48b93cbd075

This commit is contained in:
admin
2015-02-26 06:28:26 +00:00
parent aaebce6cf6
commit 21f33f69e2
3 changed files with 70 additions and 24 deletions

View File

@@ -67,7 +67,7 @@ void SCrawler::load(QStringList _strlistArgv)
{
m_strUrl = _strlistArgv[2];
m_nSelect = E_NAVER_CAFE_DATA;
m_strReper = _strlistArgv[4];
m_strReper = _strlistArgv[4];
}
if (_strlistArgv[1] == "blog_list")
@@ -76,6 +76,33 @@ void SCrawler::load(QStringList _strlistArgv)
m_nSelect = E_NAVER_BLOG_LIST;
m_strKeywordID = _strlistArgv[4];
//cout << "ok";
// Optional proxy selection: if "proxy.txt" can be opened, pick one of its
// lines at random and install it as the application-wide HTTP proxy.
// Each line is either "host" or "host,port"; lines with any other number of
// comma-separated fields are ignored if they happen to be picked.
// If the file cannot be opened, the current proxy is left untouched.
QFile file("proxy.txt");
if (file.open(QIODevice::ReadOnly | QIODevice::Text))
{
    QVector <QStringList> vecProxy;
    while (!file.atEnd())
    {
        // readLine() keeps the trailing '\n', so the raw line is never empty
        // and the newline would leak into the host/port field; trim first.
        const QString strLine = QString(file.readLine()).trimmed();
        if (strLine.isEmpty()) continue;
        vecProxy.push_back(strLine.split(","));
    }
    if (vecProxy.size() > 0)
    {
        // NOTE(review): rand() seeding is not visible here - confirm srand()
        // is called elsewhere, otherwise the same entry is picked every run.
        const QStringList strList = vecProxy.at(rand()%vecProxy.size());
        const QString strHost = strList.at(0).trimmed();
        switch(strList.size())
        {
        case 1:
            cout << "p : " << strHost.toStdString() << endl;
            // setApplicationProxy() copies its argument, so pass a temporary
            // instead of leaking a heap-allocated QNetworkProxy.
            QNetworkProxy::setApplicationProxy(QNetworkProxy(QNetworkProxy::HttpProxy, strHost));
            break;
        case 2:
            cout << "p : " << strHost.toStdString() << endl;
            QNetworkProxy::setApplicationProxy(QNetworkProxy(QNetworkProxy::HttpProxy, strHost, strList.at(1).trimmed().toInt()));
            break;
        }
    }
}
}
if (_strlistArgv[1] == "blog_url")
@@ -241,17 +268,16 @@ void SCrawler::saveFrameList(QWebFrame *frame)
m_bUse = true;
return;
}
cout << "url : " << strUrl.toStdString();
QStringList strList = strUrl.split('/');
if (strList.at(0).compare("blog.naver.com") != 0 ) { cout << " not" << endl; continue; };
if (strList.at(0).compare("blog.naver.com") != 0 ) { cout << "x http://" << strUrl.toStdString() <<endl; continue; };
QString strQuery = "select article_url from ";
strQuery += m_strTable;
strQuery += QString(" where article_url = '%1'").arg(strUrl);
strQuery += QString(" where article_url = 'http://%1'").arg(strUrl);
sql.exec(strQuery);
if (sql.size() == -1)
if (sql.size() == 0 || sql.size() == -1)
{
QString str = Find(sub,"a","class","txt84").toPlainText();
str = GetSafeUtf(str);
@@ -261,15 +287,16 @@ void SCrawler::saveFrameList(QWebFrame *frame)
QString strQuery = QString("insert into ");
strQuery += m_strTable;
strQuery += QString(" set article_url='%1',platform_id='%2',platform_title='%3',keyword_id='%4'").arg("http://"+strUrl).arg(strUrl.split("/").at(1)).arg(str).arg(m_strKeywordID);
strQuery += QString(" set platform_name='naver',platform_form='blog',article_form='body',article_url='%1',platform_id='%2',platform_title='%3',keyword_id='%4'").arg("http://"+strUrl).arg(strUrl.split("/").at(1)).arg(str).arg(m_strKeywordID);
QString strUtf8(strQuery.toUtf8());
if (sql.exec(strUtf8) == false)
cout << "error : " << sql.lastError().text().toStdString();
else
cout << " ok" << endl;
cout << "o ";
}
else
cout << " overlap" << endl;
cout << "v ";
cout << "http://" << strUrl.toStdString() << endl;
m_bUse = true;
}
@@ -395,7 +422,6 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
if ( str[E_DATA_DATE].isEmpty() == false)
{
str[E_DATA_DATE] += ":00";
cout << "str[E_DATA_DATE] = " << str[E_DATA_DATE].toStdString() << endl;
}
else
{
@@ -461,7 +487,7 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
strNick = strParent = Find(element,"a","class","nick pcol2").toPlainText();
strDate = Find(element,"span","class","date fil5 pcol2").toPlainText();
strComm = Find(element,"dd","class","comm pcol2").toPlainText();
//strComm = GetSafeUtf(strComm);
strComm = GetSafeUtf(strComm);
if (strComm.isEmpty()== false)
{
strComm.replace("'","\\'");
@@ -469,13 +495,15 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
strComm = strComm.trimmed();
//cout << strComm.toStdString() << endl;
QSqlQuery query;
query.prepare(QString("insert into " + m_strTable + " (article_url,article_nickname,article_data,article_date,article_parent,reply_url,article_order) VALUES (:URL,:NICK,:DATA,:DATE,:PARENT,:URLREPLY,:ROWNUM)").toUtf8());
query.prepare(QString("insert into " + m_strTable + " (article_url,article_nickname,article_data,article_date,article_parent,reply_url,article_order,platform_name,platform_form,article_form) VALUES (:URL,:NICK,:DATA,:DATE,:PARENT,:URLREPLY,:ROWNUM,'naver','blog','reply')").toUtf8());
/*
cout << "m_strTable = " << m_strTable.toStdString() << endl;
cout << "url = " << strUrl.toStdString() << endl << "nickname = " << strNick.toStdString() << endl;
cout << "data = " << strComm.toStdString() << endl;
cout << "date = " << strDate.toStdString() << endl;
cout << "urlReply = " << m_strUrl.toStdString() << endl;
cout << "ronum = " << nCount << endl;
*/
query.bindValue(":URL", strUrl.toUtf8());
query.bindValue(":NICK",strNick.toUtf8());
query.bindValue(":DATA",strComm.toUtf8());
@@ -503,19 +531,22 @@ void SCrawler::saveFrameComment(QWebFrame *frame)
if (strComm.isEmpty() == false)
{
// strComm = GetSafeUtf(strComm);
strComm = GetSafeUtf(strComm);
strComm.replace("'","\\'");
strComm.replace("\"","\\\"");
strComm = strComm.trimmed();
QSqlQuery query;
cout << "m_strTable = " << m_strTable.toStdString() << endl;
/*
cout << "m_strTable = " << m_strTable.toStdString() << endl;
cout << "url = " << strUrl.toStdString() << endl << "nickname = " << strNick.toStdString() << endl;
cout << "data = " << strComm.toStdString() << endl;
cout << "date = " << strDate.toStdString() << endl;
cout << "urlReply = " << m_strUrl.toStdString() << endl;
cout << "ronum = " << nCount << endl;
cout << "parent = " << strParent.toStdString() << endl;
query.prepare(QString("insert into " + m_strTable + " (article_url,article_nickname,article_data,article_date,article_parent,reply_url,article_order) VALUES (:URL,:NICK,:DATA,:DATE,:PARENT,:URLREPLY,:ROWNUM)").toUtf8());
*/
query.prepare(QString("insert into " + m_strTable + " (article_url,article_nickname,article_data,article_date,article_parent,reply_url,article_order,platform_name,platform_form,article_form) VALUES (:URL,:NICK,:DATA,:DATE,:PARENT,:URLREPLY,:ROWNUM,'naver','blog','reply')").toUtf8());
query.bindValue(":URL",strUrl.toUtf8());
query.bindValue(":NICK",strNick.toUtf8());
query.bindValue(":DATA",strComm.toUtf8());