proxy db연동

네이버 블로그 리스트가 모드 v,x시 무한 reloadlist되는 버그 수정
다음 카페 크롤러 last조건 수정
네이버 article_profile 작성

git-svn-id: svn://192.168.0.12/source@57 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
admin
2015-03-17 08:17:38 +00:00
parent 4c855cfa5c
commit 639861a139
2 changed files with 107 additions and 8 deletions

View File

@@ -35,6 +35,7 @@ void SCrawler::load(QStringList _strlistArgv)
m_nSelect = E_NAVER_CAFE_LIST; m_nSelect = E_NAVER_CAFE_LIST;
m_strKeywordID = _strlistArgv[4]; m_strKeywordID = _strlistArgv[4];
/*
QFile file("proxy.txt"); QFile file("proxy.txt");
if (file.open(QIODevice::ReadOnly | QIODevice::Text)) if (file.open(QIODevice::ReadOnly | QIODevice::Text))
{ {
@@ -61,6 +62,36 @@ void SCrawler::load(QStringList _strlistArgv)
} }
} }
} }
QString proxyList;
if (getProxyList(proxyList))
{
QVector <QStringList> vecProxy;
QStringList strListProxy = proxyList.split("\n");
foreach(QString str, strListProxy)
{
str = str.trimmed();
if (str.isEmpty()) continue;
vecProxy.push_back(str.split(","));
}
if (vecProxy.size() > 0)
{
QStringList strList = vecProxy.at(rand()%vecProxy.size());
switch(strList.size())
{
case 1:
cout << "p : " << strList.at(0).toStdString() << endl;
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0))));
break;
case 2:
cout << "p : " << strList.at(0).toStdString() << ", " << strList.at(1).toStdString() << endl;
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
break;
}
}
}
*/
setProxy();
} }
if (_strlistArgv[1] == "cafe_data") if (_strlistArgv[1] == "cafe_data")
@@ -76,7 +107,7 @@ void SCrawler::load(QStringList _strlistArgv)
m_nSelect = E_NAVER_BLOG_LIST; m_nSelect = E_NAVER_BLOG_LIST;
m_strKeywordID = _strlistArgv[4]; m_strKeywordID = _strlistArgv[4];
//cout << "ok"; //cout << "ok";
/*
QFile file("proxy.txt"); QFile file("proxy.txt");
if (file.open(QIODevice::ReadOnly | QIODevice::Text)) if (file.open(QIODevice::ReadOnly | QIODevice::Text))
{ {
@@ -102,7 +133,8 @@ void SCrawler::load(QStringList _strlistArgv)
break; break;
} }
} }
} }*/
setProxy();
} }
if (_strlistArgv[1] == "blog_url") if (_strlistArgv[1] == "blog_url")
@@ -129,7 +161,8 @@ void SCrawler::load(QStringList _strlistArgv)
m_strUrl = _strlistArgv[2]; m_strUrl = _strlistArgv[2];
m_nSelect = E_DAUM_CAFE_LIST; m_nSelect = E_DAUM_CAFE_LIST;
m_strKeywordID = _strlistArgv[4]; m_strKeywordID = _strlistArgv[4];
setProxy();
/*
QFile file("proxy.txt"); QFile file("proxy.txt");
if (file.open(QIODevice::ReadOnly | QIODevice::Text)) if (file.open(QIODevice::ReadOnly | QIODevice::Text))
{ {
@@ -155,7 +188,7 @@ void SCrawler::load(QStringList _strlistArgv)
break; break;
} }
} }
} }*/
} }
if (_strlistArgv[1] == "cafe_data") if (_strlistArgv[1] == "cafe_data")
@@ -504,6 +537,7 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
QString str[E_DATA_MAX]; QString str[E_DATA_MAX];
QString strHead[E_DATA_MAX] = {"article_nickname","article_id","article_title","article_date","article_data","platform_title"}; QString strHead[E_DATA_MAX] = {"article_nickname","article_id","article_title","article_date","article_data","platform_title"};
QString strSympathy; QString strSympathy;
QString strProfile;
QWebElement proTitle = Find(frame->documentElement(),"meta","property","og:article:author"); QWebElement proTitle = Find(frame->documentElement(),"meta","property","og:article:author");
str[E_DATA_PLATFORM_TITLE] = proTitle.attribute("content").split("|").at(1).trimmed(); str[E_DATA_PLATFORM_TITLE] = proTitle.attribute("content").split("|").at(1).trimmed();
if(str[E_DATA_PLATFORM_TITLE].length() > 0) if(str[E_DATA_PLATFORM_TITLE].length() > 0)
@@ -514,6 +548,7 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
str[E_DATA_PLATFORM_TITLE] = GetSafeUtf(proTitle.toPlainText().trimmed()); str[E_DATA_PLATFORM_TITLE] = GetSafeUtf(proTitle.toPlainText().trimmed());
} }
QWebElement image; QWebElement image;
QWebElement profile = Find(frame->documentElement(),"div","id","blog-profile"); QWebElement profile = Find(frame->documentElement(),"div","id","blog-profile");
{ {
@@ -566,7 +601,7 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
str[E_DATA_NICK] = str[E_DATA_ID]; str[E_DATA_NICK] = str[E_DATA_ID];
image = Find(profile,"img","alt","프로필 이미지"); image = Find(profile,"img","alt","프로필 이미지");
strProfile = Find(profile, "p", "class", "caption align").toPlainText().trimmed();
//strSympathy = FindLeft(Find(frame->documentElement(),"p","class","postre"),"a","class","pcol2 _symList").toPlainText().split(" ").at(1); //strSympathy = FindLeft(Find(frame->documentElement(),"p","class","postre"),"a","class","pcol2 _symList").toPlainText().split(" ").at(1);
/* /*
@@ -632,6 +667,14 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
strQuery += image.attribute("src").trimmed(); strQuery += image.attribute("src").trimmed();
strQuery += "'"; strQuery += "'";
} }
strProfile = GetSafeUtf(strProfile);
if(strProfile.length() > 0)
{
strQuery += ", ";
strQuery += "article_profile='";
strQuery += strProfile;
strQuery += "'";
}
strQuery += " where article_url='"; strQuery += " where article_url='";
strQuery += m_strUrl; strQuery += m_strUrl;
strQuery += "'"; strQuery += "'";
@@ -1014,9 +1057,8 @@ void SCrawler::saveFrameDaumCafeList(QWebFrame *frame)
{ {
bool b_last = false; bool b_last = false;
QWebElement paging_comm = Find(frame->documentElement(), "div", "class", "paging_comm");
QWebElement paging_inner = Find(paging_comm, "span", "class", "paging_inner"); b_last = Find(frame->documentElement(), "div", "class", "result_message mg_cont hide").isNull();
b_last = Find(paging_inner, "a", "class", "ico_comm1 btn_page btn_next").isNull();
QWebElement total = Find(eleMain,"span","class","f_nb f_l"); QWebElement total = Find(eleMain,"span","class","f_nb f_l");
if (total.toPlainText().isEmpty()) {m_bError = true; return;} if (total.toPlainText().isEmpty()) {m_bError = true; return;}
@@ -1026,6 +1068,7 @@ void SCrawler::saveFrameDaumCafeList(QWebFrame *frame)
int nNowFirst = GetNumber(strList.at(strList.size() - 2)); int nNowFirst = GetNumber(strList.at(strList.size() - 2));
if (nNow >= 1000 || (nNow - nNowFirst) < 9 || b_last) if (nNow >= 1000 || (nNow - nNowFirst) < 9 || b_last)
m_bLast = true; m_bLast = true;
//cout << "nNow : " << nNow << endl << "nNow - nNowFirst: " << (nNow - nNowFirst) << endl << "b_last : " << b_last << endl;
} }
} }
@@ -1314,3 +1357,57 @@ QList<QWebElement> SCrawler::FindAllMid(const QWebElement _FindElement,const QSt
} }
return returnElements; return returnElements;
} }
/**
 * Reads every row of the Proxy table and appends it to _str as text.
 *
 * Each row is appended as "<proxy>,<port>\n". _str is not cleared first, so
 * any content it already holds is kept and the rows are added after it.
 *
 * @param _str  Output buffer that receives one "proxy,port" line per row.
 * @return false if the SELECT statement fails to execute (nothing is appended
 *         to _str in that case); true otherwise, including when the query
 *         yields no rows.
 */
bool SCrawler::getProxyList(QString &_str)
{
    // The statement is plain ASCII, so it is handed to exec() as a QString
    // directly instead of being round-tripped through a UTF-8 QByteArray.
    const QString strQuery = "select proxy, port from Proxy";

    // Default-constructed query: no explicit connection is named here.
    QSqlQuery query;
    if (!query.exec(strQuery))
    {
        return false;
    }

    while (query.next())
    {
        _str += query.value(0).toString();   // column 0: proxy
        _str += ",";
        _str += query.value(1).toString();   // column 1: port
        _str += "\n";
    }
    return true;
}
void SCrawler::setProxy()
{
QString proxyList;
if (getProxyList(proxyList))
{
QVector <QStringList> vecProxy;
QStringList strListProxy = proxyList.split("\n");
foreach(QString str, strListProxy)
{
str = str.trimmed();
if (str.isEmpty()) continue;
vecProxy.push_back(str.split(","));
}
if (vecProxy.size() > 0)
{
QStringList strList = vecProxy.at(rand()%vecProxy.size());
switch(strList.size())
{
case 1:
cout << "p : " << strList.at(0).toStdString() << endl;
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0))));
break;
case 2:
//cout << "p : " << strList.at(0).toStdString() << ", " << strList.at(1).toStdString() << endl;
cout << "p : " << strList.at(0).toStdString() << endl;
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
break;
}
}
}
}

View File

@@ -57,6 +57,8 @@ private:
void saveFrameDaumBlogComment(QWebFrame *frame); void saveFrameDaumBlogComment(QWebFrame *frame);
void saveFrameDaumCafeUrl(QWebFrame *frame); void saveFrameDaumCafeUrl(QWebFrame *frame);
int GetNumber(QString _str); int GetNumber(QString _str);
bool getProxyList(QString &_str);
void setProxy();
QWebElement Find(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind); QWebElement Find(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind);