proxy db연동
네이버 블로그 리스트가 모드 v,x시 무한 reloadlist되는 버그 수정
다음 카페 크롤러 last조건 수정
네이버 article_profile 작성

git-svn-id: svn://192.168.0.12/source@57 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
@@ -35,6 +35,7 @@ void SCrawler::load(QStringList _strlistArgv)
|
||||
m_nSelect = E_NAVER_CAFE_LIST;
|
||||
m_strKeywordID = _strlistArgv[4];
|
||||
|
||||
/*
|
||||
QFile file("proxy.txt");
|
||||
if (file.open(QIODevice::ReadOnly | QIODevice::Text))
|
||||
{
|
||||
@@ -61,6 +62,36 @@ void SCrawler::load(QStringList _strlistArgv)
|
||||
}
|
||||
}
|
||||
}
|
||||
QString proxyList;
|
||||
if (getProxyList(proxyList))
|
||||
{
|
||||
QVector <QStringList> vecProxy;
|
||||
QStringList strListProxy = proxyList.split("\n");
|
||||
foreach(QString str, strListProxy)
|
||||
{
|
||||
str = str.trimmed();
|
||||
if (str.isEmpty()) continue;
|
||||
vecProxy.push_back(str.split(","));
|
||||
}
|
||||
if (vecProxy.size() > 0)
|
||||
{
|
||||
QStringList strList = vecProxy.at(rand()%vecProxy.size());
|
||||
switch(strList.size())
|
||||
{
|
||||
case 1:
|
||||
cout << "p : " << strList.at(0).toStdString() << endl;
|
||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0))));
|
||||
break;
|
||||
case 2:
|
||||
cout << "p : " << strList.at(0).toStdString() << ", " << strList.at(1).toStdString() << endl;
|
||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
setProxy();
|
||||
|
||||
}
|
||||
|
||||
if (_strlistArgv[1] == "cafe_data")
|
||||
@@ -76,7 +107,7 @@ void SCrawler::load(QStringList _strlistArgv)
|
||||
m_nSelect = E_NAVER_BLOG_LIST;
|
||||
m_strKeywordID = _strlistArgv[4];
|
||||
//cout << "ok";
|
||||
|
||||
/*
|
||||
QFile file("proxy.txt");
|
||||
if (file.open(QIODevice::ReadOnly | QIODevice::Text))
|
||||
{
|
||||
@@ -102,7 +133,8 @@ void SCrawler::load(QStringList _strlistArgv)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}*/
|
||||
setProxy();
|
||||
}
|
||||
|
||||
if (_strlistArgv[1] == "blog_url")
|
||||
@@ -129,7 +161,8 @@ void SCrawler::load(QStringList _strlistArgv)
|
||||
m_strUrl = _strlistArgv[2];
|
||||
m_nSelect = E_DAUM_CAFE_LIST;
|
||||
m_strKeywordID = _strlistArgv[4];
|
||||
|
||||
setProxy();
|
||||
/*
|
||||
QFile file("proxy.txt");
|
||||
if (file.open(QIODevice::ReadOnly | QIODevice::Text))
|
||||
{
|
||||
@@ -155,7 +188,7 @@ void SCrawler::load(QStringList _strlistArgv)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}*/
|
||||
}
|
||||
|
||||
if (_strlistArgv[1] == "cafe_data")
|
||||
@@ -504,6 +537,7 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
|
||||
QString str[E_DATA_MAX];
|
||||
QString strHead[E_DATA_MAX] = {"article_nickname","article_id","article_title","article_date","article_data","platform_title"};
|
||||
QString strSympathy;
|
||||
QString strProfile;
|
||||
QWebElement proTitle = Find(frame->documentElement(),"meta","property","og:article:author");
|
||||
str[E_DATA_PLATFORM_TITLE] = proTitle.attribute("content").split("|").at(1).trimmed();
|
||||
if(str[E_DATA_PLATFORM_TITLE].length() > 0)
|
||||
@@ -514,6 +548,7 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
|
||||
str[E_DATA_PLATFORM_TITLE] = GetSafeUtf(proTitle.toPlainText().trimmed());
|
||||
}
|
||||
|
||||
|
||||
QWebElement image;
|
||||
QWebElement profile = Find(frame->documentElement(),"div","id","blog-profile");
|
||||
{
|
||||
@@ -566,7 +601,7 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
|
||||
str[E_DATA_NICK] = str[E_DATA_ID];
|
||||
|
||||
image = Find(profile,"img","alt","프로필 이미지");
|
||||
|
||||
strProfile = Find(profile, "p", "class", "caption align").toPlainText().trimmed();
|
||||
//strSympathy = FindLeft(Find(frame->documentElement(),"p","class","postre"),"a","class","pcol2 _symList").toPlainText().split(" ").at(1);
|
||||
|
||||
/*
|
||||
@@ -632,6 +667,14 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
|
||||
strQuery += image.attribute("src").trimmed();
|
||||
strQuery += "'";
|
||||
}
|
||||
strProfile = GetSafeUtf(strProfile);
|
||||
if(strProfile.length() > 0)
|
||||
{
|
||||
strQuery += ", ";
|
||||
strQuery += "article_profile='";
|
||||
strQuery += strProfile;
|
||||
strQuery += "'";
|
||||
}
|
||||
strQuery += " where article_url='";
|
||||
strQuery += m_strUrl;
|
||||
strQuery += "'";
|
||||
@@ -1014,9 +1057,8 @@ void SCrawler::saveFrameDaumCafeList(QWebFrame *frame)
|
||||
|
||||
{
|
||||
bool b_last = false;
|
||||
QWebElement paging_comm = Find(frame->documentElement(), "div", "class", "paging_comm");
|
||||
QWebElement paging_inner = Find(paging_comm, "span", "class", "paging_inner");
|
||||
b_last = Find(paging_inner, "a", "class", "ico_comm1 btn_page btn_next").isNull();
|
||||
|
||||
b_last = Find(frame->documentElement(), "div", "class", "result_message mg_cont hide").isNull();
|
||||
|
||||
QWebElement total = Find(eleMain,"span","class","f_nb f_l");
|
||||
if (total.toPlainText().isEmpty()) {m_bError = true; return;}
|
||||
@@ -1026,6 +1068,7 @@ void SCrawler::saveFrameDaumCafeList(QWebFrame *frame)
|
||||
int nNowFirst = GetNumber(strList.at(strList.size() - 2));
|
||||
if (nNow >= 1000 || (nNow - nNowFirst) < 9 || b_last)
|
||||
m_bLast = true;
|
||||
//cout << "nNow : " << nNow << endl << "nNow - nNowFirst: " << (nNow - nNowFirst) << endl << "b_last : " << b_last << endl;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1314,3 +1357,57 @@ QList<QWebElement> SCrawler::FindAllMid(const QWebElement _FindElement,const QSt
|
||||
}
|
||||
return returnElements;
|
||||
}
|
||||
|
||||
/**
 * Reads every row of the `Proxy` table and appends it to _str.
 *
 * Each row is serialized as "<proxy>,<port>\n". _str is not cleared first,
 * so text the caller already stored in it stays in front of the rows.
 *
 * @param _str  Output buffer that receives the serialized rows.
 * @return false when the SELECT statement fails to execute; true otherwise,
 *         including when the query yields no rows.
 */
bool SCrawler::getProxyList(QString &_str)
{
    QSqlQuery query;

    if (!query.exec(QString("select proxy, port from Proxy")))
        return false;

    while (query.next())
    {
        // Column 0 is the proxy host, column 1 its port.
        _str.append(query.value(0).toString());
        _str.append(",");
        _str.append(query.value(1).toString());
        _str.append("\n");
    }

    return true;
}
|
||||
|
||||
void SCrawler::setProxy()
|
||||
{
|
||||
QString proxyList;
|
||||
if (getProxyList(proxyList))
|
||||
{
|
||||
QVector <QStringList> vecProxy;
|
||||
QStringList strListProxy = proxyList.split("\n");
|
||||
foreach(QString str, strListProxy)
|
||||
{
|
||||
str = str.trimmed();
|
||||
if (str.isEmpty()) continue;
|
||||
vecProxy.push_back(str.split(","));
|
||||
}
|
||||
if (vecProxy.size() > 0)
|
||||
{
|
||||
QStringList strList = vecProxy.at(rand()%vecProxy.size());
|
||||
switch(strList.size())
|
||||
{
|
||||
case 1:
|
||||
cout << "p : " << strList.at(0).toStdString() << endl;
|
||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0))));
|
||||
break;
|
||||
case 2:
|
||||
//cout << "p : " << strList.at(0).toStdString() << ", " << strList.at(1).toStdString() << endl;
|
||||
cout << "p : " << strList.at(0).toStdString() << endl;
|
||||
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -57,6 +57,8 @@ private:
|
||||
void saveFrameDaumBlogComment(QWebFrame *frame);
|
||||
void saveFrameDaumCafeUrl(QWebFrame *frame);
|
||||
int GetNumber(QString _str);
|
||||
bool getProxyList(QString &_str);
|
||||
void setProxy();
|
||||
|
||||
|
||||
QWebElement Find(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind);
|
||||
|
||||
Reference in New Issue
Block a user