proxy db연동
네이버 블로그 리스트가 모드 v,x시 무한 reloadlist되는 버그 수정
다음 카페 크롤러 last조건 수정
네이버 article_profile 작성
git-svn-id: svn://192.168.0.12/source@57 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
@@ -35,6 +35,7 @@ void SCrawler::load(QStringList _strlistArgv)
|
|||||||
m_nSelect = E_NAVER_CAFE_LIST;
|
m_nSelect = E_NAVER_CAFE_LIST;
|
||||||
m_strKeywordID = _strlistArgv[4];
|
m_strKeywordID = _strlistArgv[4];
|
||||||
|
|
||||||
|
/*
|
||||||
QFile file("proxy.txt");
|
QFile file("proxy.txt");
|
||||||
if (file.open(QIODevice::ReadOnly | QIODevice::Text))
|
if (file.open(QIODevice::ReadOnly | QIODevice::Text))
|
||||||
{
|
{
|
||||||
@@ -61,6 +62,36 @@ void SCrawler::load(QStringList _strlistArgv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
QString proxyList;
|
||||||
|
if (getProxyList(proxyList))
|
||||||
|
{
|
||||||
|
QVector <QStringList> vecProxy;
|
||||||
|
QStringList strListProxy = proxyList.split("\n");
|
||||||
|
foreach(QString str, strListProxy)
|
||||||
|
{
|
||||||
|
str = str.trimmed();
|
||||||
|
if (str.isEmpty()) continue;
|
||||||
|
vecProxy.push_back(str.split(","));
|
||||||
|
}
|
||||||
|
if (vecProxy.size() > 0)
|
||||||
|
{
|
||||||
|
QStringList strList = vecProxy.at(rand()%vecProxy.size());
|
||||||
|
switch(strList.size())
|
||||||
|
{
|
||||||
|
case 1:
|
||||||
|
cout << "p : " << strList.at(0).toStdString() << endl;
|
||||||
|
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0))));
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
cout << "p : " << strList.at(0).toStdString() << ", " << strList.at(1).toStdString() << endl;
|
||||||
|
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
setProxy();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (_strlistArgv[1] == "cafe_data")
|
if (_strlistArgv[1] == "cafe_data")
|
||||||
@@ -76,7 +107,7 @@ void SCrawler::load(QStringList _strlistArgv)
|
|||||||
m_nSelect = E_NAVER_BLOG_LIST;
|
m_nSelect = E_NAVER_BLOG_LIST;
|
||||||
m_strKeywordID = _strlistArgv[4];
|
m_strKeywordID = _strlistArgv[4];
|
||||||
//cout << "ok";
|
//cout << "ok";
|
||||||
|
/*
|
||||||
QFile file("proxy.txt");
|
QFile file("proxy.txt");
|
||||||
if (file.open(QIODevice::ReadOnly | QIODevice::Text))
|
if (file.open(QIODevice::ReadOnly | QIODevice::Text))
|
||||||
{
|
{
|
||||||
@@ -102,7 +133,8 @@ void SCrawler::load(QStringList _strlistArgv)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}*/
|
||||||
|
setProxy();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (_strlistArgv[1] == "blog_url")
|
if (_strlistArgv[1] == "blog_url")
|
||||||
@@ -129,7 +161,8 @@ void SCrawler::load(QStringList _strlistArgv)
|
|||||||
m_strUrl = _strlistArgv[2];
|
m_strUrl = _strlistArgv[2];
|
||||||
m_nSelect = E_DAUM_CAFE_LIST;
|
m_nSelect = E_DAUM_CAFE_LIST;
|
||||||
m_strKeywordID = _strlistArgv[4];
|
m_strKeywordID = _strlistArgv[4];
|
||||||
|
setProxy();
|
||||||
|
/*
|
||||||
QFile file("proxy.txt");
|
QFile file("proxy.txt");
|
||||||
if (file.open(QIODevice::ReadOnly | QIODevice::Text))
|
if (file.open(QIODevice::ReadOnly | QIODevice::Text))
|
||||||
{
|
{
|
||||||
@@ -155,7 +188,7 @@ void SCrawler::load(QStringList _strlistArgv)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}*/
|
||||||
}
|
}
|
||||||
|
|
||||||
if (_strlistArgv[1] == "cafe_data")
|
if (_strlistArgv[1] == "cafe_data")
|
||||||
@@ -504,6 +537,7 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
|
|||||||
QString str[E_DATA_MAX];
|
QString str[E_DATA_MAX];
|
||||||
QString strHead[E_DATA_MAX] = {"article_nickname","article_id","article_title","article_date","article_data","platform_title"};
|
QString strHead[E_DATA_MAX] = {"article_nickname","article_id","article_title","article_date","article_data","platform_title"};
|
||||||
QString strSympathy;
|
QString strSympathy;
|
||||||
|
QString strProfile;
|
||||||
QWebElement proTitle = Find(frame->documentElement(),"meta","property","og:article:author");
|
QWebElement proTitle = Find(frame->documentElement(),"meta","property","og:article:author");
|
||||||
str[E_DATA_PLATFORM_TITLE] = proTitle.attribute("content").split("|").at(1).trimmed();
|
str[E_DATA_PLATFORM_TITLE] = proTitle.attribute("content").split("|").at(1).trimmed();
|
||||||
if(str[E_DATA_PLATFORM_TITLE].length() > 0)
|
if(str[E_DATA_PLATFORM_TITLE].length() > 0)
|
||||||
@@ -514,6 +548,7 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
|
|||||||
str[E_DATA_PLATFORM_TITLE] = GetSafeUtf(proTitle.toPlainText().trimmed());
|
str[E_DATA_PLATFORM_TITLE] = GetSafeUtf(proTitle.toPlainText().trimmed());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
QWebElement image;
|
QWebElement image;
|
||||||
QWebElement profile = Find(frame->documentElement(),"div","id","blog-profile");
|
QWebElement profile = Find(frame->documentElement(),"div","id","blog-profile");
|
||||||
{
|
{
|
||||||
@@ -566,7 +601,7 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
|
|||||||
str[E_DATA_NICK] = str[E_DATA_ID];
|
str[E_DATA_NICK] = str[E_DATA_ID];
|
||||||
|
|
||||||
image = Find(profile,"img","alt","프로필 이미지");
|
image = Find(profile,"img","alt","프로필 이미지");
|
||||||
|
strProfile = Find(profile, "p", "class", "caption align").toPlainText().trimmed();
|
||||||
//strSympathy = FindLeft(Find(frame->documentElement(),"p","class","postre"),"a","class","pcol2 _symList").toPlainText().split(" ").at(1);
|
//strSympathy = FindLeft(Find(frame->documentElement(),"p","class","postre"),"a","class","pcol2 _symList").toPlainText().split(" ").at(1);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -632,6 +667,14 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
|
|||||||
strQuery += image.attribute("src").trimmed();
|
strQuery += image.attribute("src").trimmed();
|
||||||
strQuery += "'";
|
strQuery += "'";
|
||||||
}
|
}
|
||||||
|
strProfile = GetSafeUtf(strProfile);
|
||||||
|
if(strProfile.length() > 0)
|
||||||
|
{
|
||||||
|
strQuery += ", ";
|
||||||
|
strQuery += "article_profile='";
|
||||||
|
strQuery += strProfile;
|
||||||
|
strQuery += "'";
|
||||||
|
}
|
||||||
strQuery += " where article_url='";
|
strQuery += " where article_url='";
|
||||||
strQuery += m_strUrl;
|
strQuery += m_strUrl;
|
||||||
strQuery += "'";
|
strQuery += "'";
|
||||||
@@ -1014,9 +1057,8 @@ void SCrawler::saveFrameDaumCafeList(QWebFrame *frame)
|
|||||||
|
|
||||||
{
|
{
|
||||||
bool b_last = false;
|
bool b_last = false;
|
||||||
QWebElement paging_comm = Find(frame->documentElement(), "div", "class", "paging_comm");
|
|
||||||
QWebElement paging_inner = Find(paging_comm, "span", "class", "paging_inner");
|
b_last = Find(frame->documentElement(), "div", "class", "result_message mg_cont hide").isNull();
|
||||||
b_last = Find(paging_inner, "a", "class", "ico_comm1 btn_page btn_next").isNull();
|
|
||||||
|
|
||||||
QWebElement total = Find(eleMain,"span","class","f_nb f_l");
|
QWebElement total = Find(eleMain,"span","class","f_nb f_l");
|
||||||
if (total.toPlainText().isEmpty()) {m_bError = true; return;}
|
if (total.toPlainText().isEmpty()) {m_bError = true; return;}
|
||||||
@@ -1026,6 +1068,7 @@ void SCrawler::saveFrameDaumCafeList(QWebFrame *frame)
|
|||||||
int nNowFirst = GetNumber(strList.at(strList.size() - 2));
|
int nNowFirst = GetNumber(strList.at(strList.size() - 2));
|
||||||
if (nNow >= 1000 || (nNow - nNowFirst) < 9 || b_last)
|
if (nNow >= 1000 || (nNow - nNowFirst) < 9 || b_last)
|
||||||
m_bLast = true;
|
m_bLast = true;
|
||||||
|
//cout << "nNow : " << nNow << endl << "nNow - nNowFirst: " << (nNow - nNowFirst) << endl << "b_last : " << b_last << endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1314,3 +1357,57 @@ QList<QWebElement> SCrawler::FindAllMid(const QWebElement _FindElement,const QSt
|
|||||||
}
|
}
|
||||||
return returnElements;
|
return returnElements;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reads every row of the Proxy table and appends it to _str as text.
///
/// Each row becomes one line of the form "<proxy>,<port>\n".
///
/// @param _str  Output buffer. Rows are appended; existing content is kept.
/// @return false if the SELECT statement could not be executed, true
///         otherwise (including when the table has no rows).
bool SCrawler::getProxyList(QString &_str)
{
    QSqlQuery query;
    const QString strSelect = "select proxy, port from Proxy";

    if (!query.exec(strSelect))
        return false;

    while (query.next())
    {
        // Column 0 is the proxy, column 1 is the port (order of the SELECT list).
        _str += query.value(0).toString() + "," + query.value(1).toString() + "\n";
    }

    return true;
}
|
||||||
|
|
||||||
|
void SCrawler::setProxy()
|
||||||
|
{
|
||||||
|
QString proxyList;
|
||||||
|
if (getProxyList(proxyList))
|
||||||
|
{
|
||||||
|
QVector <QStringList> vecProxy;
|
||||||
|
QStringList strListProxy = proxyList.split("\n");
|
||||||
|
foreach(QString str, strListProxy)
|
||||||
|
{
|
||||||
|
str = str.trimmed();
|
||||||
|
if (str.isEmpty()) continue;
|
||||||
|
vecProxy.push_back(str.split(","));
|
||||||
|
}
|
||||||
|
if (vecProxy.size() > 0)
|
||||||
|
{
|
||||||
|
QStringList strList = vecProxy.at(rand()%vecProxy.size());
|
||||||
|
switch(strList.size())
|
||||||
|
{
|
||||||
|
case 1:
|
||||||
|
cout << "p : " << strList.at(0).toStdString() << endl;
|
||||||
|
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0))));
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
//cout << "p : " << strList.at(0).toStdString() << ", " << strList.at(1).toStdString() << endl;
|
||||||
|
cout << "p : " << strList.at(0).toStdString() << endl;
|
||||||
|
QNetworkProxy::setApplicationProxy(*(new QNetworkProxy(QNetworkProxy::HttpProxy,strList.at(0),strList.at(1).toInt())));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -57,6 +57,8 @@ private:
|
|||||||
void saveFrameDaumBlogComment(QWebFrame *frame);
|
void saveFrameDaumBlogComment(QWebFrame *frame);
|
||||||
void saveFrameDaumCafeUrl(QWebFrame *frame);
|
void saveFrameDaumCafeUrl(QWebFrame *frame);
|
||||||
int GetNumber(QString _str);
|
int GetNumber(QString _str);
|
||||||
|
bool getProxyList(QString &_str);
|
||||||
|
void setProxy();
|
||||||
|
|
||||||
|
|
||||||
QWebElement Find(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind);
|
QWebElement Find(const QWebElement _FindElement,const QString _strElement,const QString _strAttrib,const QString _strFind);
|
||||||
|
|||||||
Reference in New Issue
Block a user