네이버 블로그 크롤링 오류 시 timer를 통해 webpage 읽어오도록 수정
git-svn-id: svn://192.168.0.12/source@217 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
@@ -4,7 +4,10 @@
|
||||
#include <QSqlError>
|
||||
#include <QByteArray>
|
||||
#include <QDebug>
|
||||
#include <QTimer>
|
||||
|
||||
#define BLOG_RETRY_MAX 4
|
||||
#define BLOG_RETRY_INTERVAL 3000
|
||||
using namespace std;
|
||||
|
||||
struct SProxyList
|
||||
@@ -16,6 +19,7 @@ struct SProxyList
|
||||
/**
 * Constructs the crawler.
 *
 * Starts the blog retry counter at zero, allocates the QWebPage stored in
 * m_page, and routes that page's loadFinished(bool) signal to the
 * saveResult(bool) slot of this object.
 */
SCrawler::SCrawler()
    : QObject()
    , m_nBlogRetryCount(0)
{
    m_page = new QWebPage;

    // Every finished page load is handed to saveResult().
    connect(m_page, SIGNAL(loadFinished(bool)), this, SLOT(saveResult(bool)));
}
|
||||
|
||||
@@ -182,7 +186,8 @@ void SCrawler::saveResult(bool ok)
|
||||
case E_NAVER_BLOG_LIST:saveFrameList(m_page->mainFrame());break;
|
||||
case E_NAVER_BLOG_BODY:
|
||||
{
|
||||
saveFrameUrl(m_page->mainFrame());
|
||||
if(!saveFrameUrl(m_page->mainFrame()))
|
||||
return;
|
||||
bodydata.sendDB();
|
||||
break;
|
||||
}
|
||||
@@ -477,12 +482,12 @@ enum E_DATA
|
||||
E_DATA_MAX,
|
||||
};
|
||||
|
||||
void SCrawler::saveFrameUrl(QWebFrame *frame)
|
||||
bool SCrawler::saveFrameUrl(QWebFrame *frame)
|
||||
{
|
||||
static int cz = 0;
|
||||
//static int cz = 0;
|
||||
// Debug(frame->frameName() + QString::number(cz++) + ".html",frame->toHtml());
|
||||
|
||||
QSqlQuery sql;
|
||||
//QSqlQuery sql;
|
||||
if (frame->frameName().compare(QString("BuddyConnectIframe")) == 0)
|
||||
{
|
||||
QWebElement profile = Find(frame->documentElement(),"div","class","profile_name");
|
||||
@@ -493,6 +498,7 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
|
||||
if (frame->frameName().compare(QString("mainFrame")) == 0)
|
||||
{
|
||||
QString str[E_DATA_MAX];
|
||||
QString sympathy;
|
||||
QString strProfile;
|
||||
QWebElement proTitle = Find(frame->documentElement(),"meta","property","og:article:author");
|
||||
str[E_DATA_PLATFORM_TITLE] = proTitle.attribute("content").split("|").at(1).trimmed();
|
||||
@@ -585,9 +591,37 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
|
||||
str[E_DATA_DATA] = GetSafeUtf(str[E_DATA_DATA]);
|
||||
}
|
||||
}
|
||||
{
|
||||
|
||||
QWebElement WEsympathy = Find(frame->documentElement(),"div","class","btn_like pcol2");
|
||||
|
||||
if(WEsympathy.isNull())
|
||||
{
|
||||
sympathy = "0";
|
||||
}
|
||||
else
|
||||
{
|
||||
sympathy = WEsympathy.toPlainText().trimmed();
|
||||
}
|
||||
//qDebug() << "Sympathy: " << sympathy;
|
||||
//qDebug() << strProfile;
|
||||
|
||||
}
|
||||
{
|
||||
|
||||
|
||||
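// Retry when the profile or the sympathy text is empty. NOTE(review): && binds tighter than ||, so BLOG_RETRY_MAX only limits the sympathy-empty case - confirm this is intended.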
|
||||
if(strProfile.isEmpty() || sympathy.isEmpty() && (m_nBlogRetryCount < BLOG_RETRY_MAX))
|
||||
{
|
||||
m_nBlogRetryCount++;
|
||||
QTimer::singleShot(BLOG_RETRY_INTERVAL, this, SLOT(crawlBlog()));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//QString strHead[E_DATA_MAX] = {"article_nickname","article_id","article_title","article_date","article_data","platform_title"};
|
||||
bodydata.setData(str[0].trimmed(), bodydata.ARTICLE_NICKNAME);
|
||||
bodydata.setData(str[1].trimmed(), bodydata.ARTICLE_ID);
|
||||
@@ -608,6 +642,8 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
|
||||
bodydata.setData(m_strUrl, bodydata.ARTICLE_URL);
|
||||
bodydata.setTable(m_strTable);
|
||||
|
||||
bodydata.setData(sympathy, bodydata.ARTICLE_HIT);
|
||||
|
||||
bodydata.setData("naver", bodydata.PLATFORM_NAME);
|
||||
bodydata.setData("blog", bodydata.PLATFORM_FORM);
|
||||
bodydata.setData("body", bodydata.ARTICLE_FORM);
|
||||
@@ -616,8 +652,16 @@ void SCrawler::saveFrameUrl(QWebFrame *frame)
|
||||
|
||||
}
|
||||
|
||||
bool b_ok = true;
|
||||
foreach(QWebFrame *childFrame, frame->childFrames())
|
||||
saveFrameUrl(childFrame);
|
||||
b_ok = (b_ok && saveFrameUrl(childFrame));
|
||||
|
||||
return b_ok;
|
||||
}
|
||||
|
||||
void SCrawler::crawlBlog()
|
||||
{
|
||||
saveResult(true);
|
||||
}
|
||||
|
||||
void SCrawler::saveFrameComment(QWebFrame *frame)
|
||||
|
||||
Reference in New Issue
Block a user