diff --git a/CrawlerProcess/scrawler.cpp b/CrawlerProcess/scrawler.cpp index 2809a02..f104fb9 100644 --- a/CrawlerProcess/scrawler.cpp +++ b/CrawlerProcess/scrawler.cpp @@ -463,7 +463,7 @@ void SCrawler::saveFrameUrl(QWebFrame *frame) QString str = profile.toPlainText().split("\n").at(0); if (str.isEmpty() == false) { - bodydata.setData(str, bodydata.ARTICLE_NICKNAME); + //bodydata.setData(str, bodydata.ARTICLE_NICKNAME); bodydata.setData(m_strUrl, bodydata.ARTICLE_URL); /* QString strQuery = "update " + m_strTable + " set article_nickname = '"; @@ -546,18 +546,20 @@ void SCrawler::saveFrameUrl(QWebFrame *frame) } } str[E_DATA_NICK] = GetSafeUtf(str[E_DATA_NICK]); - if (m_strUrl.split("/").at(3) == str[E_DATA_NICK]) + if (m_strUrl.split("/").at(3).trimmed() == str[E_DATA_NICK].trimmed()) { str[E_DATA_ID] = str[E_DATA_NICK]; } else { + /* QWebElement id = Find(profile,"span","class","itemfont col"); if (id.toPlainText().isEmpty()==false) { str[E_DATA_ID] = id.toPlainText(); str[E_DATA_ID] = str[E_DATA_ID].replace("(","").replace(")",""); } + */ if (str[E_DATA_ID].isEmpty()) { @@ -570,6 +572,8 @@ void SCrawler::saveFrameUrl(QWebFrame *frame) if(str[E_DATA_NICK].length() == 0) str[E_DATA_NICK] = str[E_DATA_ID]; + //qDebug() << profile.toInnerXml(); + image = Find(profile,"img","alt","프로필 이미지"); strProfile = Find(profile, "p", "class", "caption align").toPlainText().trimmed(); //strSympathy = FindLeft(Find(frame->documentElement(),"p","class","postre"),"a","class","pcol2 _symList").toPlainText().split(" ").at(1); @@ -1163,7 +1167,11 @@ void SCrawler::saveFrameDaumCafeList(QWebFrame *frame) foreach(QString str, urlList) { strUrlList += "'"; - strUrlList += str; + QStringList strlist = str.split("?"); + if(strlist.size() > 1) + strUrlList += strlist.at(0).trimmed(); + else + strUrlList += str; strUrlList += "',"; } strUrlList = strUrlList.left(strUrlList.size() - 1); @@ -1183,8 +1191,6 @@ void SCrawler::saveFrameDaumCafeList(QWebFrame *frame) } } - - foreach(QWebElement 
eleSub,eleMain.findAll("div")) { if (eleSub.attribute("class") == "wrap_cont") @@ -1284,12 +1290,22 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame) QWebElement other = frame->documentElement().findFirst("title"); QString strTitle = other.toPlainText().trimmed().split("|").at(0).trimmed(); - + QString strUrl_; if (strTitle.isEmpty() == false) { bodydata.setTable(m_strTable); - bodydata.setData(m_strUrl, bodydata.ARTICLE_URL); + QStringList strlist = m_strUrl.split("?"); + if(strlist.size() > 1) + { + bodydata.setData(strlist.at(0).trimmed(), bodydata.ARTICLE_URL); + strUrl_ = strlist.at(0).trimmed(); + } + else + { + bodydata.setData(m_strUrl, bodydata.ARTICLE_URL); + strUrl_ = m_strUrl; + } bodydata.setData(SqlString(GetSafeUtf(strTitle)), bodydata.PLATFORM_TITLE); /* QString strQuery = "update "; @@ -1375,7 +1391,17 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame) if(!strID.isEmpty()) bodydata.setData(strID, bodydata.ARTICLE_ID); bodydata.setData(strHits, bodydata.ARTICLE_HIT); - bodydata.setData(m_strUrl, bodydata.ARTICLE_URL); + QStringList strlist = m_strUrl.split("?"); + if(strlist.size() > 1) + { + bodydata.setData(strlist.at(0).trimmed(), bodydata.ARTICLE_URL); + strUrl_ = strlist.at(0).trimmed(); + } + else + { + bodydata.setData(m_strUrl, bodydata.ARTICLE_URL); + strUrl_ = m_strUrl; + } bodydata.setData("daum", bodydata.PLATFORM_NAME); bodydata.setData("cafe", bodydata.PLATFORM_FORM); bodydata.setData("body", bodydata.ARTICLE_FORM); @@ -1439,7 +1465,14 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame) if(strDatetest.count(".") == 0) { strDate = QDateTime::currentDateTime().toString("yyyy-MM-dd"); - strDate += (" " + strDatetest); + strDate += (" " + strDatetest + ":00"); + QDateTime getTime = QDateTime::fromString(strDate, "yyyy-MM-dd hh:mm:ss"); + QDateTime nowTime = QDateTime::currentDateTime(); + if(getTime > nowTime) + { + getTime = getTime.addDays(-1); /* addDays() is const and returns the shifted value; it must be assigned back or the date stays today */ + strDate = getTime.toString("yyyy-MM-dd hh:mm:ss"); + } } else { @@ -1447,11 +1480,10 
@@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame) } if (strDate.isEmpty()) continue; - else strDate += ":00"; QSqlQuery query; query.prepare(QString("insert into " + m_strTable + " (platform_name,platform_form,article_form,article_url,article_id,article_nickname,article_data,article_date,article_order,platform_id,article_hit,platform_title) VALUES ('daum','cafe','reply',:URL,:ID,:NICK,:DATA,:DATE,:ROWNUM,:PLATFORMID,:HITS,:TITLE)").toUtf8()); - query.bindValue(":URL",m_strUrl.toUtf8()); + query.bindValue(":URL",strUrl_.toUtf8()); query.bindValue(":ID",strID.toUtf8()); query.bindValue(":NICK",strNick.toUtf8()); query.bindValue(":DATA",strData.toUtf8()); @@ -1485,7 +1517,14 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame) if(strDatetest.count(".") == 0) { strDate = QDateTime::currentDateTime().toString("yyyy-MM-dd"); - strDate += (" " + strDatetest); + strDate += (" " + strDatetest + ":00"); + QDateTime getTime = QDateTime::fromString(strDate, "yyyy-MM-dd hh:mm:ss"); + QDateTime nowTime = QDateTime::currentDateTime(); + if(getTime > nowTime) + { + getTime = getTime.addDays(-1); /* addDays() is const and returns the shifted value; it must be assigned back or the date stays today */ + strDate = getTime.toString("yyyy-MM-dd hh:mm:ss"); + } } else { @@ -1493,11 +1532,10 @@ void SCrawler::saveFrameDaumCafeUrl(QWebFrame *frame) } if (strDate.isEmpty()) continue; - else strDate += ":00"; QSqlQuery query; query.prepare(QString("insert into " + m_strTable + " (platform_name,platform_form,article_form,article_url,article_id,article_nickname,article_data,article_date,article_parent,article_order,platform_id,article_hit,platform_title) VALUES ('daum','cafe','reply',:URL,:ID,:NICK,:DATA,:DATE,:PARENT,:ROWNUM,:PLATFORMID,:HITS,:TITLE)").toUtf8()); - query.bindValue(":URL",m_strUrl.toUtf8()); + query.bindValue(":URL",strUrl_.toUtf8()); query.bindValue(":ID",strID.toUtf8()); query.bindValue(":NICK",strNick.toUtf8()); query.bindValue(":DATA",strData.toUtf8());