Merge branch 'working/twitter'

This commit is contained in:
mjjo
2017-07-27 11:33:10 +09:00
29 changed files with 1221 additions and 15 deletions

View File

@@ -28,7 +28,11 @@ SOURCES += main.cpp\
skakaousermanage.cpp \ skakaousermanage.cpp \
sfacebooktagmanage.cpp \ sfacebooktagmanage.cpp \
sfacebookusermanage.cpp \ sfacebookusermanage.cpp \
snaverblogaccuracymanager.cpp snaverblogaccuracymanager.cpp \
stwittertagmanage.cpp \
stwitterusermanage.cpp \
syoutubetagmanage.cpp \
syoutubeusermanage.cpp
HEADERS += widget.h \ HEADERS += widget.h \
smanage.h \ smanage.h \
@@ -45,5 +49,9 @@ HEADERS += widget.h \
skakaousermanage.h \ skakaousermanage.h \
sfacebooktagmanage.h \ sfacebooktagmanage.h \
sfacebookusermanage.h \ sfacebookusermanage.h \
snaverblogaccuracymanage.h snaverblogaccuracymanage.h \
stwittertagmanage.h \
stwitterusermanage.h \
syoutubetagmanage.h \
syoutubeusermanage.h

View File

@@ -0,0 +1,89 @@
#include "stwittertagmanage.h"
#include <QThread>
#include "widget.h"
// Crawler manager for tag-based Twitter searches. Drives an external Python
// crawler process and relays its console output into the main widget's log.
STwitterTagManage::STwitterTagManage(QObject *pObject) : SManage(pObject)
{
    m_nID = 0;
    // Forward the crawler process's stdout/stderr into our logging slots.
    // m_pro[0] is presumably the QProcess slot inherited from SManage — only
    // index 0 is used by this manager.
    connect(&m_pro[0], SIGNAL(readyReadStandardOutput()), this, SLOT(readStandardOutput()));
    connect(&m_pro[0], SIGNAL(readyReadStandardError()), this, SLOT(readStandardError()));
}
// Arm the crawler: clear the completion flag and return to the RUN state so
// the next Update() pass launches the external process.
void STwitterTagManage::Start()
{
    m_bFinalLast = false;     // crawl not finished yet
    m_nMode = E_PROCESS_RUN;  // Update() will spawn the crawler from here
}
// One scheduler tick. Launches the external Python crawler when in the RUN
// state and no process slot is busy, then waits for processFinished().
// Returns true once the crawl has completed.
bool STwitterTagManage::Update()
{
    if (m_bFinalLast)
        return m_bFinalLast;

    if (m_nMode == E_PROCESS_RUN && UseProcess() == false)
    {
#if defined(Q_OS_WIN32)
        m_pro[0].start("python", QStringList() << "webbasedcrawler.py" << "twitter" << m_strKeywordID << m_strGroupID << m_pMain->StartDay() << m_pMain->UntilPage());
#else
        m_pro[0].start("/usr/bin/python3", QStringList() << "webbasedcrawler.py" << "twitter" << m_strKeywordID << m_strGroupID << m_pMain->StartDay() << m_pMain->UntilPage());
#endif
        m_nMode = E_PROCESS_FINISH_WAIT;
    }
    // In E_PROCESS_FINISH_WAIT there is nothing to do until the process exits.
    return m_bFinalLast;
}
// Invoked when the external crawler process exits. Only meaningful while we
// are waiting for it (E_PROCESS_FINISH_WAIT); any other state ignores it.
void STwitterTagManage::processFinished(QProcess *pPro, QString _strOut)
{
    (void)pPro;     // unused — this manager owns a single crawler process
    (void)_strOut;  // unused — output was already streamed via the read slots
    if (m_nMode != E_PROCESS_FINISH_WAIT)
        return;
    m_nMode = E_PROCESS_RUN;
    m_bFinalLast = true;
    m_pMain->InsertLog("Finish Crawling :)");
    m_pMain->SetCrawlingState("Finish");
    m_ncList = 1;
    m_bLast = false;
}
void STwitterTagManage::readStandardOutput()
{
QProcess *pPro = (QProcess*)sender();
QThread::msleep(100);
QString str = pPro->readAllStandardOutput();
QStringList list = str.split("\n", QString::SkipEmptyParts);
foreach(QString log,list)
{
if (m_pMain)
{
m_pMain->InsertLog(log);
}
else
exit(0);
}
}
void STwitterTagManage::readStandardError()
{
QProcess *pPro = (QProcess*)sender();
QThread::msleep(100);
QString str = pPro->readAllStandardError();
QStringList list = str.split("\n", QString::SkipEmptyParts);
foreach(QString log,list)
{
if (m_pMain)
{
m_pMain->InsertLog(log);
}
else
exit(0);
}
}

View File

@@ -0,0 +1,30 @@
#ifndef STWITTERTAGMANAGE_H
#define STWITTERTAGMANAGE_H
#include "smanage.h"
// Manages tag-based Twitter crawling: spawns the external Python crawler and
// relays its output to the main widget's log (see stwittertagmanage.cpp).
class STwitterTagManage : public SManage
{
    Q_OBJECT
public:
    // Lifecycle states of the external crawler process.
    enum E_PROCESS_STATE
    {
        E_PROCESS_RUN = 0,     // idle; next Update() launches the crawler
        E_PROCESS_FINISH_WAIT, // crawler running; waiting for processFinished()
    };
    // Fix: single-argument constructor marked explicit to prevent accidental
    // implicit conversion from QObject*.
    explicit STwitterTagManage(QObject *pObject);
private:
    QString makeGetListQuery(QString _str, QDate _date, int _nPage);
private:
    QString m_strListQuery;         // NOTE(review): unused in the .cpp — candidate for removal
    QVector<QString> m_strListURL;  // NOTE(review): unused in the .cpp — candidate for removal
protected:
    bool Update();
    void Start();
    void processFinished(QProcess *pPro, QString _strOut);
    void ReLoadList();
private slots:
    void readStandardOutput();  // streams crawler stdout into the log
    void readStandardError();   // streams crawler stderr into the log
};
#endif // STWITTERTAGMANAGE_H

View File

@@ -0,0 +1,89 @@
#include "stwitterusermanage.h"
#include <QThread>
#include "widget.h"
// Crawler manager for user-based Twitter searches. Drives an external Python
// crawler process and relays its console output into the main widget's log.
STwitterUserManage::STwitterUserManage(QObject *pObject) : SManage(pObject)
{
    m_nID = 0;
    // Forward the crawler process's stdout/stderr into our logging slots.
    // m_pro[0] is presumably the QProcess slot inherited from SManage — only
    // index 0 is used by this manager.
    connect(&m_pro[0], SIGNAL(readyReadStandardOutput()), this, SLOT(readStandardOutput()));
    connect(&m_pro[0], SIGNAL(readyReadStandardError()), this, SLOT(readStandardError()));
}
// Arm the crawler: clear the completion flag and return to the RUN state so
// the next Update() pass launches the external process.
void STwitterUserManage::Start()
{
    m_bFinalLast = false;     // crawl not finished yet
    m_nMode = E_PROCESS_RUN;  // Update() will spawn the crawler from here
}
// One scheduler tick. Launches the external Python crawler when in the RUN
// state and no process slot is busy, then waits for processFinished().
// Returns true once the crawl has completed.
bool STwitterUserManage::Update()
{
    if (m_bFinalLast)
        return m_bFinalLast;

    if (m_nMode == E_PROCESS_RUN && UseProcess() == false)
    {
#if defined(Q_OS_WIN32)
        m_pro[0].start("python", QStringList() << "webbasedcrawler.py" << "twitter" << m_strKeywordID << m_strGroupID << m_pMain->StartDay() << m_pMain->UntilPage());
#else
        m_pro[0].start("/usr/bin/python3", QStringList() << "webbasedcrawler.py" << "twitter" << m_strKeywordID << m_strGroupID << m_pMain->StartDay() << m_pMain->UntilPage());
#endif
        m_nMode = E_PROCESS_FINISH_WAIT;
    }
    // In E_PROCESS_FINISH_WAIT there is nothing to do until the process exits.
    return m_bFinalLast;
}
// Invoked when the external crawler process exits. Only meaningful while we
// are waiting for it (E_PROCESS_FINISH_WAIT); any other state ignores it.
void STwitterUserManage::processFinished(QProcess *pPro, QString _strOut)
{
    (void)pPro;     // unused — this manager owns a single crawler process
    (void)_strOut;  // unused — output was already streamed via the read slots
    if (m_nMode != E_PROCESS_FINISH_WAIT)
        return;
    m_nMode = E_PROCESS_RUN;
    m_bFinalLast = true;
    m_pMain->InsertLog("Finish Crawling :)");
    m_pMain->SetCrawlingState("Finish");
    m_ncList = 1;
    m_bLast = false;
}
void STwitterUserManage::readStandardOutput()
{
QProcess *pPro = (QProcess*)sender();
QThread::msleep(100);
QString str = pPro->readAllStandardOutput();
QStringList list = str.split("\n", QString::SkipEmptyParts);
foreach(QString log,list)
{
if (m_pMain)
{
m_pMain->InsertLog(log);
}
else
exit(0);
}
}
void STwitterUserManage::readStandardError()
{
QProcess *pPro = (QProcess*)sender();
QThread::msleep(100);
QString str = pPro->readAllStandardError();
QStringList list = str.split("\n", QString::SkipEmptyParts);
foreach(QString log,list)
{
if (m_pMain)
{
m_pMain->InsertLog(log);
}
else
exit(0);
}
}

View File

@@ -0,0 +1,30 @@
#ifndef STWITTERUSERMANAGE_H
#define STWITTERUSERMANAGE_H
#include "smanage.h"
// Manages user-based Twitter crawling: spawns the external Python crawler and
// relays its output to the main widget's log (see stwitterusermanage.cpp).
class STwitterUserManage : public SManage
{
    Q_OBJECT
public:
    // Lifecycle states of the external crawler process.
    enum E_PROCESS_STATE
    {
        E_PROCESS_RUN = 0,     // idle; next Update() launches the crawler
        E_PROCESS_FINISH_WAIT, // crawler running; waiting for processFinished()
    };
    // Fix: single-argument constructor marked explicit to prevent accidental
    // implicit conversion from QObject*.
    explicit STwitterUserManage(QObject *pObject);
private:
    QString makeGetListQuery(QString _str, QDate _date, int _nPage);
private:
    QString m_strListQuery;         // NOTE(review): unused in the .cpp — candidate for removal
    QVector<QString> m_strListURL;  // NOTE(review): unused in the .cpp — candidate for removal
protected:
    bool Update();
    void Start();
    void processFinished(QProcess *pPro, QString _strOut);
    void ReLoadList();
private slots:
    void readStandardOutput();  // streams crawler stdout into the log
    void readStandardError();   // streams crawler stderr into the log
};
#endif // STWITTERUSERMANAGE_H

View File

@@ -0,0 +1,89 @@
#include "syoutubetagmanage.h"
#include <QThread>
#include "widget.h"
// Crawler manager for tag-based YouTube searches. Drives an external Python
// crawler process and relays its console output into the main widget's log.
SYoutubeTagManage::SYoutubeTagManage(QObject *pObject) : SManage(pObject)
{
    m_nID = 0;
    // Forward the crawler process's stdout/stderr into our logging slots.
    // m_pro[0] is presumably the QProcess slot inherited from SManage — only
    // index 0 is used by this manager.
    connect(&m_pro[0], SIGNAL(readyReadStandardOutput()), this, SLOT(readStandardOutput()));
    connect(&m_pro[0], SIGNAL(readyReadStandardError()), this, SLOT(readStandardError()));
}
// Arm the crawler: clear the completion flag and return to the RUN state so
// the next Update() pass launches the external process.
void SYoutubeTagManage::Start()
{
    m_bFinalLast = false;     // crawl not finished yet
    m_nMode = E_PROCESS_RUN;  // Update() will spawn the crawler from here
}
// One scheduler tick. Launches the external Python crawler when in the RUN
// state and no process slot is busy, then waits for processFinished().
// Returns true once the crawl has completed.
bool SYoutubeTagManage::Update()
{
    if (m_bFinalLast)
        return m_bFinalLast;

    if (m_nMode == E_PROCESS_RUN && UseProcess() == false)
    {
#if defined(Q_OS_WIN32)
        m_pro[0].start("python", QStringList() << "webbasedcrawler.py" << "youtube" << m_strKeywordID << m_strGroupID << m_pMain->StartDay() << m_pMain->UntilPage());
#else
        m_pro[0].start("/usr/bin/python3", QStringList() << "webbasedcrawler.py" << "youtube" << m_strKeywordID << m_strGroupID << m_pMain->StartDay() << m_pMain->UntilPage());
#endif
        m_nMode = E_PROCESS_FINISH_WAIT;
    }
    // In E_PROCESS_FINISH_WAIT there is nothing to do until the process exits.
    return m_bFinalLast;
}
// Invoked when the external crawler process exits. Only meaningful while we
// are waiting for it (E_PROCESS_FINISH_WAIT); any other state ignores it.
void SYoutubeTagManage::processFinished(QProcess *pPro, QString _strOut)
{
    (void)pPro;     // unused — this manager owns a single crawler process
    (void)_strOut;  // unused — output was already streamed via the read slots
    if (m_nMode != E_PROCESS_FINISH_WAIT)
        return;
    m_nMode = E_PROCESS_RUN;
    m_bFinalLast = true;
    m_pMain->InsertLog("Finish Crawling :)");
    m_pMain->SetCrawlingState("Finish");
    m_ncList = 1;
    m_bLast = false;
}
void SYoutubeTagManage::readStandardOutput()
{
QProcess *pPro = (QProcess*)sender();
QThread::msleep(100);
QString str = pPro->readAllStandardOutput();
QStringList list = str.split("\n", QString::SkipEmptyParts);
foreach(QString log,list)
{
if (m_pMain)
{
m_pMain->InsertLog(log);
}
else
exit(0);
}
}
void SYoutubeTagManage::readStandardError()
{
QProcess *pPro = (QProcess*)sender();
QThread::msleep(100);
QString str = pPro->readAllStandardError();
QStringList list = str.split("\n", QString::SkipEmptyParts);
foreach(QString log,list)
{
if (m_pMain)
{
m_pMain->InsertLog(log);
}
else
exit(0);
}
}

View File

@@ -0,0 +1,30 @@
// Fix: include guard renamed STYOUTUBETAGMANAGE_H -> SYOUTUBETAGMANAGE_H to
// match the class name and the convention of the sibling headers.
#ifndef SYOUTUBETAGMANAGE_H
#define SYOUTUBETAGMANAGE_H
#include "smanage.h"
// Manages tag-based YouTube crawling: spawns the external Python crawler and
// relays its output to the main widget's log (see syoutubetagmanage.cpp).
class SYoutubeTagManage : public SManage
{
    Q_OBJECT
public:
    // Lifecycle states of the external crawler process.
    enum E_PROCESS_STATE
    {
        E_PROCESS_RUN = 0,     // idle; next Update() launches the crawler
        E_PROCESS_FINISH_WAIT, // crawler running; waiting for processFinished()
    };
    // Fix: single-argument constructor marked explicit to prevent accidental
    // implicit conversion from QObject*.
    explicit SYoutubeTagManage(QObject *pObject);
private:
    QString makeGetListQuery(QString _str, QDate _date, int _nPage);
private:
    QString m_strListQuery;         // NOTE(review): unused in the .cpp — candidate for removal
    QVector<QString> m_strListURL;  // NOTE(review): unused in the .cpp — candidate for removal
protected:
    bool Update();
    void Start();
    void processFinished(QProcess *pPro, QString _strOut);
    void ReLoadList();
private slots:
    void readStandardOutput();  // streams crawler stdout into the log
    void readStandardError();   // streams crawler stderr into the log
};
#endif // SYOUTUBETAGMANAGE_H

View File

@@ -0,0 +1,89 @@
#include "syoutubeusermanage.h"
#include <QThread>
#include "widget.h"
// Crawler manager for user-based YouTube searches. Drives an external Python
// crawler process and relays its console output into the main widget's log.
SYoutubeUserManage::SYoutubeUserManage(QObject *pObject) : SManage(pObject)
{
    m_nID = 0;
    // Forward the crawler process's stdout/stderr into our logging slots.
    // m_pro[0] is presumably the QProcess slot inherited from SManage — only
    // index 0 is used by this manager.
    connect(&m_pro[0], SIGNAL(readyReadStandardOutput()), this, SLOT(readStandardOutput()));
    connect(&m_pro[0], SIGNAL(readyReadStandardError()), this, SLOT(readStandardError()));
}
// Arm the crawler: clear the completion flag and return to the RUN state so
// the next Update() pass launches the external process.
void SYoutubeUserManage::Start()
{
    m_bFinalLast = false;     // crawl not finished yet
    m_nMode = E_PROCESS_RUN;  // Update() will spawn the crawler from here
}
// One scheduler tick. Launches the external Python crawler when in the RUN
// state and no process slot is busy, then waits for processFinished().
// Returns true once the crawl has completed.
bool SYoutubeUserManage::Update()
{
    if (m_bFinalLast)
        return m_bFinalLast;

    if (m_nMode == E_PROCESS_RUN && UseProcess() == false)
    {
#if defined(Q_OS_WIN32)
        m_pro[0].start("python", QStringList() << "webbasedcrawler.py" << "youtube" << m_strKeywordID << m_strGroupID << m_pMain->StartDay() << m_pMain->UntilPage());
#else
        m_pro[0].start("/usr/bin/python3", QStringList() << "webbasedcrawler.py" << "youtube" << m_strKeywordID << m_strGroupID << m_pMain->StartDay() << m_pMain->UntilPage());
#endif
        m_nMode = E_PROCESS_FINISH_WAIT;
    }
    // In E_PROCESS_FINISH_WAIT there is nothing to do until the process exits.
    return m_bFinalLast;
}
// Invoked when the external crawler process exits. Only meaningful while we
// are waiting for it (E_PROCESS_FINISH_WAIT); any other state ignores it.
void SYoutubeUserManage::processFinished(QProcess *pPro, QString _strOut)
{
    (void)pPro;     // unused — this manager owns a single crawler process
    (void)_strOut;  // unused — output was already streamed via the read slots
    if (m_nMode != E_PROCESS_FINISH_WAIT)
        return;
    m_nMode = E_PROCESS_RUN;
    m_bFinalLast = true;
    m_pMain->InsertLog("Finish Crawling :)");
    m_pMain->SetCrawlingState("Finish");
    m_ncList = 1;
    m_bLast = false;
}
void SYoutubeUserManage::readStandardOutput()
{
QProcess *pPro = (QProcess*)sender();
QThread::msleep(100);
QString str = pPro->readAllStandardOutput();
QStringList list = str.split("\n", QString::SkipEmptyParts);
foreach(QString log,list)
{
if (m_pMain)
{
m_pMain->InsertLog(log);
}
else
exit(0);
}
}
void SYoutubeUserManage::readStandardError()
{
QProcess *pPro = (QProcess*)sender();
QThread::msleep(100);
QString str = pPro->readAllStandardError();
QStringList list = str.split("\n", QString::SkipEmptyParts);
foreach(QString log,list)
{
if (m_pMain)
{
m_pMain->InsertLog(log);
}
else
exit(0);
}
}

View File

@@ -0,0 +1,30 @@
// Fix: include guard renamed YOUTUBE_USER_MANAGE_H -> SYOUTUBEUSERMANAGE_H to
// match the class name and the convention of the sibling headers.
#ifndef SYOUTUBEUSERMANAGE_H
#define SYOUTUBEUSERMANAGE_H
#include "smanage.h"
// Manages user-based YouTube crawling: spawns the external Python crawler and
// relays its output to the main widget's log (see syoutubeusermanage.cpp).
class SYoutubeUserManage : public SManage
{
    Q_OBJECT
public:
    // Lifecycle states of the external crawler process.
    enum E_PROCESS_STATE
    {
        E_PROCESS_RUN = 0,     // idle; next Update() launches the crawler
        E_PROCESS_FINISH_WAIT, // crawler running; waiting for processFinished()
    };
    // Fix: single-argument constructor marked explicit to prevent accidental
    // implicit conversion from QObject*.
    explicit SYoutubeUserManage(QObject *pObject);
private:
    QString makeGetListQuery(QString _str, QDate _date, int _nPage);
private:
    QString m_strListQuery;         // NOTE(review): unused in the .cpp — candidate for removal
    QVector<QString> m_strListURL;  // NOTE(review): unused in the .cpp — candidate for removal
protected:
    bool Update();
    void Start();
    void processFinished(QProcess *pPro, QString _strOut);
    void ReLoadList();
private slots:
    void readStandardOutput();  // streams crawler stdout into the log
    void readStandardError();   // streams crawler stderr into the log
};
#endif // SYOUTUBEUSERMANAGE_H

View File

@@ -22,6 +22,10 @@
#include "sfacebooktagmanage.h" #include "sfacebooktagmanage.h"
#include "sfacebookusermanage.h" #include "sfacebookusermanage.h"
#include "snaverblogaccuracymanage.h" #include "snaverblogaccuracymanage.h"
#include "stwittertagmanage.h"
#include "stwitterusermanage.h"
#include "syoutubetagmanage.h"
#include "syoutubeusermanage.h"
#include <QApplication> #include <QApplication>
#include <QLabel> #include <QLabel>
@@ -51,6 +55,10 @@ Widget::Widget(QWidget *parent) : QWidget(parent) , m_nMode(E_MODE_WAIT)
m_pFacebookTag = new SFacebookTagManage(this); m_pFacebookTag = new SFacebookTagManage(this);
m_pFacebookUser = new SFacebookUserManage(this); m_pFacebookUser = new SFacebookUserManage(this);
m_pNaverBlogAccuracy = new SNaverBlogAccuracyManage(this); m_pNaverBlogAccuracy = new SNaverBlogAccuracyManage(this);
m_pTwitterTag = new STwitterTagManage(this);
m_pTwitterUser = new STwitterUserManage(this);
m_pYoutubeTag = new SYoutubeTagManage(this);
m_pYoutubeUser = new SYoutubeUserManage(this);
m_pManage[0] = m_pNaverCafe; m_pManage[0] = m_pNaverCafe;
m_pManage[1] = m_pNaverBlog; m_pManage[1] = m_pNaverBlog;
@@ -66,6 +74,10 @@ Widget::Widget(QWidget *parent) : QWidget(parent) , m_nMode(E_MODE_WAIT)
m_pManage[11] = m_pFacebookTag; m_pManage[11] = m_pFacebookTag;
m_pManage[12] = m_pFacebookUser; m_pManage[12] = m_pFacebookUser;
m_pManage[13] = m_pNaverBlogAccuracy; m_pManage[13] = m_pNaverBlogAccuracy;
m_pManage[14] = m_pTwitterTag;
m_pManage[15] = m_pTwitterUser;
m_pManage[16] = m_pYoutubeTag;
m_pManage[17] = m_pYoutubeUser;
m_db = QSqlDatabase::addDatabase("QMYSQL"); m_db = QSqlDatabase::addDatabase("QMYSQL");
m_db.setHostName("bigbird.iptime.org"); m_db.setHostName("bigbird.iptime.org");

View File

@@ -26,6 +26,10 @@ class SInstaUserManage;
class SFacebookTagManage; class SFacebookTagManage;
class SFacebookUserManage; class SFacebookUserManage;
class SNaverBlogAccuracyManage; class SNaverBlogAccuracyManage;
class STwitterTagManage;
class STwitterUserManage;
class SYoutubeTagManage;
class SYoutubeUserManage;
#define SAFE_DELETE(p) {if(p) delete (p); (p) = NULL; } #define SAFE_DELETE(p) {if(p) delete (p); (p) = NULL; }
@@ -55,7 +59,7 @@ private:
QLineEdit *m_pedStartDay; QLineEdit *m_pedStartDay;
QTimer m_timer,m_timerAlive; QTimer m_timer,m_timerAlive;
QSqlDatabase m_db; QSqlDatabase m_db;
static const int C_PLATFORM_MAX = 14; static const int C_PLATFORM_MAX = 18;
SManage *m_pManage[C_PLATFORM_MAX]; SManage *m_pManage[C_PLATFORM_MAX];
QListWidget *m_pResultList; QListWidget *m_pResultList;
QString m_strFileName; QString m_strFileName;
@@ -75,6 +79,11 @@ private:
SFacebookTagManage *m_pFacebookTag; SFacebookTagManage *m_pFacebookTag;
SFacebookUserManage *m_pFacebookUser; SFacebookUserManage *m_pFacebookUser;
SNaverBlogAccuracyManage *m_pNaverBlogAccuracy; SNaverBlogAccuracyManage *m_pNaverBlogAccuracy;
STwitterTagManage* m_pTwitterTag;
STwitterUserManage* m_pTwitterUser;
SYoutubeTagManage* m_pYoutubeTag;
SYoutubeUserManage* m_pYoutubeUser;
int m_nStartTime,m_nRangeTime,m_nPlatform; int m_nStartTime,m_nRangeTime,m_nPlatform;
//QGroupBox *m_pgbManual; //QGroupBox *m_pgbManual;
QCheckBox *m_pcheckboxReal; QCheckBox *m_pcheckboxReal;

View File

@@ -1,4 +1,4 @@
#include "scrawler.h" #include "scrawler.h"
#include <QCoreApplication> #include <QCoreApplication>
#include <iostream> #include <iostream>

View File

@@ -1,4 +1,4 @@
#include "scrawler.h" #include "scrawler.h"
#include <iostream> #include <iostream>
#include <QSqlQuery> #include <QSqlQuery>
#include <QSqlError> #include <QSqlError>

View File

@@ -1,4 +1,4 @@
#ifndef SCRAWLER_H #ifndef SCRAWLER_H
#define SCRAWLER_H #define SCRAWLER_H
#include <QtWebKitWidgets> #include <QtWebKitWidgets>

View File

@@ -1,4 +1,4 @@
#ifndef SCRAWLERDATA #ifndef SCRAWLERDATA
#define SCRAWLERDATA #define SCRAWLERDATA
#endif // SCRAWLERDATA #endif // SCRAWLERDATA

View File

@@ -85,6 +85,10 @@ Widget::Widget(QWidget *parent)
"WHEN 11 THEN 'Facebook Tag' " "WHEN 11 THEN 'Facebook Tag' "
"WHEN 12 THEN 'Facebook User' " "WHEN 12 THEN 'Facebook User' "
"WHEN 13 THEN 'Naver Blog Accuracy' " "WHEN 13 THEN 'Naver Blog Accuracy' "
"WHEN 14 THEN 'Twitter Tag' "
"WHEN 15 THEN 'Twitter User' "
"WHEN 16 THEN 'Youtube Tag' "
"WHEN 17 THEN 'Youtube User' "
"ELSE 'UnKnown'" "ELSE 'UnKnown'"
"END AS platform FROM keyword where state is null"); "END AS platform FROM keyword where state is null");
m_pmodelGroup->setQuery("SELECT * FROM datagroup"); m_pmodelGroup->setQuery("SELECT * FROM datagroup");
@@ -140,7 +144,7 @@ QGroupBox *Widget::setKeywordWidgets()
m_pcbPlatform = new QComboBox; m_pcbPlatform = new QComboBox;
m_pcbPlatform->addItems(QStringList() << "Naver Cafe" << "Naver Blog" << "Daum Cafe" << "Naver News" << "Naver Cafe List" << "Daum Cafe List" m_pcbPlatform->addItems(QStringList() << "Naver Cafe" << "Naver Blog" << "Daum Cafe" << "Naver News" << "Naver Cafe List" << "Daum Cafe List"
<< "Kakao Story Channel" << "Kakao Story Tag" << "Kakao Story User" << "Instagram Tag" << "Instagram User" << "Kakao Story Channel" << "Kakao Story Tag" << "Kakao Story User" << "Instagram Tag" << "Instagram User"
<< "Facebook Tag" << "Facebook User" << "Naver Blog Accuracy"); << "Facebook Tag" << "Facebook User" << "Naver Blog Accuracy" << "Twitter Tag" << "Twitter User" << "Youtube Tag" << "Youtube User");
m_pleKeyword = new QLineEdit; m_pleKeyword = new QLineEdit;
m_pleAuthorship = new QLineEdit; m_pleAuthorship = new QLineEdit;
@@ -380,6 +384,10 @@ void Widget::on_keyword_currentRowChanged(QModelIndex _index)
if (str == QString("Facebook Tag")) nSelect = 11; if (str == QString("Facebook Tag")) nSelect = 11;
if (str == QString("Facebook User")) nSelect = 12; if (str == QString("Facebook User")) nSelect = 12;
if (str == QString("Naver Blog Accuracy")) nSelect = 13; if (str == QString("Naver Blog Accuracy")) nSelect = 13;
if (str == QString("Twitter Tag")) nSelect = 14;
if (str == QString("Twitter User")) nSelect = 15;
if (str == QString("Youtube Tag")) nSelect = 16;
if (str == QString("Youtube User")) nSelect = 17;
m_pcbPlatform->setCurrentIndex(nSelect); m_pcbPlatform->setCurrentIndex(nSelect);
} }
} }
@@ -504,6 +512,10 @@ void Widget::on_keyword_button_insert()
"WHEN 11 THEN 'Facebook Tag' " "WHEN 11 THEN 'Facebook Tag' "
"WHEN 12 THEN 'Facebook User' " "WHEN 12 THEN 'Facebook User' "
"WHEN 13 THEN 'Naver Blog Accuracy' " "WHEN 13 THEN 'Naver Blog Accuracy' "
"WHEN 14 THEN 'Twitter Tag' "
"WHEN 15 THEN 'Twitter User' "
"WHEN 16 THEN 'Youtube Tag' "
"WHEN 17 THEN 'Youtube User' "
"ELSE 'UnKnown'" "ELSE 'UnKnown'"
"END AS platform FROM keyword where state is null"); "END AS platform FROM keyword where state is null");
} }
@@ -535,6 +547,10 @@ void Widget::on_keyword_button_delete()
"WHEN 11 THEN 'Facebook Tag' " "WHEN 11 THEN 'Facebook Tag' "
"WHEN 12 THEN 'Facebook User' " "WHEN 12 THEN 'Facebook User' "
"WHEN 13 THEN 'Naver Blog Accuracy' " "WHEN 13 THEN 'Naver Blog Accuracy' "
"WHEN 14 THEN 'Twitter Tag' "
"WHEN 15 THEN 'Twitter User' "
"WHEN 16 THEN 'Youtube Tag' "
"WHEN 17 THEN 'Youtube User' "
"ELSE 'UnKnown'" "ELSE 'UnKnown'"
"END AS platform FROM keyword where state is null"); "END AS platform FROM keyword where state is null");
} }
@@ -576,6 +592,10 @@ void Widget::on_keyword_button_modify()
"WHEN 11 THEN 'Facebook Tag' " "WHEN 11 THEN 'Facebook Tag' "
"WHEN 12 THEN 'Facebook User' " "WHEN 12 THEN 'Facebook User' "
"WHEN 13 THEN 'Naver Blog Accuracy' " "WHEN 13 THEN 'Naver Blog Accuracy' "
"WHEN 14 THEN 'Twitter Tag' "
"WHEN 15 THEN 'Twitter User' "
"WHEN 16 THEN 'Youtube Tag' "
"WHEN 17 THEN 'Youtube User' "
"ELSE 'UnKnown'" "ELSE 'UnKnown'"
"END AS platform FROM keyword where state is null"); "END AS platform FROM keyword where state is null");
} }
@@ -1100,7 +1120,14 @@ void Widget::on_group_button_copy_start()
void Widget::UpdateCrawling() void Widget::UpdateCrawling()
{ {
m_pmodelCrawling->setQuery("SELECT _crawling.id,_keyword.realtime,_keyword.searches,_keyword.start,_keyword.end, _datagroup.name , " m_pmodelCrawling->setQuery("SELECT _crawling.id,_keyword.realtime,_keyword.searches,_keyword.start,_keyword.end, _datagroup.name , "
"(CASE _keyword.platform WHEN 0 THEN 'Naver Cafe' WHEN 1 THEN 'Naver Blog' WHEN 2 THEN 'Daum Cafe' WHEN 3 THEN 'Naver News' WHEN 4 THEN 'Naver Cafe List' WHEN 5 THEN 'Daum Cafe List' WHEN 6 THEN 'Kakao Story Channel' " "(CASE _keyword.platform "
"WHEN 0 THEN 'Naver Cafe' "
"WHEN 1 THEN 'Naver Blog' "
"WHEN 2 THEN 'Daum Cafe' "
"WHEN 3 THEN 'Naver News' "
"WHEN 4 THEN 'Naver Cafe List' "
"WHEN 5 THEN 'Daum Cafe List' "
"WHEN 6 THEN 'Kakao Story Channel' "
"WHEN 7 THEN 'Kakao Story Tag' " "WHEN 7 THEN 'Kakao Story Tag' "
"WHEN 8 THEN 'Kakao Story User' " "WHEN 8 THEN 'Kakao Story User' "
"WHEN 9 THEN 'Instagram Tag' " "WHEN 9 THEN 'Instagram Tag' "
@@ -1108,6 +1135,10 @@ void Widget::UpdateCrawling()
"WHEN 11 THEN 'Facebook Tag' " "WHEN 11 THEN 'Facebook Tag' "
"WHEN 12 THEN 'Facebook User' " "WHEN 12 THEN 'Facebook User' "
"WHEN 13 THEN 'Naver Blog Accuracy' " "WHEN 13 THEN 'Naver Blog Accuracy' "
"WHEN 14 THEN 'Twitter Tag' "
"WHEN 15 THEN 'Twitter User' "
"WHEN 16 THEN 'Youtube Tag' "
"WHEN 17 THEN 'Youtube User' "
"ELSE 'UnKnown' END ) AS platform , " "ELSE 'UnKnown' END ) AS platform , "
"(CASE _crawling.state WHEN 0 THEN 'Waiting' WHEN 1 THEN 'Running' WHEN 2 THEN 'Terminated' ELSE 'None' END ) AS state " "(CASE _crawling.state WHEN 0 THEN 'Waiting' WHEN 1 THEN 'Running' WHEN 2 THEN 'Terminated' ELSE 'None' END ) AS state "
"FROM crawling _crawling INNER JOIN keyword _keyword ON _crawling.keyword_id = _keyword.id " "FROM crawling _crawling INNER JOIN keyword _keyword ON _crawling.keyword_id = _keyword.id "

View File

@@ -32,6 +32,7 @@ def is_debugger_attached():
is_debug = is_debugger_attached() is_debug = is_debugger_attached()
def printl(*objects, sep=' ', end='\n', file=None, flush=True): def printl(*objects, sep=' ', end='\n', file=None, flush=True):
if is_debug: if is_debug:
cur_frame = inspect.currentframe() cur_frame = inspect.currentframe()

View File

@@ -0,0 +1,79 @@
from pymysql.connections import Connection
import datetime
from numbers import Number
class DataDBRow:
    """One row of the data_{N} tables: a crawled article plus its metadata.

    get_keys / get_values / get_insert_query all enumerate the attributes of a
    pristine DataDBRow instance, so subclass-only attributes are deliberately
    excluded from the SQL — presumably matching the table schema (TODO confirm).
    Fix: the key enumeration and SQL-literal conversion were duplicated across
    three methods; they are extracted into shared helpers.
    """

    def __init__(self):
        self.platform_name = None      # e.g. 'twitter'
        self.platform_form = None
        self.platform_title = None
        self.article_form = None
        self.article_parent = None
        self.article_id = None
        self.article_nickname = None
        self.article_title = None
        self.article_data = None       # article body text
        self.article_url = None
        self.article_hit = 0
        self.article_date = None
        self.article_order = 0
        self.article_profile = None
        self.article_profileurl = None
        self.platform_id = None
        self.keyword_id = -1           # FK into the keyword table
        self.reply_url = None
        self.etc = None

    @staticmethod
    def _template_keys():
        # Column names come from a fresh DataDBRow so subclass extras are
        # filtered out; dict insertion order makes this deterministic.
        inst = DataDBRow()
        return tuple(key for key in inst.__dict__ if not key.startswith('__'))

    @staticmethod
    def _to_sql_literal(conn, value):
        # Convert one attribute value into an SQL literal string.
        if isinstance(value, Number):
            return str(value)
        if isinstance(value, str):
            # Round-trip through utf8, as the original did, before escaping.
            return conn.escape(value.encode('utf8').decode('utf8'))
        return conn.escape(value)  # None, datetime, ... handled by the driver

    def get_keys(self):
        """Return the tuple of column names used by the insert query."""
        return self._template_keys()

    def get_values(self, conn, db_num):
        """Return this row's values as escaped SQL literal strings.

        `db_num` is unused but kept for interface compatibility.
        """
        return tuple(self._to_sql_literal(conn, self.__dict__[key])
                     for key in self._template_keys())

    def get_insert_query(self, conn, db_num):
        """Build the `insert into data_{db_num} (...) values (...)` statement."""
        keys = self._template_keys()
        values = self.get_values(conn, db_num)
        return 'insert into data_{} ({}) values ({})'.format(
            db_num, ', '.join(keys), ', '.join(values))

View File

@@ -97,6 +97,31 @@ def get_driver(platform, proxies):
else: else:
return platform_webdriver[platform](capabilities=desired_capabilities) return platform_webdriver[platform](capabilities=desired_capabilities)
# Proxies that have already been reported dead during this run.
_expired_proxies = []


def set_proxy_expired(proxy):
    # Mark `proxy` (a requests-style {'http': 'http://ip:port'} dict) as dead:
    # remember it in-process, then comment out its line in the proxy file and
    # move that line to the end so get_proxy_from_file() will skip it.
    # NOTE(review): the file rewrite is not guarded against concurrent access
    # from other crawler processes — verify whether that matters here.
    if proxy not in _expired_proxies:
        _expired_proxies.append(proxy)
    # Strip the scheme prefix to recover the raw 'ip:port' address.
    address = proxy['http'][len('http://'):]
    with open(proxy_filename, 'r') as f:
        lines = f.readlines()
    expired_idx = -1
    for idx, line in enumerate(lines):
        if line.startswith(address):
            expired_idx = idx
            break
    if expired_idx >= 0:
        # Comment the line out and push it to the end of the file.
        lines[expired_idx] = '# ' + lines[expired_idx]
        lines.append(lines.pop(expired_idx))
        with open(proxy_filename, 'w') as f:
            f.writelines(lines)
def get_proxy_from_file(filename): def get_proxy_from_file(filename):
""" """
@@ -104,7 +129,7 @@ def get_proxy_from_file(filename):
:return (ip, port): string, string :return (ip, port): string, string
if ip, port or filename is invalid, return (None, None) if ip, port or filename is invalid, return (None, None)
""" """
proxy_lists = [line.replace('\n', '') for line in open(filename) if re_ip.search(line)] proxy_lists = [line.replace('\n', '') for line in open(filename) if not line.strip().startswith('#') and re_ip.search(line)]
if proxy_lists: if proxy_lists:
m = re_ip.search(proxy_lists[random.randint(0, len(proxy_lists) - 1)]) m = re_ip.search(proxy_lists[random.randint(0, len(proxy_lists) - 1)])
if m: if m:

View File

@@ -0,0 +1,3 @@
requests
bs4
pytz

View File

View File

@@ -0,0 +1,62 @@
import datetime
import copy
class TwitterConfig:
    """Per-keyword crawl configuration for the Twitter crawler."""

    # Endpoint constants for twitter.com's legacy search/conversation pages.
    protocol = 'https'
    top_url = 'twitter.com'
    search_url = '/i/search/timeline'
    conversation_url_form = '/i/{}/conversation/{}'

    def __init__(self):
        self.keyword_id = -1   # FK into the keyword table
        self.db_num = -1       # target data_{N} table number
        self.id = 0
        self.realtime = False
        self.keywords = []     # search terms parsed from the DB row
        self.start_str = None  # 'YYYY-MM-DD' form of start
        self.start = None      # datetime at midnight of the start day
        self.end_str = None
        self.end = None
        self.authorship = None
        self.state = None
        self.platform = None

    def set_param(self, keyword_id, db_num, params):
        """Populate this config from a keyword DB row (`params` is dict-like)."""
        self.keyword_id = int(keyword_id)
        self.db_num = int(db_num)
        self.id = int(params['id'])
        self.realtime = params['realtime'] == '1'
        self.keywords = [term.strip() for term in params['searches'].split(',')]
        self.start_str = str(params['start'])
        self.end_str = str(params['end'])
        # Promote the DB date values to datetimes at midnight.
        midnight = datetime.datetime.min.time()
        self.start = datetime.datetime.combine(params['start'], midnight)
        self.end = datetime.datetime.combine(params['end'], midnight)
        self.authorship = params['authorship']
        self.state = params['state']
        self.platform = params['platform']

    def split(self):
        """Break [start, end] into one-day-wide configs, newest window first."""
        pieces = []
        cursor = self.end
        while cursor > self.start:
            piece = copy.deepcopy(self)
            piece.end = cursor
            cursor = cursor + datetime.timedelta(days=-1)
            piece.start = cursor
            piece.start_str = piece.start.strftime('%Y-%m-%d')
            piece.end_str = piece.end.strftime('%Y-%m-%d')
            pieces.append(piece)
        return pieces

View File

@@ -0,0 +1,79 @@
from twitter.tweet import Tweet
import multiprocessing as mp
class TwitterDBHelper:
    """MySQL access helper for the Twitter crawler: reads keyword parameters
    and inserts crawled tweets into the data_{N} tables."""

    # Resolved once at class-definition time; __import__('pymysql.cursors')
    # returns the top-level pymysql package with the cursors submodule loaded.
    pymysql = __import__('pymysql.cursors')

    def __init__(self):
        self.tweets = []       # NOTE(review): never used below — leftover? verify
        self.buffer = []       # batching buffer for the disabled batch path below
        self.lock = mp.Lock()  # guard for the (currently commented-out) batching
        pass

    def __del__(self):
        pass

    def get_param(self, keyword_id):
        # Fetch the keyword row for `keyword_id`; the whole process exits with
        # status 1 if the DB is unreachable.
        query = "select * from keyword where id = " + str(keyword_id)
        params = []
        try:
            conn = self.pymysql.connect(host='bigbird.iptime.org',
                                        user='admin', passwd='admin123',
                                        db='concepters', charset='utf8',
                                        cursorclass=self.pymysql.cursors.DictCursor)
            with conn.cursor() as cursor:
                cursor.execute(query)
                params = cursor.fetchone()  # single row (dict) or None
        except Exception as e:
            print(e)
            exit(1)
        else:
            conn.close()
        return params

    def insert_tweet(self, tweet: Tweet = None, db_num: int = -1, flush=False):
        # Insert one tweet into data_{db_num}. The original batched design is
        # kept below (commented out); currently every call writes immediately.
        # `flush` is only meaningful for the disabled batch path.
        # self.lock.acquire()
        # if tweet is not None:
        #     self.buffer.append((tweet, db_num, ))
        #
        # local_buffer = None
        # if len(self.buffer) >= 100 or flush:
        #     local_buffer = copy.deepcopy(self.buffer)
        #     self.buffer.clear()
        # self.lock.release()
        local_buffer = [(tweet, db_num, )]
        if local_buffer:
            # NOTE(review): retries the connection forever on failure with no
            # backoff — confirm this busy-retry is intended.
            while True:
                try:
                    conn = self.pymysql.connect(host='bigbird.iptime.org',
                                                user='admin', passwd='admin123',
                                                db='concepters', charset='utf8',
                                                cursorclass=self.pymysql.cursors.DictCursor,
                                                connect_timeout=5)
                except Exception as e:
                    print(e)
                    continue
                else:
                    break
            try:
                with conn.cursor() as cursor:
                    for tweet, _db_num in local_buffer:
                        query = tweet.get_insert_query(conn, _db_num)
                        cursor.execute(query)
                    conn.commit()
            except Exception as e:
                # Insert failures are logged and swallowed (best-effort write).
                print(e)
            finally:
                conn.close()

View File

@@ -0,0 +1,24 @@
from base.dbdata import DataDBRow
class Tweet(DataDBRow):
    """A single crawled tweet; extends the generic DataDBRow with
    tweet-specific fields (ids, counters, thread links)."""

    def __init__(self):
        # Fix: plain super() instead of super(self.__class__, self).__init__(),
        # which recurses infinitely if Tweet is ever subclassed.
        super().__init__()
        self.tweet_id = None
        self.user_id = None
        self.user_name = None
        self.text = None
        self.created_at = None
        self.retweets = 0
        self.favorites = 0
        self.is_reply = False     # True when this tweet is a reply in a thread
        self.reply_cnt = 0
        self.retweet_cnt = 0
        self.favorite_cnt = 0
        self.top_link = None      # link to the top tweet of the thread
        self.tweet_link = None    # permalink of this tweet
        self.depth = 0            # nesting depth within a conversation

View File

@@ -0,0 +1,289 @@
from twitter.twconfig import TwitterConfig
from twitter.twdbhelper import TwitterDBHelper
from twitter.tweet import Tweet
from twitter.twparser import TweetParser
import base.proxy
import base.baseclasses
import requests
import bs4
import json
import urllib
import threading
import queue
import time
class TwitterCrawler():
def __init__(self):
    # Default crawl configuration; filled in per-keyword by set_arguments().
    self.default_config = TwitterConfig()
    # Shared DB helper used by the runner and content threads.
    self.db_helper = TwitterDBHelper()
def set_arguments(self, browser, keyword_id, db_num, before_day, until_page):
    # Load the keyword row from the DB and populate the default config.
    # NOTE(review): browser, before_day and until_page are accepted for
    # interface compatibility with the other crawlers but unused here —
    # confirm callers expect that.
    params = self.db_helper.get_param(keyword_id)
    self.default_config.set_param(keyword_id, db_num, params)
@staticmethod
def get_timeline_url(query, start_str, end_str, max_position=''):
    """Build the twitter.com search-timeline URL for a query within a
    since/until date window, optionally continuing from `max_position`."""
    search_q = '{} since:{} until:{}'.format(query, start_str, end_str)
    encoded = urllib.parse.urlencode({
        'f': 'tweets',
        'vertical': 'default',
        'src': 'typd',
        'q': search_q,
        'language': 'en',
        'max_position': max_position,
    })
    parts = (TwitterConfig.protocol, TwitterConfig.top_url,
             TwitterConfig.search_url, '', encoded, '')
    return urllib.parse.urlunparse(parts)
@staticmethod
def get_content_url(user_id, tweet_id, max_position=''):
    """Build the conversation (replies) URL for one tweet, optionally
    continuing from `max_position`."""
    encoded = urllib.parse.urlencode({'max_position': max_position})
    path = TwitterConfig.conversation_url_form.format(user_id, tweet_id)
    parts = (TwitterConfig.protocol, TwitterConfig.top_url, path, '', encoded, '')
    return urllib.parse.urlunparse(parts)
@staticmethod
def get_page(url, proc_id):
    # GET `url` through a proxy, rotating to a new proxy whenever the request
    # fails; gives up (returns None) only when no proxy is available.
    # NOTE(review): any request exception (timeout, DNS, ...) expires the
    # current proxy — confirm that is the intended policy.
    headers = {
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36',
        'Accept-Language': 'ko-KR,ko;q=0.8,en-US;q=0.6,en;q=0.4',
    }
    # if proxies is None:
    proxies = base.proxy.get_proxy_for_requests()
    resp = None
    while True:
        try:
            resp = requests.get(url, headers=headers, proxies=proxies, timeout=3)
        except Exception as e:
            if proxies == (None, None):
                # No working proxy left — presumably the "no proxy" sentinel
                # from get_proxy_for_requests(); verify.
                break
            print('[{}] proxy {} is expired. ({})'.format(proc_id, proxies, e))
            base.proxy.set_proxy_expired(proxies)
            proxies = base.proxy.get_proxy_for_requests()
        else:
            break
    return resp
def runner_proc(self, proc_id, content_queue, result_queue, config):
    # Timeline worker: pages through the search results for one date window,
    # stores every top-level tweet, and queues tweets that have replies for
    # the content threads to expand. Puts (proc_id, tweet_count) on
    # result_queue when done. Note: b_continue is never set False — the loop
    # only exits via the break statements below.
    print('{} to {} runner thread start'.format(config.start_str, config.end_str))
    b_continue = True
    min_tweet_id = None
    max_tweet_id = None
    max_position = ''
    tweet_count = 0
    while b_continue:
        if min_tweet_id is not None:
            # Pagination cursor: first-seen id stays fixed, last id advances.
            max_position = 'TWEET-{}-{}'.format(max_tweet_id, min_tweet_id)
        url = self.get_timeline_url(config.keywords[0], config.start_str, config.end_str, max_position)
        resp = self.get_page(url, proc_id)
        if resp is None:
            break
        j = json.loads(resp.content.decode('utf-8'))
        soup = bs4.BeautifulSoup(j['items_html'], 'lxml')
        tweet_tags = soup.select("div.tweet")
        for tw in tweet_tags:
            tweet = TweetParser.parse(tw, config.keyword_id)
            if tweet.is_reply is True:
                # print(' ## {}: {}...'.format(tweet.user_name, tweet.text[:20]))
                continue  # replies are collected via the conversation pages
            if tweet.reply_cnt > 0:
                # Hand the thread to the content workers for reply expansion.
                self.insert_content_pool(proc_id, content_queue, tweet, tweet)
            self.db_helper.insert_tweet(tweet, config.db_num)
            # print('{} {}: {}...'.format(tweet.created_at, tweet.user_name, tweet.text[:20]))
        count = len(tweet_tags)
        if count == 0:
            break  # no more results for this window
        if min_tweet_id is None:
            min_tweet_id = tweet_tags[0].attrs['data-item-id']
        max_tweet_id = tweet_tags[-1].attrs['data-item-id']
        tweet_count += count
    print('{} to {} runner thread finished {}'.format(config.start_str, config.end_str, tweet_count))
    result_queue.put((proc_id, tweet_count, ))
    # self.runner_processing[proc_id].value = False
    return proc_id, tweet_count,
@staticmethod
def insert_content_pool(proc_id: int, qu, tweet: Tweet, tweet_top: Tweet):
    # Queue a (tweet, top-of-thread tweet) pair for the content workers;
    # proc_id is only used by the commented-out debug print.
    # print(' [{}] pool insert: {} ({})'.format(proc_id, tweet.text[:20] if tweet.text else '', tweet.tweet_link))
    qu.put((tweet, tweet_top,))
@staticmethod
def get_content(content_queue):
    """Pop one (parent, top) pair from the queue; poll every 2s and return
    (None, None) after roughly 60 seconds with no work."""
    idle_since = time.time()
    while True:
        try:
            parent, top = content_queue.get(block=True, timeout=2)
        except Exception:
            if time.time() - idle_since > 60:
                return None, None,  # queue stayed empty long enough — give up
        else:
            return parent, top,
def content_proc(self, proc_id, content_queue, result_queue):
    """Worker loop that crawls the reply thread of each queued tweet.

    Pulls (parent, top) pairs from content_queue, pages through the
    parent tweet's conversation view, stores every reply via the DB
    helper, and re-enqueues each reply so its own sub-replies are
    fetched too. Exits when get_content gives up (queue empty past
    its timeout).

    :param proc_id: index of this content thread
    :param content_queue: shared work queue of (parent, top) pairs
    :param result_queue: receives (proc_id, tweet_count) on exit
    :return: (proc_id, tweet_count)
    """
    print('[{}] content thread start'.format(proc_id))
    tweet_count = 0
    while True:
        parent_tw, top_tw, = self.get_content(content_queue)
        if not parent_tw:
            # Queue stayed empty past get_content's timeout: shut down.
            break
        # print(' [{}] <<< parent : {} ({})'.format(proc_id, parent_tw.text[:20], parent_tw.tweet_link))
        max_position = ''  # conversation scroll cursor ('' = first page)
        b_continue = True
        while b_continue:
            url = self.get_content_url(parent_tw.user_id, parent_tw.tweet_id, max_position)
            resp = self.get_page(url, proc_id)
            if resp is None or resp.status_code == 404:
                # 404: tweet deleted or protected -- drop this conversation.
                break
            elif resp.status_code != 200:
                # NOTE(review): retries the same URL with the same cursor
                # immediately; could spin if the error persists -- confirm.
                print('[WARNING] content_get code {}'.format(resp.status_code))
                continue
            j = json.loads(resp.content.decode('utf-8'))
            soup = bs4.BeautifulSoup(j['items_html'], 'lxml')
            reply_container_tags = soup.select('li.ThreadedConversation')
            # Lone-tweet containers hold replies rendered outside the
            # threaded groupings (parent itself is filtered out there).
            reply_container_tags += TweetParser.get_lone_container(soup, parent_tw)
            for container_tags in reply_container_tags:
                tweet_tags = container_tags.select('div.tweet')
                if len(tweet_tags) > 0:
                    tweet = TweetParser.parse(tweet_tags[0], self.default_config.keyword_id, parent_tw.depth+1, top_tw)
                    # print('[{}]>>> {} {}: {} ({}) ({})'.format(proc_id, tweet.created_at, tweet.user_name, tweet.text[:20], tweet.depth, tweet.tweet_link))
                    # Re-enqueue so this reply's own replies get crawled too.
                    self.insert_content_pool(proc_id, content_queue, tweet, top_tw)
                    self.db_helper.insert_tweet(tweet, self.default_config.db_num)
                    tweet_count += 1
            b_continue = j['has_more_items']
            if b_continue:
                max_position = j['min_position']
    result_queue.put((proc_id, tweet_count))
    print('[{}] content thread finished'.format(proc_id))
    return proc_id, tweet_count,
def debug_content(self):
    """Manual test: seed one known tweet and run only the content threads.

    Spawns 16 content_proc workers against a single hand-picked tweet so
    the reply-crawling path can be exercised in isolation, then prints
    each worker's (proc_id, tweet_count) result.
    """
    content_qu = queue.Queue()
    content_result_qu = queue.Queue()
    # (Removed an unused runner_result_qu that was never read or written.)
    test_tw = Tweet()
    # test_tw.tweet_link = 'https://twitter.com/yniold_/status/886863893137678337'
    # test_tw.user_id = 'yniold_'
    # test_tw.tweet_id = 886863893137678337
    test_tw.tweet_link = 'https://twitter.com/Awesome_vely/status/888704413111435264'
    test_tw.user_id = 'Awesome_vely'
    test_tw.tweet_id = 888704413111435264
    test_tw.text = '시작'
    self.insert_content_pool(0, content_qu, test_tw, test_tw)
    content_threads = [threading.Thread(target=self.content_proc, args=(proc_id, content_qu, content_result_qu))
                       for proc_id in range(16)]
    for th in content_threads:
        th.start()
    for th in content_threads:
        th.join()
    while not content_result_qu.empty():
        res = content_result_qu.get()
        print('reply : {}'.format(res))
    print('end all')
def test_insert_db(self):
    """Manual test: insert the same fixture tweet five times (duplicate-handling check)."""
    fixture = Tweet()
    fixture.tweet_link = 'https://twitter.com/moonriver365/status/885797401033818112'
    fixture.user_id = 'moonriver365'
    fixture.tweet_id = 885797401033818112
    for _ in range(5):
        self.db_helper.insert_tweet(fixture, self.default_config.db_num)
def debug(self):
    """Ad-hoc debug entry point; does real work only when base.baseclasses.is_debug is set.

    The commented-out lines are alternative manual checks (proxy
    handling, config splitting, DB insertion) toggled by hand.
    """
    if base.baseclasses.is_debug:
        ## check proxy
        # base.proxy.get_proxy_from_file('proxy.txt')
        # proxy = {'https': 'http://45.56.86.93:3128', 'http': 'http://45.56.86.93:3128'}
        # base.proxy.set_proxy_expired(proxy)
        # return
        ## contents check
        self.debug_content()
        # split_config = self.default_config.split()
        # self.test_insert_db()
        print("debug end")
        # exit()
def start(self):
    """Run the full crawl.

    Runner threads scrape the search timeline (one per split config)
    while 16 content threads crawl reply conversations from the shared
    queue. Any runner that produced zero tweets is retried once, then
    the total elapsed time is printed.
    """
    start_time = time.time()
    # self.debug()
    # return
    # run
    split_config = self.default_config.split()
    content_qu = queue.Queue()
    runner_result_qu = queue.Queue()
    content_result_qu = queue.Queue()
    runner_threads = [threading.Thread(target=self.runner_proc, args=(proc_id, content_qu, runner_result_qu, config))
                      for proc_id, config in enumerate(split_config)]
    content_threads = [threading.Thread(target=self.content_proc, args=(proc_id, content_qu, content_result_qu))
                      for proc_id in range(16)]
    for th in runner_threads:
        th.start()
    for th in content_threads:
        th.start()
    for th in runner_threads:
        th.join()
    for th in content_threads:
        th.join()
    # rerun zero runners
    # runner_proc reports (proc_id, tweet_count). BUGFIX: the old code
    # compared the tuple itself to 0 ("if res == 0"), which was never
    # true, so failed ranges were never retried; it also advanced a
    # separate idx instead of using the reported proc_id.
    runner_threads = []
    runner_result_qu2 = queue.Queue()
    while not runner_result_qu.empty():
        res_proc_id, res_count = runner_result_qu.get()
        if res_count == 0:
            runner_threads.append(threading.Thread(
                target=self.runner_proc,
                args=(res_proc_id, content_qu, runner_result_qu2, split_config[res_proc_id])))
    if runner_threads:
        # Only spin up the second wave of content threads when there is
        # actually something to rerun (they otherwise idle ~60s each).
        content_threads = [threading.Thread(target=self.content_proc, args=(proc_id, content_qu, content_result_qu))
                           for proc_id in range(16)]
        for th in runner_threads:
            th.start()
        for th in content_threads:
            th.start()
        for th in runner_threads:
            th.join()
        for th in content_threads:
            th.join()
    # print running time
    delta = time.time() - start_time
    m, s = divmod(delta, 60)
    h, m = divmod(m, 60)
    print("finished all {}:{:02d}:{:02d} ".format(int(h), int(m), int(s)))

View File

@@ -0,0 +1,96 @@
from twitter.tweet import Tweet
from twitter.twconfig import TwitterConfig
import bs4
import datetime
import pytz
class TweetParser:
    """Turns scraped Twitter HTML tweet tags into populated Tweet objects."""

    @staticmethod
    def parse(tag, keyword_id, depth=0, top_tw: Tweet=None):
        """Parse one ``div.tweet`` bs4 tag into a Tweet.

        :param tag: bs4 tag wrapping the tweet markup
        :param keyword_id: crawl keyword id this tweet was found under
        :param depth: 0 for a top-level tweet, >0 for replies
        :param top_tw: root tweet of the conversation (None for roots)
        :return: a fully populated Tweet
        """
        tweet = Tweet()
        tweet.tweet_id = int(tag.attrs['data-tweet-id'])
        nickname_tag = tag.select('strong.fullname')[0]
        tweet.user_name = ''
        # The display name may be split across several text nodes
        # (emoji images break it up); join the text parts with spaces.
        for child in nickname_tag.children:
            if isinstance(child, bs4.element.NavigableString):
                if len(tweet.user_name) > 0:
                    tweet.user_name += ' '
                tweet.user_name += child
        tweet.user_id = tag.select('span.username')[0].text[1:]
        tweet.text = tag.select('p.tweet-text')[0].text
        # Older, locale-dependent timestamp parsing kept for reference:
        # time_str = tag.select('a.tweet-timestamp')[0].attrs['title']
        # english
        # tweet.created_at = datetime.datetime.strptime(time_str, '%I:%M %p - %d %b %Y')
        # korean
        # time_str = time_str.replace('오전', 'AM').replace('오후', 'PM')
        # tweet.created_at = datetime.datetime.strptime(time_str, '%p %I:%M - %Y년 %m월 %d일')
        # The epoch attribute is locale-free; convert UTC -> Asia/Seoul.
        timestamp = int(tag.select('span._timestamp')[0].attrs['data-time'])
        utc_dt = datetime.datetime.utcfromtimestamp(timestamp)
        local_tz = pytz.timezone('Asia/Seoul')
        local_dt = utc_dt.replace(tzinfo=pytz.utc).astimezone(local_tz)
        tweet.created_at = local_tz.normalize(local_dt)
        reply_tag = tag.select('div.ReplyingToContextBelowAuthor')
        tweet.is_reply = len(reply_tag) > 0
        reply_cnt_tag = tag.select('span.ProfileTweet-action--reply > span.ProfileTweet-actionCount')
        if len(reply_cnt_tag) > 0:
            tweet.reply_cnt = int(reply_cnt_tag[0].attrs['data-tweet-stat-count'])
        retweet_cnt_tag = tag.select('span.ProfileTweet-action--retweet > span.ProfileTweet-actionCount')
        if len(retweet_cnt_tag) > 0:
            tweet.retweet_cnt = int(retweet_cnt_tag[0].attrs['data-tweet-stat-count'])
        favorite_cnt_tag = tag.select('span.ProfileTweet-action--favorite > span.ProfileTweet-actionCount')
        if len(favorite_cnt_tag) > 0:
            tweet.favorites_cnt = int(favorite_cnt_tag[0].attrs['data-tweet-stat-count'])
        link_tag = tag.select('a.js-permalink')
        if len(link_tag) > 0:
            tweet.tweet_link = TwitterConfig.protocol + '://' + TwitterConfig.top_url + link_tag[0].attrs['href']
        tweet.top_link = top_tw.tweet_link if top_tw else tweet.tweet_link
        tweet.depth = depth
        tweet.platform_name = 'twitter'
        tweet.platform_form = 'post'
        tweet.platform_title = top_tw.user_id if top_tw else tweet.user_id
        # BUGFIX: was "tweet.depth is 0" -- identity comparison on an int
        # (works only by CPython small-int caching); use equality.
        tweet.article_form = 'body' if tweet.depth == 0 else 'reply'
        # tweet.article_parent = None
        tweet.article_id = tweet.user_id
        tweet.article_nickname = tweet.user_name
        # tweet.article_title = None
        tweet.article_data = tweet.text
        tweet.article_url = tweet.top_link
        # tweet.article_hit = 0
        tweet.article_date = tweet.created_at
        tweet.article_order = tweet.depth
        # tweet.article_profile = tweet.user_name
        tweet.article_profileurl = TwitterConfig.protocol + '://' + TwitterConfig.top_url + '/' + tweet.user_id
        tweet.platform_id = top_tw.user_id if top_tw else tweet.user_id
        tweet.keyword_id = keyword_id
        tweet.reply_url = tweet.tweet_link
        # tweet.etc = ''
        return tweet

    @staticmethod
    def get_lone_container(soup, parent_tw):
        """Collect 'loneTweet' reply containers that follow the parent tweet.

        Walks the lone-tweet containers from the end, stopping once the
        parent tweet itself is reached, and returns the containers after
        it in document order.
        """
        lone_tweets = soup.select('div.ThreadedConversation--loneTweet')
        container_tags = []
        for tag in reversed(lone_tweets):
            li = tag.select('li.stream-item')
            if len(li) > 0 and 'data-item-id' in li[0].attrs:
                tweet_id = int(li[0].attrs['data-item-id'])
                if tweet_id == parent_tw.tweet_id:
                    break
            container_tags.append(tag)
        return reversed(container_tags)

View File

@@ -11,6 +11,8 @@ from kakao import kakaocrawl
from naver import navercrawl from naver import navercrawl
from facebook import facebookcrawl from facebook import facebookcrawl
from facebook import facebookcrawlbs from facebook import facebookcrawlbs
from twitter import twittercrawl
from youtube import youtubecrawl
from base.baseclasses import print_and_flush from base.baseclasses import print_and_flush
@@ -26,8 +28,12 @@ class WebBasedCrawler:
self.crawler = kakaocrawl.KakaoMainCrawler() self.crawler = kakaocrawl.KakaoMainCrawler()
elif platform == "navercafe": elif platform == "navercafe":
self.crawler = navercrawl.NaverCafeMainAreaCrawler() self.crawler = navercrawl.NaverCafeMainAreaCrawler()
elif platform == "facebook": elif platform == 'facebook':
self.crawler = facebookcrawlbs.FacebookMainCrawler() self.crawler = facebookcrawlbs.FacebookMainCrawler()
elif platform == 'twitter':
self.crawler = twittercrawl.TwitterCrawler()
elif platform == 'youtube':
self.crawler = youtubecrawl.YoutubeMainCrawler()
else: else:
self.crawler = None self.crawler = None
raise Exception raise Exception
@@ -38,7 +44,7 @@ class WebBasedCrawler:
browser_opt = ('chrome', "ie", "opera", "firefox") browser_opt = ('chrome', "ie", "opera", "firefox")
platform_opt = ('instagram', 'kakaostory', 'navercafe', "facebook") platform_opt = ('instagram', 'kakaostory', 'navercafe', 'facebook', 'twitter', 'youtube')
def get_browser_info(platform_, file_name="browser.txt"): def get_browser_info(platform_, file_name="browser.txt"):
@@ -73,7 +79,7 @@ def get_browser_info(platform_, file_name="browser.txt"):
if __name__ == '__main__': if __name__ == '__main__':
""" """
sys.argv[0] webbasedcrawler.py sys.argv[0] webbasedcrawler.py
sys.argv[1] instagram, kakaochannel, navercafe, facebook sys.argv[1] instagram, kakaochannel, navercafe, facebook, twitter, youtube
sys.argv[2] keyword_id sys.argv[2] keyword_id
sys.argv[3] data group sys.argv[3] data group
sys.argv[4] start_day sys.argv[4] start_day
@@ -85,8 +91,7 @@ if __name__ == '__main__':
else: else:
print_and_flush("Check Argumenets!") print_and_flush("Check Argumenets!")
exit(1) exit(1)
crawler = WebBasedCrawler(get_browser_info(sys.argv[1]), sys.argv[1], sys.argv[2], crawler = WebBasedCrawler(get_browser_info(sys.argv[1]), sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5])
sys.argv[3], sys.argv[4], sys.argv[5])
crawler.start() crawler.start()
print_and_flush("Finished Crawling :)") print_and_flush("Finished Crawling :)")
exit(0) exit(0)

View File

View File

@@ -0,0 +1,7 @@
class YoutubeMainCrawl:
    """Placeholder for the YouTube crawler; not implemented yet."""

    def __init__(self):
        # No state to initialize yet.
        pass

    def start(self):
        # TODO: implement the YouTube crawl entry point.
        pass