Merge branch 'working/twitter'
This commit is contained in:
@@ -28,7 +28,11 @@ SOURCES += main.cpp\
|
||||
skakaousermanage.cpp \
|
||||
sfacebooktagmanage.cpp \
|
||||
sfacebookusermanage.cpp \
|
||||
snaverblogaccuracymanager.cpp
|
||||
snaverblogaccuracymanager.cpp \
|
||||
stwittertagmanage.cpp \
|
||||
stwitterusermanage.cpp \
|
||||
syoutubetagmanage.cpp \
|
||||
syoutubeusermanage.cpp
|
||||
|
||||
HEADERS += widget.h \
|
||||
smanage.h \
|
||||
@@ -45,5 +49,9 @@ HEADERS += widget.h \
|
||||
skakaousermanage.h \
|
||||
sfacebooktagmanage.h \
|
||||
sfacebookusermanage.h \
|
||||
snaverblogaccuracymanage.h
|
||||
snaverblogaccuracymanage.h \
|
||||
stwittertagmanage.h \
|
||||
stwitterusermanage.h \
|
||||
syoutubetagmanage.h \
|
||||
syoutubeusermanage.h
|
||||
|
||||
|
||||
89
CrawlerList/stwittertagmanage.cpp
Normal file
89
CrawlerList/stwittertagmanage.cpp
Normal file
@@ -0,0 +1,89 @@
|
||||
#include "stwittertagmanage.h"
|
||||
#include <QThread>
|
||||
#include "widget.h"
|
||||
STwitterTagManage::STwitterTagManage(QObject *pObject) : SManage(pObject)
|
||||
{
|
||||
m_nID = 0;
|
||||
connect(&m_pro[0], SIGNAL(readyReadStandardOutput()), this, SLOT(readStandardOutput()));
|
||||
connect(&m_pro[0], SIGNAL(readyReadStandardError()), this, SLOT(readStandardError()));
|
||||
}
|
||||
|
||||
|
||||
void STwitterTagManage::Start()
|
||||
{
|
||||
m_nMode = E_PROCESS_RUN;
|
||||
m_bFinalLast = false;
|
||||
}
|
||||
|
||||
bool STwitterTagManage::Update()
|
||||
{
|
||||
if(m_bFinalLast) return m_bFinalLast;
|
||||
switch(m_nMode)
|
||||
{
|
||||
case E_PROCESS_RUN:
|
||||
if(UseProcess() == false)
|
||||
{
|
||||
#if defined(Q_OS_WIN32)
|
||||
m_pro[0].start("python", QStringList() << "webbasedcrawler.py" << "twitter" << m_strKeywordID << m_strGroupID << m_pMain->StartDay() << m_pMain->UntilPage());
|
||||
#else
|
||||
m_pro[0].start("/usr/bin/python3", QStringList() << "webbasedcrawler.py" << "twitter" << m_strKeywordID << m_strGroupID << m_pMain->StartDay() << m_pMain->UntilPage());
|
||||
#endif
|
||||
m_nMode = E_PROCESS_FINISH_WAIT;
|
||||
}
|
||||
break;
|
||||
|
||||
case E_PROCESS_FINISH_WAIT:
|
||||
break;
|
||||
}
|
||||
|
||||
return m_bFinalLast;
|
||||
}
|
||||
|
||||
void STwitterTagManage::processFinished(QProcess *pPro, QString _strOut)
|
||||
{
|
||||
switch(m_nMode)
|
||||
{
|
||||
case E_PROCESS_FINISH_WAIT:
|
||||
m_nMode = E_PROCESS_RUN;
|
||||
m_bFinalLast = true;
|
||||
m_pMain->InsertLog("Finish Crawling :)");
|
||||
m_pMain->SetCrawlingState("Finish");
|
||||
m_ncList=1;
|
||||
m_bLast = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void STwitterTagManage::readStandardOutput()
|
||||
{
|
||||
QProcess *pPro = (QProcess*)sender();
|
||||
QThread::msleep(100);
|
||||
QString str = pPro->readAllStandardOutput();
|
||||
QStringList list = str.split("\n", QString::SkipEmptyParts);
|
||||
foreach(QString log,list)
|
||||
{
|
||||
if (m_pMain)
|
||||
{
|
||||
m_pMain->InsertLog(log);
|
||||
}
|
||||
else
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
void STwitterTagManage::readStandardError()
|
||||
{
|
||||
QProcess *pPro = (QProcess*)sender();
|
||||
QThread::msleep(100);
|
||||
QString str = pPro->readAllStandardError();
|
||||
QStringList list = str.split("\n", QString::SkipEmptyParts);
|
||||
foreach(QString log,list)
|
||||
{
|
||||
if (m_pMain)
|
||||
{
|
||||
m_pMain->InsertLog(log);
|
||||
}
|
||||
else
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
30
CrawlerList/stwittertagmanage.h
Normal file
30
CrawlerList/stwittertagmanage.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef STWITTERTAGMANAGE_H
|
||||
#define STWITTERTAGMANAGE_H
|
||||
#include "smanage.h"
|
||||
|
||||
class STwitterTagManage : public SManage
|
||||
{
|
||||
Q_OBJECT
|
||||
public:
|
||||
enum E_PROCESS_STATE
|
||||
{
|
||||
E_PROCESS_RUN = 0,
|
||||
E_PROCESS_FINISH_WAIT,
|
||||
};
|
||||
STwitterTagManage(QObject *pObject);
|
||||
private:
|
||||
QString makeGetListQuery(QString _str,QDate _date,int _nPage);
|
||||
private:
|
||||
QString m_strListQuery;
|
||||
QVector <QString> m_strListURL;
|
||||
protected:
|
||||
bool Update();
|
||||
void Start();
|
||||
void processFinished(QProcess *pPro,QString _strOut);
|
||||
void ReLoadList();
|
||||
private slots:
|
||||
void readStandardOutput();
|
||||
void readStandardError();
|
||||
};
|
||||
#endif // STWITTERTAGMANAGE_H
|
||||
|
||||
89
CrawlerList/stwitterusermanage.cpp
Normal file
89
CrawlerList/stwitterusermanage.cpp
Normal file
@@ -0,0 +1,89 @@
|
||||
#include "stwitterusermanage.h"
|
||||
#include <QThread>
|
||||
#include "widget.h"
|
||||
STwitterUserManage::STwitterUserManage(QObject *pObject) : SManage(pObject)
|
||||
{
|
||||
m_nID = 0;
|
||||
connect(&m_pro[0], SIGNAL(readyReadStandardOutput()), this, SLOT(readStandardOutput()));
|
||||
connect(&m_pro[0], SIGNAL(readyReadStandardError()), this, SLOT(readStandardError()));
|
||||
}
|
||||
|
||||
|
||||
void STwitterUserManage::Start()
|
||||
{
|
||||
m_nMode = E_PROCESS_RUN;
|
||||
m_bFinalLast = false;
|
||||
}
|
||||
|
||||
bool STwitterUserManage::Update()
|
||||
{
|
||||
if(m_bFinalLast) return m_bFinalLast;
|
||||
switch(m_nMode)
|
||||
{
|
||||
case E_PROCESS_RUN:
|
||||
if(UseProcess() == false)
|
||||
{
|
||||
#if defined(Q_OS_WIN32)
|
||||
m_pro[0].start("python", QStringList() << "webbasedcrawler.py" << "twitter" << m_strKeywordID << m_strGroupID << m_pMain->StartDay() << m_pMain->UntilPage());
|
||||
#else
|
||||
m_pro[0].start("/usr/bin/python3", QStringList() << "webbasedcrawler.py" << "twitter" << m_strKeywordID << m_strGroupID << m_pMain->StartDay() << m_pMain->UntilPage());
|
||||
#endif
|
||||
m_nMode = E_PROCESS_FINISH_WAIT;
|
||||
}
|
||||
break;
|
||||
|
||||
case E_PROCESS_FINISH_WAIT:
|
||||
break;
|
||||
}
|
||||
|
||||
return m_bFinalLast;
|
||||
}
|
||||
|
||||
void STwitterUserManage::processFinished(QProcess *pPro, QString _strOut)
|
||||
{
|
||||
switch(m_nMode)
|
||||
{
|
||||
case E_PROCESS_FINISH_WAIT:
|
||||
m_nMode = E_PROCESS_RUN;
|
||||
m_bFinalLast = true;
|
||||
m_pMain->InsertLog("Finish Crawling :)");
|
||||
m_pMain->SetCrawlingState("Finish");
|
||||
m_ncList=1;
|
||||
m_bLast = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void STwitterUserManage::readStandardOutput()
|
||||
{
|
||||
QProcess *pPro = (QProcess*)sender();
|
||||
QThread::msleep(100);
|
||||
QString str = pPro->readAllStandardOutput();
|
||||
QStringList list = str.split("\n", QString::SkipEmptyParts);
|
||||
foreach(QString log,list)
|
||||
{
|
||||
if (m_pMain)
|
||||
{
|
||||
m_pMain->InsertLog(log);
|
||||
}
|
||||
else
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
void STwitterUserManage::readStandardError()
|
||||
{
|
||||
QProcess *pPro = (QProcess*)sender();
|
||||
QThread::msleep(100);
|
||||
QString str = pPro->readAllStandardError();
|
||||
QStringList list = str.split("\n", QString::SkipEmptyParts);
|
||||
foreach(QString log,list)
|
||||
{
|
||||
if (m_pMain)
|
||||
{
|
||||
m_pMain->InsertLog(log);
|
||||
}
|
||||
else
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
30
CrawlerList/stwitterusermanage.h
Normal file
30
CrawlerList/stwitterusermanage.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef STWITTERUSERMANAGE_H
|
||||
#define STWITTERUSERMANAGE_H
|
||||
#include "smanage.h"
|
||||
|
||||
class STwitterUserManage : public SManage
|
||||
{
|
||||
Q_OBJECT
|
||||
public:
|
||||
enum E_PROCESS_STATE
|
||||
{
|
||||
E_PROCESS_RUN = 0,
|
||||
E_PROCESS_FINISH_WAIT,
|
||||
};
|
||||
STwitterUserManage(QObject *pObject);
|
||||
private:
|
||||
QString makeGetListQuery(QString _str,QDate _date,int _nPage);
|
||||
private:
|
||||
QString m_strListQuery;
|
||||
QVector <QString> m_strListURL;
|
||||
protected:
|
||||
bool Update();
|
||||
void Start();
|
||||
void processFinished(QProcess *pPro,QString _strOut);
|
||||
void ReLoadList();
|
||||
private slots:
|
||||
void readStandardOutput();
|
||||
void readStandardError();
|
||||
};
|
||||
#endif // STWITTERUSERMANAGE_H
|
||||
|
||||
89
CrawlerList/syoutubetagmanage.cpp
Normal file
89
CrawlerList/syoutubetagmanage.cpp
Normal file
@@ -0,0 +1,89 @@
|
||||
#include "syoutubetagmanage.h"
|
||||
#include <QThread>
|
||||
#include "widget.h"
|
||||
SYoutubeTagManage::SYoutubeTagManage(QObject *pObject) : SManage(pObject)
|
||||
{
|
||||
m_nID = 0;
|
||||
connect(&m_pro[0], SIGNAL(readyReadStandardOutput()), this, SLOT(readStandardOutput()));
|
||||
connect(&m_pro[0], SIGNAL(readyReadStandardError()), this, SLOT(readStandardError()));
|
||||
}
|
||||
|
||||
|
||||
void SYoutubeTagManage::Start()
|
||||
{
|
||||
m_nMode = E_PROCESS_RUN;
|
||||
m_bFinalLast = false;
|
||||
}
|
||||
|
||||
bool SYoutubeTagManage::Update()
|
||||
{
|
||||
if(m_bFinalLast) return m_bFinalLast;
|
||||
switch(m_nMode)
|
||||
{
|
||||
case E_PROCESS_RUN:
|
||||
if(UseProcess() == false)
|
||||
{
|
||||
#if defined(Q_OS_WIN32)
|
||||
m_pro[0].start("python", QStringList() << "webbasedcrawler.py" << "youtube" << m_strKeywordID << m_strGroupID << m_pMain->StartDay() << m_pMain->UntilPage());
|
||||
#else
|
||||
m_pro[0].start("/usr/bin/python3", QStringList() << "webbasedcrawler.py" << "youtube" << m_strKeywordID << m_strGroupID << m_pMain->StartDay() << m_pMain->UntilPage());
|
||||
#endif
|
||||
m_nMode = E_PROCESS_FINISH_WAIT;
|
||||
}
|
||||
break;
|
||||
|
||||
case E_PROCESS_FINISH_WAIT:
|
||||
break;
|
||||
}
|
||||
|
||||
return m_bFinalLast;
|
||||
}
|
||||
|
||||
void SYoutubeTagManage::processFinished(QProcess *pPro, QString _strOut)
|
||||
{
|
||||
switch(m_nMode)
|
||||
{
|
||||
case E_PROCESS_FINISH_WAIT:
|
||||
m_nMode = E_PROCESS_RUN;
|
||||
m_bFinalLast = true;
|
||||
m_pMain->InsertLog("Finish Crawling :)");
|
||||
m_pMain->SetCrawlingState("Finish");
|
||||
m_ncList=1;
|
||||
m_bLast = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void SYoutubeTagManage::readStandardOutput()
|
||||
{
|
||||
QProcess *pPro = (QProcess*)sender();
|
||||
QThread::msleep(100);
|
||||
QString str = pPro->readAllStandardOutput();
|
||||
QStringList list = str.split("\n", QString::SkipEmptyParts);
|
||||
foreach(QString log,list)
|
||||
{
|
||||
if (m_pMain)
|
||||
{
|
||||
m_pMain->InsertLog(log);
|
||||
}
|
||||
else
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
void SYoutubeTagManage::readStandardError()
|
||||
{
|
||||
QProcess *pPro = (QProcess*)sender();
|
||||
QThread::msleep(100);
|
||||
QString str = pPro->readAllStandardError();
|
||||
QStringList list = str.split("\n", QString::SkipEmptyParts);
|
||||
foreach(QString log,list)
|
||||
{
|
||||
if (m_pMain)
|
||||
{
|
||||
m_pMain->InsertLog(log);
|
||||
}
|
||||
else
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
30
CrawlerList/syoutubetagmanage.h
Normal file
30
CrawlerList/syoutubetagmanage.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef STYOUTUBETAGMANAGE_H
|
||||
#define STYOUTUBETAGMANAGE_H
|
||||
#include "smanage.h"
|
||||
|
||||
class SYoutubeTagManage : public SManage
|
||||
{
|
||||
Q_OBJECT
|
||||
public:
|
||||
enum E_PROCESS_STATE
|
||||
{
|
||||
E_PROCESS_RUN = 0,
|
||||
E_PROCESS_FINISH_WAIT,
|
||||
};
|
||||
SYoutubeTagManage(QObject *pObject);
|
||||
private:
|
||||
QString makeGetListQuery(QString _str,QDate _date,int _nPage);
|
||||
private:
|
||||
QString m_strListQuery;
|
||||
QVector <QString> m_strListURL;
|
||||
protected:
|
||||
bool Update();
|
||||
void Start();
|
||||
void processFinished(QProcess *pPro,QString _strOut);
|
||||
void ReLoadList();
|
||||
private slots:
|
||||
void readStandardOutput();
|
||||
void readStandardError();
|
||||
};
|
||||
#endif // STYOUTUBETAGMANAGE_H
|
||||
|
||||
89
CrawlerList/syoutubeusermanage.cpp
Normal file
89
CrawlerList/syoutubeusermanage.cpp
Normal file
@@ -0,0 +1,89 @@
|
||||
#include "syoutubeusermanage.h"
|
||||
#include <QThread>
|
||||
#include "widget.h"
|
||||
SYoutubeUserManage::SYoutubeUserManage(QObject *pObject) : SManage(pObject)
|
||||
{
|
||||
m_nID = 0;
|
||||
connect(&m_pro[0], SIGNAL(readyReadStandardOutput()), this, SLOT(readStandardOutput()));
|
||||
connect(&m_pro[0], SIGNAL(readyReadStandardError()), this, SLOT(readStandardError()));
|
||||
}
|
||||
|
||||
|
||||
void SYoutubeUserManage::Start()
|
||||
{
|
||||
m_nMode = E_PROCESS_RUN;
|
||||
m_bFinalLast = false;
|
||||
}
|
||||
|
||||
bool SYoutubeUserManage::Update()
|
||||
{
|
||||
if(m_bFinalLast) return m_bFinalLast;
|
||||
switch(m_nMode)
|
||||
{
|
||||
case E_PROCESS_RUN:
|
||||
if(UseProcess() == false)
|
||||
{
|
||||
#if defined(Q_OS_WIN32)
|
||||
m_pro[0].start("python", QStringList() << "webbasedcrawler.py" << "youtube" << m_strKeywordID << m_strGroupID << m_pMain->StartDay() << m_pMain->UntilPage());
|
||||
#else
|
||||
m_pro[0].start("/usr/bin/python3", QStringList() << "webbasedcrawler.py" << "youtube" << m_strKeywordID << m_strGroupID << m_pMain->StartDay() << m_pMain->UntilPage());
|
||||
#endif
|
||||
m_nMode = E_PROCESS_FINISH_WAIT;
|
||||
}
|
||||
break;
|
||||
|
||||
case E_PROCESS_FINISH_WAIT:
|
||||
break;
|
||||
}
|
||||
|
||||
return m_bFinalLast;
|
||||
}
|
||||
|
||||
void SYoutubeUserManage::processFinished(QProcess *pPro, QString _strOut)
|
||||
{
|
||||
switch(m_nMode)
|
||||
{
|
||||
case E_PROCESS_FINISH_WAIT:
|
||||
m_nMode = E_PROCESS_RUN;
|
||||
m_bFinalLast = true;
|
||||
m_pMain->InsertLog("Finish Crawling :)");
|
||||
m_pMain->SetCrawlingState("Finish");
|
||||
m_ncList=1;
|
||||
m_bLast = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void SYoutubeUserManage::readStandardOutput()
|
||||
{
|
||||
QProcess *pPro = (QProcess*)sender();
|
||||
QThread::msleep(100);
|
||||
QString str = pPro->readAllStandardOutput();
|
||||
QStringList list = str.split("\n", QString::SkipEmptyParts);
|
||||
foreach(QString log,list)
|
||||
{
|
||||
if (m_pMain)
|
||||
{
|
||||
m_pMain->InsertLog(log);
|
||||
}
|
||||
else
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
void SYoutubeUserManage::readStandardError()
|
||||
{
|
||||
QProcess *pPro = (QProcess*)sender();
|
||||
QThread::msleep(100);
|
||||
QString str = pPro->readAllStandardError();
|
||||
QStringList list = str.split("\n", QString::SkipEmptyParts);
|
||||
foreach(QString log,list)
|
||||
{
|
||||
if (m_pMain)
|
||||
{
|
||||
m_pMain->InsertLog(log);
|
||||
}
|
||||
else
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
30
CrawlerList/syoutubeusermanage.h
Normal file
30
CrawlerList/syoutubeusermanage.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef YOUTUBE_USER_MANAGE_H
|
||||
#define YOUTUBE_USER_MANAGE_H
|
||||
#include "smanage.h"
|
||||
|
||||
class SYoutubeUserManage : public SManage
|
||||
{
|
||||
Q_OBJECT
|
||||
public:
|
||||
enum E_PROCESS_STATE
|
||||
{
|
||||
E_PROCESS_RUN = 0,
|
||||
E_PROCESS_FINISH_WAIT,
|
||||
};
|
||||
SYoutubeUserManage(QObject *pObject);
|
||||
private:
|
||||
QString makeGetListQuery(QString _str,QDate _date,int _nPage);
|
||||
private:
|
||||
QString m_strListQuery;
|
||||
QVector <QString> m_strListURL;
|
||||
protected:
|
||||
bool Update();
|
||||
void Start();
|
||||
void processFinished(QProcess *pPro,QString _strOut);
|
||||
void ReLoadList();
|
||||
private slots:
|
||||
void readStandardOutput();
|
||||
void readStandardError();
|
||||
};
|
||||
#endif // YOUTUBE_USER_MANAGE_H
|
||||
|
||||
@@ -22,6 +22,10 @@
|
||||
#include "sfacebooktagmanage.h"
|
||||
#include "sfacebookusermanage.h"
|
||||
#include "snaverblogaccuracymanage.h"
|
||||
#include "stwittertagmanage.h"
|
||||
#include "stwitterusermanage.h"
|
||||
#include "syoutubetagmanage.h"
|
||||
#include "syoutubeusermanage.h"
|
||||
#include <QApplication>
|
||||
#include <QLabel>
|
||||
|
||||
@@ -51,6 +55,10 @@ Widget::Widget(QWidget *parent) : QWidget(parent) , m_nMode(E_MODE_WAIT)
|
||||
m_pFacebookTag = new SFacebookTagManage(this);
|
||||
m_pFacebookUser = new SFacebookUserManage(this);
|
||||
m_pNaverBlogAccuracy = new SNaverBlogAccuracyManage(this);
|
||||
m_pTwitterTag = new STwitterTagManage(this);
|
||||
m_pTwitterUser = new STwitterUserManage(this);
|
||||
m_pYoutubeTag = new SYoutubeTagManage(this);
|
||||
m_pYoutubeUser = new SYoutubeUserManage(this);
|
||||
|
||||
m_pManage[0] = m_pNaverCafe;
|
||||
m_pManage[1] = m_pNaverBlog;
|
||||
@@ -66,6 +74,10 @@ Widget::Widget(QWidget *parent) : QWidget(parent) , m_nMode(E_MODE_WAIT)
|
||||
m_pManage[11] = m_pFacebookTag;
|
||||
m_pManage[12] = m_pFacebookUser;
|
||||
m_pManage[13] = m_pNaverBlogAccuracy;
|
||||
m_pManage[14] = m_pTwitterTag;
|
||||
m_pManage[15] = m_pTwitterUser;
|
||||
m_pManage[16] = m_pYoutubeTag;
|
||||
m_pManage[17] = m_pYoutubeUser;
|
||||
|
||||
m_db = QSqlDatabase::addDatabase("QMYSQL");
|
||||
m_db.setHostName("bigbird.iptime.org");
|
||||
|
||||
@@ -26,6 +26,10 @@ class SInstaUserManage;
|
||||
class SFacebookTagManage;
|
||||
class SFacebookUserManage;
|
||||
class SNaverBlogAccuracyManage;
|
||||
class STwitterTagManage;
|
||||
class STwitterUserManage;
|
||||
class SYoutubeTagManage;
|
||||
class SYoutubeUserManage;
|
||||
|
||||
#define SAFE_DELETE(p) {if(p) delete (p); (p) = NULL; }
|
||||
|
||||
@@ -55,7 +59,7 @@ private:
|
||||
QLineEdit *m_pedStartDay;
|
||||
QTimer m_timer,m_timerAlive;
|
||||
QSqlDatabase m_db;
|
||||
static const int C_PLATFORM_MAX = 14;
|
||||
static const int C_PLATFORM_MAX = 18;
|
||||
SManage *m_pManage[C_PLATFORM_MAX];
|
||||
QListWidget *m_pResultList;
|
||||
QString m_strFileName;
|
||||
@@ -75,6 +79,11 @@ private:
|
||||
SFacebookTagManage *m_pFacebookTag;
|
||||
SFacebookUserManage *m_pFacebookUser;
|
||||
SNaverBlogAccuracyManage *m_pNaverBlogAccuracy;
|
||||
STwitterTagManage* m_pTwitterTag;
|
||||
STwitterUserManage* m_pTwitterUser;
|
||||
SYoutubeTagManage* m_pYoutubeTag;
|
||||
SYoutubeUserManage* m_pYoutubeUser;
|
||||
|
||||
int m_nStartTime,m_nRangeTime,m_nPlatform;
|
||||
//QGroupBox *m_pgbManual;
|
||||
QCheckBox *m_pcheckboxReal;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#include "scrawler.h"
|
||||
#include "scrawler.h"
|
||||
|
||||
#include <QCoreApplication>
|
||||
#include <iostream>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#include "scrawler.h"
|
||||
#include "scrawler.h"
|
||||
#include <iostream>
|
||||
#include <QSqlQuery>
|
||||
#include <QSqlError>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#ifndef SCRAWLER_H
|
||||
#ifndef SCRAWLER_H
|
||||
#define SCRAWLER_H
|
||||
|
||||
#include <QtWebKitWidgets>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#ifndef SCRAWLERDATA
|
||||
#ifndef SCRAWLERDATA
|
||||
#define SCRAWLERDATA
|
||||
|
||||
#endif // SCRAWLERDATA
|
||||
|
||||
@@ -85,6 +85,10 @@ Widget::Widget(QWidget *parent)
|
||||
"WHEN 11 THEN 'Facebook Tag' "
|
||||
"WHEN 12 THEN 'Facebook User' "
|
||||
"WHEN 13 THEN 'Naver Blog Accuracy' "
|
||||
"WHEN 14 THEN 'Twitter Tag' "
|
||||
"WHEN 15 THEN 'Twitter User' "
|
||||
"WHEN 16 THEN 'Youtube Tag' "
|
||||
"WHEN 17 THEN 'Youtube User' "
|
||||
"ELSE 'UnKnown'"
|
||||
"END AS platform FROM keyword where state is null");
|
||||
m_pmodelGroup->setQuery("SELECT * FROM datagroup");
|
||||
@@ -140,7 +144,7 @@ QGroupBox *Widget::setKeywordWidgets()
|
||||
m_pcbPlatform = new QComboBox;
|
||||
m_pcbPlatform->addItems(QStringList() << "Naver Cafe" << "Naver Blog" << "Daum Cafe" << "Naver News" << "Naver Cafe List" << "Daum Cafe List"
|
||||
<< "Kakao Story Channel" << "Kakao Story Tag" << "Kakao Story User" << "Instagram Tag" << "Instagram User"
|
||||
<< "Facebook Tag" << "Facebook User" << "Naver Blog Accuracy");
|
||||
<< "Facebook Tag" << "Facebook User" << "Naver Blog Accuracy" << "Twitter Tag" << "Twitter User" << "Youtube Tag" << "Youtube User");
|
||||
|
||||
m_pleKeyword = new QLineEdit;
|
||||
m_pleAuthorship = new QLineEdit;
|
||||
@@ -380,6 +384,10 @@ void Widget::on_keyword_currentRowChanged(QModelIndex _index)
|
||||
if (str == QString("Facebook Tag")) nSelect = 11;
|
||||
if (str == QString("Facebook User")) nSelect = 12;
|
||||
if (str == QString("Naver Blog Accuracy")) nSelect = 13;
|
||||
if (str == QString("Twitter Tag")) nSelect = 14;
|
||||
if (str == QString("Twitter User")) nSelect = 15;
|
||||
if (str == QString("Youtube Tag")) nSelect = 16;
|
||||
if (str == QString("Youtube User")) nSelect = 17;
|
||||
m_pcbPlatform->setCurrentIndex(nSelect);
|
||||
}
|
||||
}
|
||||
@@ -504,6 +512,10 @@ void Widget::on_keyword_button_insert()
|
||||
"WHEN 11 THEN 'Facebook Tag' "
|
||||
"WHEN 12 THEN 'Facebook User' "
|
||||
"WHEN 13 THEN 'Naver Blog Accuracy' "
|
||||
"WHEN 14 THEN 'Twitter Tag' "
|
||||
"WHEN 15 THEN 'Twitter User' "
|
||||
"WHEN 16 THEN 'Youtube Tag' "
|
||||
"WHEN 17 THEN 'Youtube User' "
|
||||
"ELSE 'UnKnown'"
|
||||
"END AS platform FROM keyword where state is null");
|
||||
}
|
||||
@@ -535,6 +547,10 @@ void Widget::on_keyword_button_delete()
|
||||
"WHEN 11 THEN 'Facebook Tag' "
|
||||
"WHEN 12 THEN 'Facebook User' "
|
||||
"WHEN 13 THEN 'Naver Blog Accuracy' "
|
||||
"WHEN 14 THEN 'Twitter Tag' "
|
||||
"WHEN 15 THEN 'Twitter User' "
|
||||
"WHEN 16 THEN 'Youtube Tag' "
|
||||
"WHEN 17 THEN 'Youtube User' "
|
||||
"ELSE 'UnKnown'"
|
||||
"END AS platform FROM keyword where state is null");
|
||||
}
|
||||
@@ -576,6 +592,10 @@ void Widget::on_keyword_button_modify()
|
||||
"WHEN 11 THEN 'Facebook Tag' "
|
||||
"WHEN 12 THEN 'Facebook User' "
|
||||
"WHEN 13 THEN 'Naver Blog Accuracy' "
|
||||
"WHEN 14 THEN 'Twitter Tag' "
|
||||
"WHEN 15 THEN 'Twitter User' "
|
||||
"WHEN 16 THEN 'Youtube Tag' "
|
||||
"WHEN 17 THEN 'Youtube User' "
|
||||
"ELSE 'UnKnown'"
|
||||
"END AS platform FROM keyword where state is null");
|
||||
}
|
||||
@@ -1100,7 +1120,14 @@ void Widget::on_group_button_copy_start()
|
||||
void Widget::UpdateCrawling()
|
||||
{
|
||||
m_pmodelCrawling->setQuery("SELECT _crawling.id,_keyword.realtime,_keyword.searches,_keyword.start,_keyword.end, _datagroup.name , "
|
||||
"(CASE _keyword.platform WHEN 0 THEN 'Naver Cafe' WHEN 1 THEN 'Naver Blog' WHEN 2 THEN 'Daum Cafe' WHEN 3 THEN 'Naver News' WHEN 4 THEN 'Naver Cafe List' WHEN 5 THEN 'Daum Cafe List' WHEN 6 THEN 'Kakao Story Channel' "
|
||||
"(CASE _keyword.platform "
|
||||
"WHEN 0 THEN 'Naver Cafe' "
|
||||
"WHEN 1 THEN 'Naver Blog' "
|
||||
"WHEN 2 THEN 'Daum Cafe' "
|
||||
"WHEN 3 THEN 'Naver News' "
|
||||
"WHEN 4 THEN 'Naver Cafe List' "
|
||||
"WHEN 5 THEN 'Daum Cafe List' "
|
||||
"WHEN 6 THEN 'Kakao Story Channel' "
|
||||
"WHEN 7 THEN 'Kakao Story Tag' "
|
||||
"WHEN 8 THEN 'Kakao Story User' "
|
||||
"WHEN 9 THEN 'Instagram Tag' "
|
||||
@@ -1108,6 +1135,10 @@ void Widget::UpdateCrawling()
|
||||
"WHEN 11 THEN 'Facebook Tag' "
|
||||
"WHEN 12 THEN 'Facebook User' "
|
||||
"WHEN 13 THEN 'Naver Blog Accuracy' "
|
||||
"WHEN 14 THEN 'Twitter Tag' "
|
||||
"WHEN 15 THEN 'Twitter User' "
|
||||
"WHEN 16 THEN 'Youtube Tag' "
|
||||
"WHEN 17 THEN 'Youtube User' "
|
||||
"ELSE 'UnKnown' END ) AS platform , "
|
||||
"(CASE _crawling.state WHEN 0 THEN 'Waiting' WHEN 1 THEN 'Running' WHEN 2 THEN 'Terminated' ELSE 'None' END ) AS state "
|
||||
"FROM crawling _crawling INNER JOIN keyword _keyword ON _crawling.keyword_id = _keyword.id "
|
||||
|
||||
@@ -32,6 +32,7 @@ def is_debugger_attached():
|
||||
|
||||
is_debug = is_debugger_attached()
|
||||
|
||||
|
||||
def printl(*objects, sep=' ', end='\n', file=None, flush=True):
|
||||
if is_debug:
|
||||
cur_frame = inspect.currentframe()
|
||||
|
||||
79
WebBasedCrawler/base/dbdata.py
Normal file
79
WebBasedCrawler/base/dbdata.py
Normal file
@@ -0,0 +1,79 @@
|
||||
from pymysql.connections import Connection
|
||||
import datetime
|
||||
from numbers import Number
|
||||
|
||||
class DataDBRow:
|
||||
def __init__(self):
|
||||
self.platform_name = None
|
||||
self.platform_form = None
|
||||
self.platform_title = None
|
||||
self.article_form = None
|
||||
self.article_parent = None
|
||||
self.article_id = None
|
||||
self.article_nickname = None
|
||||
self.article_title = None
|
||||
self.article_data = None
|
||||
self.article_url = None
|
||||
self.article_hit = 0
|
||||
self.article_date = None
|
||||
self.article_order = 0
|
||||
self.article_profile = None
|
||||
self.article_profileurl = None
|
||||
self.platform_id = None
|
||||
self.keyword_id = -1
|
||||
self.reply_url = None
|
||||
self.etc = None
|
||||
|
||||
def get_keys(self):
|
||||
inst = DataDBRow()
|
||||
keys = ()
|
||||
for key, value_type in inst.__dict__.items():
|
||||
if key.startswith('__') or callable(value_type):
|
||||
continue
|
||||
|
||||
keys += key,
|
||||
|
||||
return keys
|
||||
|
||||
def get_values(self, conn, db_num):
|
||||
inst = DataDBRow()
|
||||
values = ()
|
||||
for key, value_type in inst.__dict__.items():
|
||||
if key.startswith('__') or callable(value_type):
|
||||
continue
|
||||
|
||||
value = self.__dict__[key]
|
||||
if isinstance(value, Number):
|
||||
values += str(value),
|
||||
elif isinstance(value, str):
|
||||
values += conn.escape(value.encode('utf8').decode('utf8')),
|
||||
else:
|
||||
values += conn.escape(value),
|
||||
|
||||
return values
|
||||
|
||||
def get_insert_query(self, conn, db_num):
|
||||
|
||||
inst = DataDBRow()
|
||||
|
||||
keys = ''
|
||||
values = ''
|
||||
for key, value_type in inst.__dict__.items():
|
||||
if key.startswith('__') or callable(value_type):
|
||||
continue
|
||||
|
||||
if len(keys) > 0:
|
||||
keys += ', '
|
||||
values += ', '
|
||||
|
||||
keys += key
|
||||
value = self.__dict__[key]
|
||||
if isinstance(value, Number):
|
||||
values += str(value)
|
||||
elif isinstance(value, str):
|
||||
values += conn.escape(value.encode('utf8').decode('utf8'))
|
||||
else:
|
||||
values += conn.escape(value)
|
||||
|
||||
query = 'insert into data_{} ({}) values ({})'.format(db_num, keys, values)
|
||||
return query
|
||||
@@ -97,6 +97,31 @@ def get_driver(platform, proxies):
|
||||
else:
|
||||
return platform_webdriver[platform](capabilities=desired_capabilities)
|
||||
|
||||
_expired_proxies = []
|
||||
|
||||
|
||||
def set_proxy_expired(proxy):
|
||||
if proxy not in _expired_proxies:
|
||||
_expired_proxies.append(proxy)
|
||||
|
||||
address = proxy['http'][len('http://'):]
|
||||
|
||||
with open(proxy_filename, 'r') as f:
|
||||
lines = f.readlines()
|
||||
|
||||
expired_idx = -1
|
||||
for idx, line in enumerate(lines):
|
||||
if line.startswith(address):
|
||||
expired_idx = idx
|
||||
break
|
||||
|
||||
if expired_idx >= 0:
|
||||
lines[expired_idx] = '# ' + lines[expired_idx]
|
||||
lines.append(lines.pop(expired_idx))
|
||||
|
||||
with open(proxy_filename, 'w') as f:
|
||||
f.writelines(lines)
|
||||
|
||||
|
||||
def get_proxy_from_file(filename):
|
||||
"""
|
||||
@@ -104,7 +129,7 @@ def get_proxy_from_file(filename):
|
||||
:return (ip, port): string, string
|
||||
if ip, port or filename is invalid, return (None, None)
|
||||
"""
|
||||
proxy_lists = [line.replace('\n', '') for line in open(filename) if re_ip.search(line)]
|
||||
proxy_lists = [line.replace('\n', '') for line in open(filename) if not line.strip().startswith('#') and re_ip.search(line)]
|
||||
if proxy_lists:
|
||||
m = re_ip.search(proxy_lists[random.randint(0, len(proxy_lists) - 1)])
|
||||
if m:
|
||||
|
||||
3
WebBasedCrawler/requirements.txt
Normal file
3
WebBasedCrawler/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
requests
|
||||
bs4
|
||||
pytz
|
||||
0
WebBasedCrawler/twitter/__init__.py
Normal file
0
WebBasedCrawler/twitter/__init__.py
Normal file
62
WebBasedCrawler/twitter/twconfig.py
Normal file
62
WebBasedCrawler/twitter/twconfig.py
Normal file
@@ -0,0 +1,62 @@
|
||||
import datetime
|
||||
import copy
|
||||
|
||||
class TwitterConfig:
|
||||
protocol = 'https'
|
||||
top_url = 'twitter.com'
|
||||
search_url = '/i/search/timeline'
|
||||
conversation_url_form = '/i/{}/conversation/{}'
|
||||
|
||||
def __init__(self):
|
||||
self.keyword_id = -1
|
||||
self.db_num = -1
|
||||
|
||||
self.id = 0
|
||||
self.realtime = False
|
||||
self.keywords = []
|
||||
self.start_str = None
|
||||
self.start = None
|
||||
self.end_str = None
|
||||
self.end = None
|
||||
self.authorship = None
|
||||
self.state = None
|
||||
self.platform = None
|
||||
|
||||
def set_param(self, keyword_id, db_num, params):
|
||||
self.keyword_id = int(keyword_id)
|
||||
self.db_num = int(db_num)
|
||||
|
||||
self.id = int(params['id'])
|
||||
self.realtime = params['realtime'] == '1'
|
||||
|
||||
self.keywords = []
|
||||
for keyword in params['searches'].split(','):
|
||||
self.keywords.append(keyword.strip())
|
||||
|
||||
self.start_str = str(params['start'])
|
||||
self.end_str = str(params['end'])
|
||||
|
||||
self.start = datetime.datetime.combine(params['start'], datetime.datetime.min.time())
|
||||
self.end = datetime.datetime.combine(params['end'], datetime.datetime.min.time())
|
||||
|
||||
self.authorship = params['authorship']
|
||||
self.state = params['state']
|
||||
self.platform = params['platform']
|
||||
|
||||
def split(self):
|
||||
split_list = []
|
||||
new_end = self.end
|
||||
|
||||
while new_end > self.start:
|
||||
new_config = copy.deepcopy(self)
|
||||
|
||||
new_config.end = new_end
|
||||
new_end = new_end + datetime.timedelta(days=-1)
|
||||
new_config.start = new_end
|
||||
|
||||
new_config.start_str = new_config.start.strftime('%Y-%m-%d')
|
||||
new_config.end_str = new_config.end.strftime('%Y-%m-%d')
|
||||
|
||||
split_list.append(new_config)
|
||||
|
||||
return split_list
|
||||
79
WebBasedCrawler/twitter/twdbhelper.py
Normal file
79
WebBasedCrawler/twitter/twdbhelper.py
Normal file
@@ -0,0 +1,79 @@
|
||||
from twitter.tweet import Tweet
|
||||
import multiprocessing as mp
|
||||
|
||||
|
||||
class TwitterDBHelper:
|
||||
pymysql = __import__('pymysql.cursors')
|
||||
|
||||
def __init__(self):
|
||||
self.tweets = []
|
||||
self.buffer = []
|
||||
self.lock = mp.Lock()
|
||||
pass
|
||||
|
||||
def __del__(self):
|
||||
pass
|
||||
|
||||
def get_param(self, keyword_id):
|
||||
query = "select * from keyword where id = " + str(keyword_id)
|
||||
params = []
|
||||
try:
|
||||
conn = self.pymysql.connect(host='bigbird.iptime.org',
|
||||
user='admin', passwd='admin123',
|
||||
db='concepters', charset='utf8',
|
||||
cursorclass=self.pymysql.cursors.DictCursor)
|
||||
|
||||
with conn.cursor() as cursor:
|
||||
cursor.execute(query)
|
||||
params = cursor.fetchone()
|
||||
|
||||
except Exception as e:
|
||||
print(e)
|
||||
exit(1)
|
||||
|
||||
else:
|
||||
conn.close()
|
||||
|
||||
return params
|
||||
|
||||
def insert_tweet(self, tweet: Tweet = None, db_num: int = -1, flush=False):
|
||||
|
||||
# self.lock.acquire()
|
||||
# if tweet is not None:
|
||||
# self.buffer.append((tweet, db_num, ))
|
||||
#
|
||||
# local_buffer = None
|
||||
# if len(self.buffer) >= 100 or flush:
|
||||
# local_buffer = copy.deepcopy(self.buffer)
|
||||
# self.buffer.clear()
|
||||
# self.lock.release()
|
||||
|
||||
local_buffer = [(tweet, db_num, )]
|
||||
if local_buffer:
|
||||
while True:
|
||||
try:
|
||||
conn = self.pymysql.connect(host='bigbird.iptime.org',
|
||||
user='admin', passwd='admin123',
|
||||
db='concepters', charset='utf8',
|
||||
cursorclass=self.pymysql.cursors.DictCursor,
|
||||
connect_timeout=5)
|
||||
|
||||
except Exception as e:
|
||||
print(e)
|
||||
continue
|
||||
|
||||
else:
|
||||
break
|
||||
|
||||
try:
|
||||
with conn.cursor() as cursor:
|
||||
for tweet, _db_num in local_buffer:
|
||||
query = tweet.get_insert_query(conn, _db_num)
|
||||
cursor.execute(query)
|
||||
conn.commit()
|
||||
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
finally:
|
||||
conn.close()
|
||||
24
WebBasedCrawler/twitter/tweet.py
Normal file
24
WebBasedCrawler/twitter/tweet.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from base.dbdata import DataDBRow
|
||||
|
||||
|
||||
class Tweet(DataDBRow):
|
||||
|
||||
def __init__(self):
|
||||
super(self.__class__, self).__init__()
|
||||
|
||||
self.tweet_id = None
|
||||
self.user_id = None
|
||||
self.user_name = None
|
||||
self.text = None
|
||||
self.created_at = None
|
||||
self.retweets = 0
|
||||
self.favorites = 0
|
||||
|
||||
self.is_reply = False
|
||||
self.reply_cnt = 0
|
||||
self.retweet_cnt = 0
|
||||
self.favorite_cnt = 0
|
||||
self.top_link = None
|
||||
self.tweet_link = None
|
||||
|
||||
self.depth = 0
|
||||
289
WebBasedCrawler/twitter/twittercrawl.py
Normal file
289
WebBasedCrawler/twitter/twittercrawl.py
Normal file
@@ -0,0 +1,289 @@
|
||||
from twitter.twconfig import TwitterConfig
|
||||
from twitter.twdbhelper import TwitterDBHelper
|
||||
from twitter.tweet import Tweet
|
||||
from twitter.twparser import TweetParser
|
||||
|
||||
import base.proxy
|
||||
import base.baseclasses
|
||||
|
||||
import requests
|
||||
import bs4
|
||||
import json
|
||||
import urllib
|
||||
import threading
|
||||
import queue
|
||||
import time
|
||||
|
||||
|
||||
class TwitterCrawler():
    """Crawls Twitter's web search timeline and conversation pages.

    Runner threads page through date-sliced search results and push
    tweets that have replies into a shared queue; content threads pull
    from that queue, fetch each conversation page and recurse into
    nested replies.  All rows are written through TwitterDBHelper.
    """

    def __init__(self):
        self.default_config = TwitterConfig()
        self.db_helper = TwitterDBHelper()

    def set_arguments(self, browser, keyword_id, db_num, before_day, until_page):
        """Load keyword parameters from the DB into the default config.

        NOTE(review): browser/before_day/until_page are accepted only
        for interface parity with the other platform crawlers and are
        unused here — confirm whether date limiting should apply.
        """
        params = self.db_helper.get_param(keyword_id)
        self.default_config.set_param(keyword_id, db_num, params)

    @staticmethod
    def get_timeline_url(query, start_str, end_str, max_position=''):
        """Build the JSON search-timeline URL for one date window."""
        params = {
            'f': 'tweets',
            'vertical': 'default',
            'src': 'typd',
            'q': '{} since:{} until:{}'.format(query, start_str, end_str),
            'language': 'en',
            'max_position': max_position,
        }

        url_tupple = (TwitterConfig.protocol, TwitterConfig.top_url, TwitterConfig.search_url, '', urllib.parse.urlencode(params), '')
        return urllib.parse.urlunparse(url_tupple)

    @staticmethod
    def get_content_url(user_id, tweet_id, max_position=''):
        """Build the conversation (reply thread) URL for one tweet."""
        params = {
            'max_position': max_position,
        }

        sub_url = TwitterConfig.conversation_url_form.format(user_id, tweet_id)
        url_tupple = (TwitterConfig.protocol, TwitterConfig.top_url, sub_url, '', urllib.parse.urlencode(params), '')
        return urllib.parse.urlunparse(url_tupple)

    @staticmethod
    def get_page(url, proc_id):
        """GET `url` through rotating proxies; returns the response or
        None once the proxy pool is exhausted ((None, None) sentinel)."""
        headers = {
            'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36',
            'Accept-Language': 'ko-KR,ko;q=0.8,en-US;q=0.6,en;q=0.4',
        }
        proxies = base.proxy.get_proxy_for_requests()

        resp = None
        while True:
            try:
                resp = requests.get(url, headers=headers, proxies=proxies, timeout=3)
            except Exception as e:
                if proxies == (None, None):
                    break

                # Mark the failed proxy expired and rotate to the next one.
                print('[{}] proxy {} is expired. ({})'.format(proc_id, proxies, e))
                base.proxy.set_proxy_expired(proxies)
                proxies = base.proxy.get_proxy_for_requests()
            else:
                break

        return resp

    def runner_proc(self, proc_id, content_queue, result_queue, config):
        """Timeline worker: pages through one date window of search
        results, storing top-level tweets and queueing those with
        replies for the content workers.

        Puts (proc_id, tweet_count) on `result_queue` when done.
        """
        print('{} to {} runner thread start'.format(config.start_str, config.end_str))

        b_continue = True
        min_tweet_id = None
        max_tweet_id = None
        max_position = ''
        tweet_count = 0

        while b_continue:
            # Twitter's paging cursor is 'TWEET-<oldest>-<newest>'.
            if min_tweet_id is not None:
                max_position = 'TWEET-{}-{}'.format(max_tweet_id, min_tweet_id)
            url = self.get_timeline_url(config.keywords[0], config.start_str, config.end_str, max_position)
            resp = self.get_page(url, proc_id)
            if resp is None:
                break

            j = json.loads(resp.content.decode('utf-8'))
            soup = bs4.BeautifulSoup(j['items_html'], 'lxml')
            tweet_tags = soup.select("div.tweet")

            for tw in tweet_tags:
                tweet = TweetParser.parse(tw, config.keyword_id)

                # Replies are picked up via the conversation pages, not
                # from the search timeline.
                if tweet.is_reply is True:
                    continue

                if tweet.reply_cnt > 0:
                    self.insert_content_pool(proc_id, content_queue, tweet, tweet)

                self.db_helper.insert_tweet(tweet, config.db_num)

            count = len(tweet_tags)
            if count == 0:
                break

            # min_tweet_id is pinned to the very first item; only the
            # trailing cursor advances each page.
            if min_tweet_id is None:
                min_tweet_id = tweet_tags[0].attrs['data-item-id']
            max_tweet_id = tweet_tags[-1].attrs['data-item-id']
            tweet_count += count

        print('{} to {} runner thread finished {}'.format(config.start_str, config.end_str, tweet_count))
        result_queue.put((proc_id, tweet_count, ))
        return proc_id, tweet_count,

    @staticmethod
    def insert_content_pool(proc_id: int, qu, tweet: Tweet, tweet_top: Tweet):
        """Queue (tweet, conversation-root) for the content workers."""
        qu.put((tweet, tweet_top,))

    @staticmethod
    def get_content(content_queue):
        """Blocking pop with a 60-second idle timeout; returns
        (None, None) when the queue has been quiet for a minute."""
        sleep_time = time.time()
        while True:
            try:
                parent_tw, top_tw, = content_queue.get(block=True, timeout=2)
            except Exception as e:
                if time.time()-sleep_time > 60:
                    break
                else:
                    continue
            else:
                return parent_tw, top_tw,

        return None, None,

    def content_proc(self, proc_id, content_queue, result_queue):
        """Conversation worker: fetches the reply thread of each queued
        tweet, stores the replies and re-queues any that themselves
        have nested replies."""
        print('[{}] content thread start'.format(proc_id))

        tweet_count = 0
        while True:
            parent_tw, top_tw, = self.get_content(content_queue)
            if not parent_tw:
                break

            max_position = ''

            b_continue = True
            while b_continue:
                url = self.get_content_url(parent_tw.user_id, parent_tw.tweet_id, max_position)
                resp = self.get_page(url, proc_id)
                if resp is None or resp.status_code == 404:
                    break
                elif resp.status_code != 200:
                    print('[WARNING] content_get code {}'.format(resp.status_code))
                    continue

                j = json.loads(resp.content.decode('utf-8'))
                soup = bs4.BeautifulSoup(j['items_html'], 'lxml')

                reply_container_tags = soup.select('li.ThreadedConversation')
                reply_container_tags += TweetParser.get_lone_container(soup, parent_tw)
                for container_tags in reply_container_tags:
                    tweet_tags = container_tags.select('div.tweet')
                    if len(tweet_tags) > 0:
                        tweet = TweetParser.parse(tweet_tags[0], self.default_config.keyword_id, parent_tw.depth+1, top_tw)
                        # Nested replies go back on the queue for this
                        # or another content worker.
                        self.insert_content_pool(proc_id, content_queue, tweet, top_tw)
                        self.db_helper.insert_tweet(tweet, self.default_config.db_num)
                        tweet_count += 1

                b_continue = j['has_more_items']
                if b_continue:
                    max_position = j['min_position']

        result_queue.put((proc_id, tweet_count))
        print('[{}] content thread finished'.format(proc_id))
        return proc_id, tweet_count,

    def debug_content(self):
        """Manual test: run only the content workers on one seed tweet."""
        content_qu = queue.Queue()
        runner_result_qu = queue.Queue()
        content_result_qu = queue.Queue()

        test_tw = Tweet()
        test_tw.tweet_link = 'https://twitter.com/Awesome_vely/status/888704413111435264'
        test_tw.user_id = 'Awesome_vely'
        test_tw.tweet_id = 888704413111435264

        test_tw.text = '시작'
        self.insert_content_pool(0, content_qu, test_tw, test_tw)

        content_threads = [threading.Thread(target=self.content_proc, args=(proc_id, content_qu, content_result_qu)) for proc_id in range(16)]
        [th.start() for th in content_threads]
        [th.join() for th in content_threads]

        while not content_result_qu.empty():
            res = content_result_qu.get()
            print('reply : {}'.format(res))

        print('end all')

    def test_insert_db(self):
        """Manual test: insert the same tweet row several times."""
        test_tw = Tweet()
        test_tw.tweet_link = 'https://twitter.com/moonriver365/status/885797401033818112'
        test_tw.user_id = 'moonriver365'
        test_tw.tweet_id = 885797401033818112
        for _ in range(5):
            self.db_helper.insert_tweet(test_tw, self.default_config.db_num)

    def debug(self):
        """Debug entry point; only active when base.baseclasses.is_debug."""
        if base.baseclasses.is_debug:
            ## contents check
            self.debug_content()

            print("debug end")

    def start(self):
        """Run the full crawl: date-split runner threads plus 16 content
        threads, then re-run any date window that yielded zero tweets."""
        start_time = time.time()

        # run
        split_config = self.default_config.split()
        content_qu = queue.Queue()
        runner_result_qu = queue.Queue()
        content_result_qu = queue.Queue()

        runner_threads = [threading.Thread(target=self.runner_proc, args=(proc_id, content_qu, runner_result_qu, config)) for proc_id, config in enumerate(split_config)]
        content_threads = [threading.Thread(target=self.content_proc, args=(proc_id, content_qu, content_result_qu)) for proc_id in range(16)]

        [th.start() for th in runner_threads]
        [th.start() for th in content_threads]

        [th.join() for th in runner_threads]
        [th.join() for th in content_threads]

        # rerun zero runners
        runner_threads = []
        runner_result_qu2 = queue.Queue()
        while not runner_result_qu.empty():
            # BUG FIX: the queue holds (proc_id, tweet_count) tuples, so
            # the old `res == 0` test never matched, and a running index
            # wrongly assumed completion order == submission order.  Use
            # the proc_id embedded in each result instead.
            res_proc_id, res_count = runner_result_qu.get()
            if res_count == 0:
                th = threading.Thread(target=self.runner_proc, args=(res_proc_id, content_qu, runner_result_qu2, split_config[res_proc_id]))
                runner_threads.append(th)

        content_threads = [threading.Thread(target=self.content_proc, args=(proc_id, content_qu, content_result_qu)) for proc_id in range(16)]

        [th.start() for th in runner_threads]
        [th.start() for th in content_threads]

        [th.join() for th in runner_threads]
        [th.join() for th in content_threads]

        # print running time
        delta = time.time() - start_time
        m, s = divmod(delta, 60)
        h, m = divmod(m, 60)
        print("finished all {}:{:02d}:{:02d} ".format(int(h), int(m), int(s)))
||||
96
WebBasedCrawler/twitter/twparser.py
Normal file
96
WebBasedCrawler/twitter/twparser.py
Normal file
@@ -0,0 +1,96 @@
|
||||
from twitter.tweet import Tweet
|
||||
from twitter.twconfig import TwitterConfig
|
||||
|
||||
import bs4
|
||||
import datetime
|
||||
import pytz
|
||||
|
||||
class TweetParser:
    """Turns a tweet <div> from Twitter's web HTML into a Tweet object."""

    @staticmethod
    def parse(tag, keyword_id, depth=0, top_tw: Tweet=None):
        """Parse one `div.tweet` tag.

        :param tag: bs4 tag for the tweet container.
        :param keyword_id: crawl keyword id stored on the row.
        :param depth: reply-nesting depth (0 = top-level timeline tweet).
        :param top_tw: conversation-root tweet, or None for top-level.
        :return: populated Tweet.
        """
        tweet = Tweet()

        tweet.tweet_id = int(tag.attrs['data-tweet-id'])

        # The display name may contain child tags (emoji images etc.);
        # keep only the plain-text pieces, space-separated.
        nickname_tag = tag.select('strong.fullname')[0]
        tweet.user_name = ''
        for child in nickname_tag.children:
            if isinstance(child, bs4.element.NavigableString):
                if len(tweet.user_name) > 0:
                    tweet.user_name += ' '
                tweet.user_name += child
        tweet.user_id = tag.select('span.username')[0].text[1:]
        tweet.text = tag.select('p.tweet-text')[0].text

        # Posting time: take the raw epoch timestamp and localize to
        # Asia/Seoul (locale-independent, unlike the title-string parse
        # the commented-out variants used).
        timestamp = int(tag.select('span._timestamp')[0].attrs['data-time'])
        utc_dt = datetime.datetime.utcfromtimestamp(timestamp)
        local_tz = pytz.timezone('Asia/Seoul')
        local_dt = utc_dt.replace(tzinfo=pytz.utc).astimezone(local_tz)
        tweet.created_at = local_tz.normalize(local_dt)

        reply_tag = tag.select('div.ReplyingToContextBelowAuthor')
        tweet.is_reply = len(reply_tag) > 0

        reply_cnt_tag = tag.select('span.ProfileTweet-action--reply > span.ProfileTweet-actionCount')
        if len(reply_cnt_tag) > 0:
            tweet.reply_cnt = int(reply_cnt_tag[0].attrs['data-tweet-stat-count'])

        retweet_cnt_tag = tag.select('span.ProfileTweet-action--retweet > span.ProfileTweet-actionCount')
        if len(retweet_cnt_tag) > 0:
            tweet.retweet_cnt = int(retweet_cnt_tag[0].attrs['data-tweet-stat-count'])

        favorite_cnt_tag = tag.select('span.ProfileTweet-action--favorite > span.ProfileTweet-actionCount')
        if len(favorite_cnt_tag) > 0:
            # BUG FIX: was `tweet.favorites_cnt` — a typo that created a
            # new attribute, so Tweet.favorite_cnt stayed 0 forever.
            tweet.favorite_cnt = int(favorite_cnt_tag[0].attrs['data-tweet-stat-count'])

        link_tag = tag.select('a.js-permalink')
        if len(link_tag) > 0:
            tweet.tweet_link = TwitterConfig.protocol + '://' + TwitterConfig.top_url + link_tag[0].attrs['href']
        tweet.top_link = top_tw.tweet_link if top_tw else tweet.tweet_link

        tweet.depth = depth

        # Map onto the generic DB-row fields shared by all platforms.
        tweet.platform_name = 'twitter'
        tweet.platform_form = 'post'
        tweet.platform_title = top_tw.user_id if top_tw else tweet.user_id
        # BUG FIX: was `tweet.depth is 0` — identity comparison on ints
        # is an implementation detail; use equality.
        tweet.article_form = 'body' if tweet.depth == 0 else 'reply'
        tweet.article_id = tweet.user_id
        tweet.article_nickname = tweet.user_name
        tweet.article_data = tweet.text
        tweet.article_url = tweet.top_link
        tweet.article_date = tweet.created_at
        tweet.article_order = tweet.depth
        tweet.article_profileurl = TwitterConfig.protocol + '://' + TwitterConfig.top_url + '/' + tweet.user_id
        tweet.platform_id = top_tw.user_id if top_tw else tweet.user_id
        tweet.keyword_id = keyword_id
        tweet.reply_url = tweet.tweet_link

        return tweet

    @staticmethod
    def get_lone_container(soup, parent_tw):
        """Collect the 'lone tweet' containers that follow the parent
        tweet in a conversation page, preserving page order.

        Walks the containers in reverse and stops at the parent tweet
        itself, so only replies *below* the parent are returned.
        """
        lone_tweets = soup.select('div.ThreadedConversation--loneTweet')
        container_tags = []
        for tag in reversed(lone_tweets):
            li = tag.select('li.stream-item')
            if len(li) > 0 and 'data-item-id' in li[0].attrs:
                tweet_id = int(li[0].attrs['data-item-id'])
                if tweet_id == parent_tw.tweet_id:
                    break

            container_tags.append(tag)

        return reversed(container_tags)
||||
@@ -11,6 +11,8 @@ from kakao import kakaocrawl
|
||||
from naver import navercrawl
|
||||
from facebook import facebookcrawl
|
||||
from facebook import facebookcrawlbs
|
||||
from twitter import twittercrawl
|
||||
from youtube import youtubecrawl
|
||||
|
||||
from base.baseclasses import print_and_flush
|
||||
|
||||
@@ -26,8 +28,12 @@ class WebBasedCrawler:
|
||||
self.crawler = kakaocrawl.KakaoMainCrawler()
|
||||
elif platform == "navercafe":
|
||||
self.crawler = navercrawl.NaverCafeMainAreaCrawler()
|
||||
elif platform == "facebook":
|
||||
elif platform == 'facebook':
|
||||
self.crawler = facebookcrawlbs.FacebookMainCrawler()
|
||||
elif platform == 'twitter':
|
||||
self.crawler = twittercrawl.TwitterCrawler()
|
||||
elif platform == 'youtube':
|
||||
self.crawler = youtubecrawl.YoutubeMainCrawler()
|
||||
else:
|
||||
self.crawler = None
|
||||
raise Exception
|
||||
@@ -38,7 +44,7 @@ class WebBasedCrawler:
|
||||
|
||||
|
||||
browser_opt = ('chrome', "ie", "opera", "firefox")
|
||||
platform_opt = ('instagram', 'kakaostory', 'navercafe', "facebook")
|
||||
platform_opt = ('instagram', 'kakaostory', 'navercafe', 'facebook', 'twitter', 'youtube')
|
||||
|
||||
|
||||
def get_browser_info(platform_, file_name="browser.txt"):
|
||||
@@ -73,7 +79,7 @@ def get_browser_info(platform_, file_name="browser.txt"):
|
||||
if __name__ == '__main__':
|
||||
"""
|
||||
sys.argv[0] webbasedcrawler.py
|
||||
sys.argv[1] instagram, kakaochannel, navercafe, facebook
|
||||
sys.argv[1] instagram, kakaochannel, navercafe, facebook, twitter, youtube
|
||||
sys.argv[2] keyword_id
|
||||
sys.argv[3] data group
|
||||
sys.argv[4] start_day
|
||||
@@ -85,8 +91,7 @@ if __name__ == '__main__':
|
||||
else:
|
||||
print_and_flush("Check Argumenets!")
|
||||
exit(1)
|
||||
crawler = WebBasedCrawler(get_browser_info(sys.argv[1]), sys.argv[1], sys.argv[2],
|
||||
sys.argv[3], sys.argv[4], sys.argv[5])
|
||||
crawler = WebBasedCrawler(get_browser_info(sys.argv[1]), sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5])
|
||||
crawler.start()
|
||||
print_and_flush("Finished Crawling :)")
|
||||
exit(0)
|
||||
|
||||
0
WebBasedCrawler/youtube/__init__.py
Normal file
0
WebBasedCrawler/youtube/__init__.py
Normal file
7
WebBasedCrawler/youtube/youtubecrawl.py
Normal file
7
WebBasedCrawler/youtube/youtubecrawl.py
Normal file
@@ -0,0 +1,7 @@
|
||||
|
||||
class YoutubeMainCrawl:
    """Placeholder YouTube crawler; crawl logic is not implemented yet."""

    def __init__(self):
        pass

    def start(self):
        """No-op entry point matching the other crawlers' interface."""
        pass


# BUG FIX: webbasedcrawler.py instantiates `youtubecrawl.YoutubeMainCrawler()`
# but this module only defined `YoutubeMainCrawl`, so selecting the
# 'youtube' platform raised AttributeError.  Provide the expected name
# as a backward-compatible alias.
YoutubeMainCrawler = YoutubeMainCrawl
||||
Reference in New Issue
Block a user