instagram, kakaostory effect 추가
git-svn-id: svn://192.168.0.12/source@308 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
145
WebBasedCrawler/effect/effecterror.py
Normal file
145
WebBasedCrawler/effect/effecterror.py
Normal file
@@ -0,0 +1,145 @@
|
||||
# Numeric error codes for the EffectException hierarchy below.
# They index into error_message and error_message_code, so the three
# structures must stay in lockstep.
DB_OPEN_ERROR = 0
DB_FULL_ERROR = 1
DB_LONG_QUERY_ERROR = 2
DB_QUERY_ERROR = 3
DB_UNKNOWN_ERROR = 4
INTERNET_ERROR = 5
OUT_DATED_CRAWLER = 6
DELETED_URL_ERROR = 7
BLOCK_ERROR = 8
TIMEOUT = 9
NO_PROGRAM = 10
UNKNOWN_ERROR = 11

# Human-readable names, indexed by the numeric codes above.
error_message = [
    "DB_OPEN_ERROR",
    "DB_FULL_ERROR",
    "DB_LONG_QUERY_ERROR",
    "DB_QUERY_ERROR",
    "DB_UNKNOWN_ERROR",
    "INTERNET_ERROR",
    "OUT_DATED_CRAWLER",
    "DELETED_URL_ERROR",
    "BLOCK_ERROR",
    "TIMEOUT",
    "NO_PROGRAM",
    "UNKNOWN_ERROR",
]

# Short wire codes reported to the result server, indexed by the numeric codes.
error_message_code = [
    "e000",
    "e001",
    "e002",
    "e003",
    "e004",
    "e005",
    "e006",
    "e007",
    "e008",
    "e009",
    "e010",
    "e011",
]

# Token that joins the wire code and message in EffectException.__str__ so the
# receiver can split them again.
# NOTE(review): spelling ("SEPERATOR") kept as-is; it is part of the module API.
SEPERATOR = '!@#'
|
||||
|
||||
|
||||
class EffectException(Exception):
    """Base class for crawler 'effect' errors.

    Carries a numeric error code (an index into the module-level
    error_message_code list) plus an optional human-readable message.
    __str__ renders "<code>!@#<message>" so the receiver can split the two
    parts on SEPERATOR.
    """

    def __init__(self, error_no, msg='', *args, **kwargs):
        self.error_no = error_no
        # Raises IndexError for out-of-range codes; callers are expected to
        # pass one of the module-level constants.
        self.error_message_code = error_message_code[self.error_no]
        self.msg = msg
        Exception.__init__(self, *args, **kwargs)

    def __str__(self):
        # Bug fix: the original caught the exception, printed it, and then
        # executed `return s` with `s` unbound, raising UnboundLocalError.
        # Fall back to the bare wire code instead.
        try:
            return self.error_message_code + SEPERATOR + self.msg
        except Exception as e:
            print(e)
            return self.error_message_code
|
||||
|
||||
|
||||
class DBOpenError(EffectException):
    """Raised when the result database cannot be opened."""

    def __init__(self, msg='', *args, **kwargs):
        super().__init__(DB_OPEN_ERROR, msg, *args, **kwargs)


class DBFullError(EffectException):
    """Raised when the result database is out of space."""

    def __init__(self, msg='', *args, **kwargs):
        super().__init__(DB_FULL_ERROR, msg, *args, **kwargs)


class DBLongQueryError(EffectException):
    """Raised when a query exceeds the allowed length/time."""

    def __init__(self, msg='', *args, **kwargs):
        super().__init__(DB_LONG_QUERY_ERROR, msg, *args, **kwargs)


class DBQueryError(EffectException):
    """Raised when an insert/select against the result database fails."""

    def __init__(self, msg='', *args, **kwargs):
        super().__init__(DB_QUERY_ERROR, msg, *args, **kwargs)


class DBUnknownError(EffectException):
    """Raised for database failures that fit no other category."""

    def __init__(self, msg='', *args, **kwargs):
        super().__init__(DB_UNKNOWN_ERROR, msg, *args, **kwargs)


class InternetError(EffectException):
    """Raised when the network itself is unreachable."""

    def __init__(self, msg='', *args, **kwargs):
        super().__init__(INTERNET_ERROR, msg, *args, **kwargs)


class OutDatedCrawler(EffectException):
    """Raised when the target site changed and the crawler no longer matches it."""

    def __init__(self, msg='', *args, **kwargs):
        super().__init__(OUT_DATED_CRAWLER, msg, *args, **kwargs)


class DeletedUrlError(EffectException):
    """Raised when the target URL has been removed (e.g. HTTP 404)."""

    def __init__(self, msg='', *args, **kwargs):
        super().__init__(DELETED_URL_ERROR, msg, *args, **kwargs)


class BlockError(EffectException):
    """Raised when the target site blocked the crawler."""

    def __init__(self, msg='', *args, **kwargs):
        super().__init__(BLOCK_ERROR, msg, *args, **kwargs)


class TimeOutError(EffectException):
    """Raised when a crawl step exceeded its time budget."""

    def __init__(self, msg='', *args, **kwargs):
        super().__init__(TIMEOUT, msg, *args, **kwargs)


class NoProgramError(EffectException):
    """Raised when a required external program is missing."""

    def __init__(self, msg='', *args, **kwargs):
        super().__init__(NO_PROGRAM, msg, *args, **kwargs)


class UnknownError(EffectException):
    """Raised for failures that fit no other category."""

    def __init__(self, msg='', *args, **kwargs):
        super().__init__(UNKNOWN_ERROR, msg, *args, **kwargs)
|
||||
|
||||
|
||||
213
WebBasedCrawler/effect/effectinstagram.py
Normal file
213
WebBasedCrawler/effect/effectinstagram.py
Normal file
@@ -0,0 +1,213 @@
|
||||
import re
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import requests
|
||||
import requests.exceptions
|
||||
import time
|
||||
|
||||
import insta.instaheaders as instaheaders
|
||||
import insta.instaparser as instaparser
|
||||
from effect.resultsender import ResultSender
|
||||
from effect.resultsender import get_settings
|
||||
|
||||
from base.baseclasses import printl
|
||||
from base.baseclasses import wait
|
||||
|
||||
import effect.effecterror
|
||||
|
||||
# Debug switches; is_debuging gates the verbose request/response logging in
# InstaContent.log_* below.
is_debuging = False
is_debug = True

# Token joining error code and message (mirrors effect.effecterror.SEPERATOR).
SEPERATOR = '!@#'

# Crawl tuning knobs.
num_of_list_ajax = 24        # posts requested per list ajax call
num_of_reply_ajax = 100      # comments requested per reply ajax call
list_wait_sec = 0.9          # delay between list page fetches
body_wait_sec = 0.5          # delay between body fetches
reply_wait_sec = 0.8         # delay between reply fetches
num_of_page_down = 20
num_of_content_process = 10
requests_timeout = 60        # seconds; also the streaming read budget in requests_get()
num_of_retry_proxy = 5

# Instagram endpoints.
insta_url = "https://www.instagram.com/"
insta_tag_url = "https://www.instagram.com/explore/tags/"
insta_query = "https://www.instagram.com/query/"
insta_body_url = 'https://www.instagram.com/p/'
|
||||
|
||||
|
||||
def requests_get(req, timeout=requests_timeout):
    """Read the body of a streamed requests response under a wall-clock budget.

    :param req: a requests.Response opened with stream=True
    :param timeout: maximum seconds to spend reading before giving up
    :return: the full body as bytes
    :raises Exception: with message "timeout" when the budget is exceeded
            (the response is closed first)
    """
    deadline = time.time() + timeout
    chunks = []
    for piece in req.iter_content(1024):
        chunks.append(piece)
        if time.time() > deadline:
            req.close()
            raise Exception("timeout")
    return b''.join(chunks)
|
||||
|
||||
|
||||
class InstaContent:
    """Fetches one Instagram post (body + comments) over plain HTTP.

    Cookies returned by each response are accumulated in self.cookies and
    replayed on subsequent requests; comment pages are fetched via the
    /query/ ajax endpoint using the cursor returned by the parser.
    """

    def __init__(self, url, cookies, referer, proxies=None):
        self.__r = None              # last requests.Response
        self.__referer = ''          # referer replayed on ajax calls
        self.__code = ''             # shortcode extracted from the post URL
        self.body = None             # parsed body dict (from instaparser)
        self.reply = []              # parsed comment dicts
        self.start_cursor = None     # pagination cursor for older comments
        self.has_previous = False    # True while older comment pages remain
        self.cookies = {}
        self.proxies = proxies
        # Fetch immediately; __init__ performs network I/O.
        self.load_url(url, cookies, referer, self.proxies)

    def load_url(self, url, cookies, referer, proxies):
        """Download and parse the post HTML; returns (body, reply)."""
        self.__set_cookies(cookies)
        self.__r = requests.get(url, headers=instaheaders.get_headers_for_body_html(self.cookies), proxies=proxies,
                                timeout=requests_timeout, stream=True)
        # Stream the body with a hard time budget before checking the status.
        content = requests_get(self.__r)
        self.__r.raise_for_status()
        self.__referer = referer
        self.__code = self.__get_code(url)
        # self.body, self.reply, self.start_cursor, self.has_previous = instaparser.parse_body_html(self.__r.content)
        self.body, self.reply, self.start_cursor, self.has_previous = instaparser.parse_body_html(content)
        self.__set_cookies(self.__r.cookies)
        self.__r.close()
        return self.body, self.reply

    def get_body(self):
        """Return the parsed body dict."""
        return self.body

    def get_reply(self):
        """Return the most recently parsed page of comments."""
        return self.reply

    def load_reply_more(self):
        """Fetch the next (older) page of comments via the ajax endpoint.

        Updates self.reply, self.start_cursor and self.has_previous;
        returns only the newly fetched page.
        """
        form_data = instaheaders.get_form_data_for_reply(self.__code, self.start_cursor, num_of_reply_ajax)
        headers = instaheaders.get_headers_for_ajax(self.cookies, self.__referer, form_data)
        self.log_load_reply_more_before(form_data, headers)
        self.__r = requests.post(insta_query, headers=headers, data=form_data, proxies=self.proxies,
                                 timeout=requests_timeout, stream=True)
        content = requests_get(self.__r)
        self.__r.raise_for_status()
        self.__set_cookies(self.__r.cookies)
        # self.reply, self.start_cursor, self.has_previous = instaparser.parse_reply_ajax(self.__r.content)
        self.reply, self.start_cursor, self.has_previous = instaparser.parse_reply_ajax(content)
        self.__r.close()
        self.log_load_reply_more_after()
        return self.reply

    def get_cookies(self):
        """Return the accumulated cookie jar (a plain dict)."""
        return self.cookies

    def __get_code(self, url):
        """Extract the post shortcode from an /p/<code>/ URL."""
        m = re.search(insta_body_url + "([^/]*)", url)
        if m:
            return m.group(1)
        else:
            raise RuntimeError('Tag Error')

    def __set_cookies(self, cookies):
        # Merge into the accumulated jar; later responses overwrite older values.
        for k, v in cookies.items():
            self.cookies[k] = v

    def get_proxy(self):
        """Return the proxies mapping passed to requests (may be None)."""
        return self.proxies

    def log_load_reply_more_before(self, form_data, headers):
        # Debug-only dump of the outgoing ajax request.
        if is_debuging:
            printl("<ContentReply Start>")
            printl("<ContentReply requests>")
            printl('start_cursor = ' + self.start_cursor)
            printl('form_data' + form_data)
            printl('headers = ', end=' ')
            printl(headers)

    def log_load_reply_more_after(self):
        # Debug-only dump of the ajax response state.
        if is_debuging:
            printl("<ContentReply response>")
            printl('self.__r.cookies=', end='')
            printl(self.__r.cookies)
            printl('start_cursor = ' + str(self.start_cursor))
            printl('has_previous = ', end='')
            printl(self.has_previous)
            printl('proxies = ', end='')
            printl(self.proxies)
            printl("<ContentReply End>")
|
||||
|
||||
|
||||
class EffectInsta(object):
    """Crawl one Instagram post and store engagement statistics.

    start() fetches the body and all comment pages, aggregates them via
    statistics(), and inserts the result into the stats_s1_effect table.
    All failures are re-raised as effect.effecterror categories.
    """

    def __init__(self, event_num, event_code, url):
        self.event_num = event_num
        self.event_code = event_code
        self.url = url

    def start(self):
        """Crawl, aggregate, and persist; raises EffectException subclasses."""
        try:
            content = InstaContent(self.url, {}, self.url)
            body = content.get_body()
            replies = content.get_reply()
            # Older pages are prepended so the final list is chronological.
            while content.has_previous:
                replies = content.load_reply_more() + replies
                wait(2)
        except requests.exceptions.HTTPError as e:
            # 4xx/5xx usually means the post was removed.
            raise effect.effecterror.DeletedUrlError(str(e))
        except Exception as e:
            raise effect.effecterror.OutDatedCrawler(str(e))

        if not body.get('article_id', ''):
            raise effect.effecterror.OutDatedCrawler("NoData Crawled")

        try:
            result = self.statistics(body, replies)
        except Exception as e:
            raise effect.effecterror.UnknownError(str(e))

        try:
            cg = get_settings()
        except Exception as e:
            raise effect.effecterror.UnknownError(str(e) + '\n' + 'effect.ini setting error')

        try:
            result_sender = ResultSender(cg['host'], cg['user'], cg['pass'], cg['name']) if cg else ResultSender()
            result_sender.connect()
            result_sender.send('stats_s1_effect', result)
            result_sender.close()
        except Exception as e:
            raise effect.effecterror.DBQueryError(str(e))

    def statistics(self, body, replies):
        """Build the row inserted into stats_s1_effect.

        :param body: parsed body dict ('article_hit', 'article_order',
                     'reply_url', 'article_id', 'article_date', ...)
        :param replies: list of parsed comment dicts
        :return: dict of counters keyed by column name
        """
        result = {}
        result['viewcount'] = int(body.get('article_hit', 0))
        result['event_num'] = self.event_num
        # Bug fix: the original wrote int(body.get('key'), 0), passing 0 as
        # int()'s *base* argument; a missing key then produced int(None, 0)
        # and crashed with TypeError. The default belongs inside get().
        result['replycount'] = int(body.get('article_order', 0))
        result['likecount'] = int(body.get('reply_url', 0))
        result['interactioncount'] = self.get_replycount(body, replies)
        result['replybuzz'] = self.get_reply_buzz(body, replies)
        result['engagementcount'] = result.get('likecount', 0) + result.get('replycount', 0)
        return result

    def get_replycount(self, body, replies):
        """Number of distinct commenters, excluding the post author."""
        set_reply_id = set()
        for i in replies:
            set_reply_id.add(i.get('article_id', ''))
        return len(set_reply_id) - 1 if body.get('article_id') in set_reply_id else len(set_reply_id)

    def get_reply_buzz(self, body, replies):
        """JSON mapping 'YYYYMMDD' -> reply count, from posting date to today.

        Every day in the range is present (0 when silent); replies dated
        outside the range are ignored.
        """
        start_date = datetime.datetime.strptime(body['article_date'], '%Y-%m-%d %H:%M:%S').date()
        end_date = datetime.datetime.now().date()
        date_dict = dict()
        while start_date <= end_date:
            date_dict[start_date.strftime('%Y%m%d')] = 0
            start_date = start_date + datetime.timedelta(days=1)

        for reply in replies:
            str_reply_date = reply.get('article_date', '1990-01-01 00:00:00')
            reply_date = datetime.datetime.strptime(str_reply_date, '%Y-%m-%d %H:%M:%S').date().strftime('%Y%m%d')
            if reply_date in date_dict:
                date_dict[reply_date] = date_dict[reply_date] + 1
        return json.dumps(date_dict, sort_keys=True)
|
||||
|
||||
485
WebBasedCrawler/effect/effectkakaostory.py
Normal file
485
WebBasedCrawler/effect/effectkakaostory.py
Normal file
@@ -0,0 +1,485 @@
|
||||
import datetime
|
||||
import json
|
||||
import effect.effecterror
|
||||
import re
|
||||
|
||||
from kakao.kakaoexception import NotFoundElementError
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from base.baseclasses import wait
|
||||
from effect.resultsender import get_settings
|
||||
from effect.resultsender import ResultSender
|
||||
from base.baseclasses import find_element_by_css_selector
|
||||
from base.baseclasses import enter_element
|
||||
|
||||
# Prefer the faster lxml parser when installed; BeautifulSoup falls back to
# the built-in html.parser otherwise.
try:
    import lxml
    parser_opt = 'lxml'
except ImportError:
    parser_opt = 'html.parser'

# Token joining error code and message (mirrors effect.effecterror.SEPERATOR).
SEPERATOR = '!@#'

# KakaoStory endpoints; article ids are hrefs relative to these.
kakaostory_url = 'https://story.kakao.com/'
kakaostory_channel_url = 'https://story.kakao.com/ch/'
limit_reload = 5     # max page reloads before giving up
num_of_retry = 3     # retry budget for flaky element lookups

# Parses the Korean timestamp tooltip into groups:
# (year, month, day, am/pm word, hour, minute).
re_date = re.compile("^([\\d]{4})[\\D]+([\\d]{1,2})[\\D]+([\\d]{1,2})"
                     "[\\w]+[\\s]+([\\w]+)[\\s]+([\\d]{1,2})[\\D]([\\d]{1,2})")
|
||||
|
||||
|
||||
def get_date(element):
    """Extract a timestamp from a KakaoStory span.time element.

    :param element: this may be span.time element
    :return: 'yyyy-MM-dd hh:mm:ss', or '0000-00-00 00:00:00' when no
             parsable date is found
    """
    # Before mouse-over the date string is in the 'title' attribute;
    # after mouse-over it moves to 'data-tooltip' — try both.
    m = re_date.search(element.attrs.get('title', '')) \
        or re_date.search(element.attrs.get('data-tooltip', ''))

    if m:
        temp_date = datetime.datetime(int(m.group(1)), int(m.group(2)), int(m.group(3)),
                                      int(m.group(5)), int(m.group(6)))
        # add 12 hour when the article is written at p.m
        # NOTE(review): '오전 12시' (12 a.m.) would be kept as hour 12 rather
        # than converted to 0 — confirm whether the site renders midnight that way.
        if m.group(4) == "오후" and int(m.group(5)) < 12:
            temp_date += datetime.timedelta(hours=12)

        # convert datetime.datetime to str
        return str(temp_date)
    # return invalid date instead of exception
    else:
        # raise NotFoundElementError("get_date exception")
        return "0000-00-00 00:00:00"
|
||||
|
||||
|
||||
class BodyCrawler(object):
    """Scrape the main article of a loaded KakaoStory page.

    Takes a selenium driver whose current page is the article; parses
    driver.page_source with BeautifulSoup and exposes find_* accessors plus
    get(), which assembles the full body dict.
    """

    def __init__(self, driver):
        self.driver = driver
        self.soup = None              # BeautifulSoup of driver.page_source
        self.section_activity = None  # div.section._activity root element
        self.set_soup_and_activity()
        if not self.section_activity:
            raise NotFoundElementError("section _activity is not Found")

    # calling point may differ
    def set_soup_and_activity(self):
        """(Re)parse the current page source and cache the activity root."""
        self.soup = BeautifulSoup(self.driver.page_source, parser_opt)
        # There are many div.section _activity. But element we use is in div.cover_wrapper
        # cover_wrapper = self.soup.find('div', class_='cover_wrapper')
        self.section_activity = self.soup.find('div', class_='section _activity')

    def find_article_id(self):
        """Author id: the profile href with the site prefix and leading '/' stripped."""
        a = self.section_activity.find('a', class_='pf_name')
        href = a.attrs['href'].replace('https://story.kakao.com/', '')
        return href[1:] if href.startswith('/') else href

    def find_article_nickname(self):
        """Author display name from the profile link text."""
        a = self.section_activity.find('a', class_='pf_name')
        return a.text

    def find_article_url(self):
        # in chrome, current_url is equal to article_url
        # need to check other browser
        return self.driver.current_url

    def find_article_modified_date(self):
        """Modified timestamp as 'yyyy-MM-dd hh:mm:ss', or None when absent."""
        # get DOM about modified date
        times = None
        add_top = self.section_activity.find('div', class_='add_top')
        if add_top:
            times = add_top.find_all('span', class_='time')

        # written time is default. if the article was modified, modified time is added.
        # so if length of times is not equal to 2, there is only written time.
        if not times or len(times) < 2:
            return None

        # times[0] : written time, times[1] : modified time
        # times[1] structure : <span><span ...> </span></span>
        # check times[1].span exists
        if times[1].span:

            # before mouse over the element(tooltip), the date string is in the title attribute of span
            # after mouse over the element(tooltip), the date string is in the data-tooltip attribute of span
            m = re_date.search(times[1].span.attrs.get('title', '')) \
                or re_date.search(times[1].span.attrs.get('data-tooltip', ''))

            if m:
                temp_date = datetime.datetime(int(m.group(1)), int(m.group(2)), int(m.group(3)),
                                              int(m.group(5)), int(m.group(6)))
                # add 12 hour when the article is written at p.m
                if m.group(4) == "오후" and int(m.group(5)) < 12:
                    temp_date += datetime.timedelta(hours=12)

                # convert datetime.datetime to str
                return str(temp_date)
            else:
                # raise NotFoundDataError('data for find_article_modified is not found')
                return None

        # return None instead of exception.
        else:
            # raise NotFoundElementError('find_article_modified DOM is missing')
            return None

    def find_article_date(self):
        """Written timestamp as 'yyyy-MM-dd hh:mm:ss' ('0000-00-00 00:00:00' on parse failure)."""
        # modified date is a higher priority than written date

        # modified_date = self.find_article_modified_date()
        # if modified_date:
        #     return modified_date

        times = None
        # get DOMs about date
        add_top = self.section_activity.find('div', class_='add_top')
        if add_top:
            times = add_top.find_all('span', class_='time')
        else:
            raise NotFoundElementError("find_article_data DOM is missing : add_top")
        if not times:
            raise NotFoundElementError("find_article_data DOM is missing : time")

        # before mouse over the element(tooltip), the date string is in the title attribute of span
        # after mouse over the element(tooltip), the date string is in the data-tooltip attribute of span
        m = re_date.search(times[0].attrs.get('title', '')) \
            or re_date.search(times[0].attrs.get('data-tooltip', ''))

        if m:
            temp_date = datetime.datetime(int(m.group(1)), int(m.group(2)), int(m.group(3)),
                                          int(m.group(5)), int(m.group(6)))
            # add 12 hour when the article is written at p.m
            if m.group(4) == "오후" and int(m.group(5)) < 12:
                temp_date += datetime.timedelta(hours=12)

            # convert datetime.datetime to str
            return str(temp_date)
        # return invalid date instead of exception
        else:
            # raise NotFoundElementError("find_article_date exception")
            return "0000-00-00 00:00:00"

    def find_article_profileurl(self):
        """Author profile-image URL, or '' when missing (not essential)."""
        profile_area = self.section_activity.find('div', class_='_profileArea pf')
        # check a>img
        if profile_area.a and profile_area.a.img and profile_area.a.img.get('src'):
            return profile_area.a.img.get('src')
        # this is not essential, so no exception occur
        else:
            return ''

    def find_article_data(self):
        """
        :return: trimmed article_data ('' when the text wrapper is absent)
        """
        content = self.section_activity.find('div', class_='txt_wrap')
        if content and content.text:
            # trim
            return content.text.strip().replace('\xa0', '\n')
        # if there is no content or text, return empty data
        else:
            return ''

    def find_article_title(self):
        """Channel title when present, else the first line of the article text (max 30 chars)."""
        # strong.tit_channel is title of channel
        # if strong.tit_channel do not exist,
        # title is first line of article_data
        # this definition is determined by me -_-
        # find_article_data return trimmed string
        strong = self.section_activity.find('strong', class_='tit_channel')
        if strong and strong.text:
            return strong.text.replace('\xa0', '')

        article_data = self.find_article_data()
        if article_data:
            for line in article_data.splitlines():
                # limit title length
                return line[0:30] if len(line) > 30 else line
        else:
            return ''

    def find_article_etc(self, class_name):
        """
        this function is used for crawling number of shares, replies and feelings
        :param class_name: CSS class of the strong element holding the counter
        :return: a string of number of shares, replies, or feelings ('0' when absent)
        """
        element = self.section_activity.find('strong', class_=class_name)

        # check element has text that indicate the number
        if element and element.text:
            # It may contain comma ',' to recognize easily
            # Remove comma ',' to convert from str to int
            txt = element.text.replace(',', '')
            return txt
        # if there is no element or text, return '0' instead of raising exception
        else:
            # raise NotFoundElementError('find_article_etc is not Found element with ' + class_name)
            return '0'

    def find_article_share(self):
        """Share count as a string."""
        return self.find_article_etc('_storyShareCount')

    def find_article_feeling(self):
        """Like/feeling count as a string."""
        return self.find_article_etc('_likeCount')

    def find_article_reply_num(self):
        """Comment count as a string."""
        return self.find_article_etc('_commentCount')

    def find_platform_form(self):
        """'channel' for ch/... article ids, otherwise 'story'."""
        article_id = self.find_article_id()
        return 'channel' if article_id.startswith('ch/') else 'story'

    def get(self):
        """
        you need to put 'keyword_id'
        :return: dict for crawled body content
        """
        content = dict()
        content['article_id'] = self.find_article_id()
        content['article_nickname'] = self.find_article_nickname()
        content['article_data'] = self.find_article_data()
        content['article_title'] = self.find_article_title()
        content['article_date'] = self.find_article_date()
        content['article_url'] = self.find_article_url()
        content['article_profileurl'] = self.find_article_profileurl()
        # counters are repurposed into generic column names:
        # article_order = comment count, article_parent = share count,
        # reply_url = like count
        content['article_order'] = self.find_article_reply_num()
        content['article_parent'] = self.find_article_share()
        content['reply_url'] = self.find_article_feeling()
        content['platform_form'] = self.find_platform_form()
        content['article_form'] = 'body'
        content['platform_name'] = 'kakaostory'
        content['platform_id'] = content['article_id']
        content['platform_title'] = content['article_nickname']
        return content
|
||||
|
||||
|
||||
class ReplyCrawler(object):
    """Scrape all comments of a loaded KakaoStory article.

    Uses selenium to repeatedly click the "more comments" link until every
    comment is present, then parses the final DOM with BeautifulSoup.
    get() is the entry point; the find_* methods each return one column as a
    list aligned by comment index.
    """

    def __init__(self, driver):
        self.driver = driver
        self.soup = None              # set by set_soup_and_activity()
        self.section_activity = None  # div.section._activity root
        self.ul = None                # ul.list._listContainer holding comments
        self.lis = None

    def set_soup_and_activity(self):
        """Parse the current page source and cache the comment container."""
        self.soup = BeautifulSoup(self.driver.page_source, parser_opt)
        # There are many div.section _activity. But a element we use is in div.cover_wrapper
        # cover_wrapper = self.soup.find('div', class_='cover_wrapper')
        self.section_activity = self.soup.find('div', class_='section _activity')
        self.ul = self.section_activity.find('ul', class_='list _listContainer')

    def load_all_reply(self):
        """Click 'show more' until no further comments load."""
        previous_num_of_replies = 0
        while self.has_more():
            self.click_load_more_reply_btn()
            # check the number of replies before and after click_load_more_reply_btn()
            # If These were equal, the link or ajax failed
            current_num_of_replies = self.get_num_of_replies()
            if previous_num_of_replies == current_num_of_replies:
                break
            previous_num_of_replies = current_num_of_replies

    def get_num_of_replies(self):
        """Count currently loaded comment <li> elements (0 when none/missing)."""
        # Find ul element that contains replies
        # if raise occur, there is no reply
        # for performance, this method may is implemented using bs4
        try:
            ul = find_element_by_css_selector(self.driver,
                                              #"div.cover_wrapper "
                                              "div[class='section _activity'] "
                                              "ul[class='list _listContainer']", 5)
            li = ul.find_elements_by_tag_name('li')
            return len(li)
        except Exception as e:
            return 0

    def click_load_more_reply_btn(self):
        """Activate the 'show more comments' link; silently no-ops when absent."""
        try:
            # find a link to load reply and click/enter it
            a = find_element_by_css_selector(self.driver,
                                             #"div.cover_wrapper "
                                             "div[class='section _activity'] "
                                             "a[class='_btnShowMoreComment']", 5)
            enter_element(a)

        # no link is in the browser. Nothing happens instead raise exception. But log this event
        except Exception as e:
            pass
            # printl("In click_load_more_reply_btn, there is not a link to load replies")
            # printl(e)

    def has_more(self):
        """True while the 'show more' button is visible (style contains 'block')."""
        # In the case that raise exception,
        # there is no more reply or css selector of the show_more is invalid
        # These two case can't be classified by exception because the logic is same
        try:
            # find show_more element
            show_more = find_element_by_css_selector(self.driver,
                                                     # "div.cover_wrapper "
                                                     "div[class='section _activity'] "
                                                     "p[class='more _showMoreCommentContainer']", 5)

            # 'display:block;' -> display the button, 'display:none;' -> hide the button
            if 'block' in show_more.get_attribute('style'):
                return True
            else:
                return False
        # return False in the two case
        # First case is that loading replies is finished
        # Second case is that css selector to find element is invalid
        except Exception as e:
            return False

    # find_xxxx functions

    def find_article_id(self):
        """Commenter ids (profile hrefs, site prefix and leading '/' stripped)."""
        # Find name placeholder
        divs = self.ul.find_all('a', class_='name _namePlaceholder')
        # Get article_ids and remove kakaostory url in article_id
        article_ids = [div.attrs.get('href', '').replace(kakaostory_url, '')
                       for div in divs if div.attrs.get('href', '')]
        # Refine hrefs. Href may start with '/'
        article_id = map(lambda x: x[1:] if x.startswith('/') else x, article_ids)
        # Return list because of unification of types
        return list(article_id)

    def find_article_nickname(self):
        """Commenter display names ('' when missing)."""
        divs = self.ul.find_all('a', class_='name _namePlaceholder')
        # If div.text exist, return div.text. Otherwise return empty string
        return [div.text if div.text else '' for div in divs]

    def find_article_data(self):
        """Comment texts with the leading metadata (div.p) stripped."""
        divs = self.ul.find_all('div', class_='txt')
        # The div.text has meta-data in div.p.text. If meta-data exists, remove it
        # When element does not exists, return empty string
        return [div.text[len(div.p.text):].replace('\xa0', '\n')
                if div.p else div.text if div.text else '' for div in divs]

    def find_article_date(self):
        """Comment timestamps via get_date()."""
        divs = self.ul.find_all('span', class_='time')
        return list(map(get_date, divs))

    def find_article_like(self):
        """Per-comment like counts as strings ('' when missing)."""
        spans = self.ul.find_all('span', class_='like_num _likeCommentCount')
        # The number of like exists in span.like_num _likeCommentCount Unless it is present
        return [span.text if span.text else '' for span in spans]

    def find_article_profileurl(self):
        """Commenter profile-image URLs ('' when missing)."""
        divs = self.ul.find_all('div', class_='pf')
        return list(map(lambda div: div.a.img.attrs.get('src', '') if div.a and div.a.img else '', divs))

    def get(self):
        """
        Need to put platform_title, platform_id, platform_form from body
        :return: a list of replies. Need to put platform_title, platform_id
        """
        # load all replies
        self.load_all_reply()

        # After loading all replies, crawl replies using BeautifulSoup
        self.set_soup_and_activity()

        article_ids = self.find_article_id()
        article_nicknames = self.find_article_nickname()
        article_datas = self.find_article_data()
        article_dates = self.find_article_date()
        article_profileurls = self.find_article_profileurl()
        article_likes = self.find_article_like()
        article_url = self.driver.current_url

        replies = []
        # This may occur exception when indices of each elements is not matched
        # This exception described above is intended
        for i in range(len(article_ids)):
            reply = dict()
            reply['article_id'] = article_ids[i]
            reply['article_nickname'] = article_nicknames[i]
            reply['article_data'] = article_datas[i]
            reply['article_date'] = article_dates[i]
            reply['article_profileurl'] = article_profileurls[i]
            reply['reply_url'] = article_likes[i]
            reply['platform_name'] = 'kakaostory'
            reply['article_form'] = 'reply'
            reply['article_url'] = article_url
            reply['article_order'] = str(i)
            replies.append(reply)
        return replies
|
||||
|
||||
|
||||
class EffectKakaostory(object):
    """Crawl one KakaoStory article via a selenium driver and store statistics.

    start() loads the page, scrapes body and all comments, aggregates them via
    statistics(), and inserts the result into stats_s1_effect. All failures
    are re-raised as effect.effecterror categories.
    """

    def __init__(self, event_num, event_code, url, driver):
        self.event_num = event_num
        self.event_code = event_code
        self.url = url
        self.driver = driver

    def start(self):
        """Crawl, aggregate, and persist; raises EffectException subclasses."""
        try:
            self.driver.get(self.url)
            wait(3)
            body_crawler = BodyCrawler(self.driver)
            reply_crawler = ReplyCrawler(self.driver)
            body = body_crawler.get()
            replies = reply_crawler.get()
        except Exception as e:
            raise effect.effecterror.OutDatedCrawler(str(e))

        if not body.get('article_id', ''):
            raise effect.effecterror.OutDatedCrawler("NoData Crawled")

        try:
            result = self.statistics(body, replies)
        except Exception as e:
            raise effect.effecterror.UnknownError(str(e))

        try:
            cg = get_settings()
        except Exception as e:
            raise effect.effecterror.UnknownError(str(e) + '\n' + 'effect.ini setting error')

        try:
            result_sender = ResultSender(cg['host'], cg['user'], cg['pass'], cg['name']) if cg else ResultSender()
            result_sender.connect()
            result_sender.send('stats_s1_effect', result)
            result_sender.close()
        except Exception as e:
            raise effect.effecterror.DBQueryError(str(e))

    def statistics(self, body, replies):
        """Build the row inserted into stats_s1_effect.

        :param body: body dict from BodyCrawler.get()
        :param replies: list of reply dicts from ReplyCrawler.get()
        :return: dict of counters keyed by column name
        """
        result = {}
        result['viewcount'] = int(body.get('article_hit', 0))
        result['event_num'] = self.event_num
        # Bug fix: the original wrote int(body.get('key'), 0), passing 0 as
        # int()'s *base* argument; a missing key then produced int(None, 0)
        # and crashed with TypeError. The default belongs inside get().
        result['replycount'] = int(body.get('article_order', 0))
        result['likecount'] = int(body.get('reply_url', 0))
        result['interactioncount'] = self.get_replycount(body, replies)
        result['replybuzz'] = self.get_reply_buzz(body, replies)
        result['engagementcount'] = result.get('likecount', 0) + result.get('replycount', 0)
        return result

    def get_replycount(self, body, replies):
        """Number of distinct commenters, excluding the article author."""
        set_reply_id = set()
        for i in replies:
            set_reply_id.add(i.get('article_id', ''))
        return len(set_reply_id) - 1 if body.get('article_id') in set_reply_id else len(set_reply_id)

    def get_reply_buzz(self, body, replies):
        """JSON mapping 'YYYYMMDD' -> reply count, from posting date to today.

        Every day in the range is present (0 when silent); replies dated
        outside the range are ignored.
        """
        start_date = datetime.datetime.strptime(body['article_date'], '%Y-%m-%d %H:%M:%S').date()
        end_date = datetime.datetime.now().date()
        date_dict = dict()
        while start_date <= end_date:
            date_dict[start_date.strftime('%Y%m%d')] = 0
            start_date = start_date + datetime.timedelta(days=1)

        for reply in replies:
            str_reply_date = reply.get('article_date', '1990-01-01 00:00:00')
            reply_date = datetime.datetime.strptime(str_reply_date, '%Y-%m-%d %H:%M:%S').date().strftime('%Y%m%d')
            if reply_date in date_dict:
                date_dict[reply_date] = date_dict[reply_date] + 1
        return json.dumps(date_dict, sort_keys=True)
|
||||
|
||||
|
||||
|
||||
88
WebBasedCrawler/effect/resultsender.py
Normal file
88
WebBasedCrawler/effect/resultsender.py
Normal file
@@ -0,0 +1,88 @@
|
||||
import re
|
||||
import configparser
|
||||
|
||||
class ResultSender:
    """Inserts crawled statistics rows into the MySQL result database."""

    # __import__('pymysql.cursors') returns the *top-level* pymysql package
    # (with the cursors submodule loaded as a side effect), so both
    # self.pymysql.connect and self.pymysql.cursors work below.
    pymysql = __import__('pymysql.cursors')
    # Matches emoji code points that a 3-byte 'utf8' MySQL charset cannot store.
    re_emoji = re.compile(u'[\U0001F300-\U0001F64F\U0001F680-\U0001F6FF]+', re.UNICODE)

    # NOTE(review): hard-coded fallback credentials; production callers pass
    # values read from effect.ini via get_settings().
    def __init__(self, host='182.162.171.147', user='admin', passwd='admin123', db='bigbird'):
        self.host = host
        self.user = user
        self.passwd = passwd
        self.db = db
        self.conn = None  # pymysql connection, created lazily by connect()

    def connect(self):
        """Open the MySQL connection (utf8 charset, DictCursor rows)."""
        self.conn = self.pymysql.connect(host=self.host,
                                         user=self.user,
                                         passwd=self.passwd,
                                         db=self.db,
                                         charset='utf8',
                                         cursorclass=self.pymysql.cursors.DictCursor)

    def close(self):
        """Close the connection if one was opened."""
        if self.conn:
            self.conn.close()

    def _make_query(self, table_name, dictionary):
        """Build an INSERT statement; dictionary keys become column names.

        Non-int values are emoji-stripped and escaped with conn.escape().
        NOTE(review): the table and column names are concatenated unescaped —
        callers must never pass untrusted keys or table names.
        """
        query = "insert into " + str(table_name) + " ("
        key_list = list()
        val_list = list()
        for key, val in dictionary.items():
            key_list.append(key)
            if type(val) == int:
                val_list.append(str(val))
            else:
                val_list.append(self.conn.escape(self.re_emoji.sub(' ', str(val))))
        return query + ",".join(key_list) + ") values (" + ",".join(val_list) + ")"  # + " on duplicate key update " + \
        # ','.join(map(lambda x:x[0] + '=' + x[1], zip(key_list, val_list)))

    def send(self, table_name, dictionary):
        """Insert one row built from *dictionary* into *table_name*."""
        query = self._make_query(table_name, dictionary)
        self._exec_query(query)

    def _exec_query(self, query):
        # Reconnect transparently if the server dropped the connection.
        if not self.conn.open:
            self.connect()
        with self.conn.cursor() as cursor:
            cursor.execute(query)
        self.conn.commit()
|
||||
|
||||
|
||||
def get_settings(option='database', filename='effect.ini'):
    """Read one section of an ini file and return it as a configparser section.

    Only the lines belonging to [option] are fed to configparser, so the rest
    of the file may be malformed without breaking the parse.

    :param option: section name to extract
    :param filename: path of the ini file (utf8)
    :return: the section proxy, or None when the file cannot be read or the
             section is missing
    """
    try:
        with open(filename, encoding='utf8') as f:
            file_content = f.readlines()
    # Bug fix: the original bare 'except:' also swallowed KeyboardInterrupt
    # and SystemExit; only file-access failures should yield None.
    except OSError:
        return None

    # Locate the '[option]' header, skipping commented-out lines.
    start = None
    for i, raw_line in enumerate(file_content):
        line_trimmed = raw_line.strip()
        if line_trimmed.startswith('#'):
            continue
        if line_trimmed.startswith('[') and line_trimmed.endswith(']') and line_trimmed[1:-1] == option:
            start = i
            break
    # Bug fix: the original left start at 0 when the section was missing,
    # parsed an unrelated slice, and then raised KeyError on cg[option].
    if start is None:
        return None

    # The section ends just before the next section header, even when that
    # header has been commented out ('#[...]' — configparser ignores it,
    # but its keys below must not leak into this section).
    # Bug fix: the original indexed line_trimmed[1], which raised IndexError
    # on a bare '#' line.
    end = len(file_content)
    for i in range(start + 1, len(file_content)):
        line_trimmed = file_content[i].strip()
        if line_trimmed.startswith('[') or line_trimmed.startswith('#['):
            end = i
            break

    cg = configparser.ConfigParser()
    cg.read_string(''.join(file_content[start:end]))
    # The header matched option exactly above, so this lookup cannot fail.
    return cg[option]
|
||||
|
||||
Reference in New Issue
Block a user