kakaostory 업데이트 대응
utf-8 4byte emoji 공백처리

git-svn-id: svn://192.168.0.12/source@254 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
@@ -9,6 +9,7 @@ import time
|
||||
import os
|
||||
import psutil
|
||||
import threading
|
||||
import re
|
||||
from time import localtime, strftime
|
||||
|
||||
from selenium import webdriver
|
||||
@@ -189,6 +190,7 @@ class Browser:
|
||||
|
||||
class SendtoDB:
|
||||
pymysql = __import__('pymysql.cursors')
|
||||
re_emoji = re.compile(u'[\U0001F300-\U0001F64F\U0001F680-\U0001F6FF]+', re.UNICODE)
|
||||
|
||||
def __init__(self, db_num=0):
|
||||
self.conn = self.pymysql.connect(host='bigbird.iptime.org',
|
||||
@@ -223,7 +225,7 @@ class SendtoDB:
|
||||
if type(val) == int:
|
||||
val_list.append(str(val))
|
||||
else:
|
||||
val_list.append(self.conn.escape(val))
|
||||
val_list.append(self.conn.escape(self.re_emoji.sub(' ', str(val))))
|
||||
return query + ",".join(key_list) + ") values (" + ",".join(val_list) + ")"
|
||||
|
||||
def send_body(self, body):
|
||||
|
||||
@@ -545,12 +545,12 @@ class FacebookPageCrawler:
|
||||
return None
|
||||
while True:
|
||||
self.index += 1
|
||||
if self.index >= len(self.posts):
|
||||
if self.index > len(self.posts):
|
||||
# self.posts = self.driver.find_elements_by_css_selector("a[class='_5pcq']")
|
||||
# self.posts = self.driver.find_elements_by_xpath("//span[@class='fsm fwn fcg']/a[@class='_5pcq'][1]")
|
||||
# self.posts = self.driver.find_elements_by_xpath("//div[@class='_1dwg']//span[@class='fsm fwn fcg'][1]/a[@class='_5pcq'][1]")
|
||||
self.posts = self.find_posts()
|
||||
if self.index >= len(self.posts):
|
||||
if self.index > len(self.posts):
|
||||
if self.load_more_posts() is False:
|
||||
self.posts = None
|
||||
return None
|
||||
@@ -594,12 +594,12 @@ class FacebookPageCrawler:
|
||||
if self.index > self.limit:
|
||||
self.posts = None
|
||||
return None
|
||||
if self.index >= len(self.posts):
|
||||
if self.index > len(self.posts):
|
||||
# self.posts = self.driver.find_elements_by_css_selector("a[class='_5pcq']")
|
||||
# self.posts = self.driver.find_elements_by_xpath("//span[@class='fsm fwn fcg']/a[@class='_5pcq'][1]")
|
||||
# self.posts = self.driver.find_elements_by_xpath("//div[@class='_1dwg']//span[@class='fsm fwn fcg'][1]/a[@class='_5pcq'][1]")
|
||||
self.posts = self.find_posts()
|
||||
if self.index >= len(self.posts):
|
||||
if self.index > len(self.posts):
|
||||
if self.load_more_posts() is False:
|
||||
self.posts = None
|
||||
return None
|
||||
@@ -666,7 +666,7 @@ class FacebookPageCrawler:
|
||||
self.driver.set_window_size(size['width'], size["height"])
|
||||
self.driver.set_window_position(position['x'], position['y'])
|
||||
return True
|
||||
if self.reload_count < 15:
|
||||
if self.reload_count < 10:
|
||||
print_and_flush("refresh")
|
||||
self.driver.refresh()
|
||||
wait(5)
|
||||
|
||||
@@ -203,7 +203,8 @@ class KakaoBodyCrawler:
|
||||
|
||||
def find_feeling_users(self):
|
||||
try:
|
||||
a = self.activity.find_element_by_xpath("div/div[@class='comment']/div[@class='count_group _countContainer']/div[@class='inner']/a[@class='_btnViewLikes' and not(@style)]")
|
||||
#a = self.activity.find_element_by_xpath("div/div[@class='comment ']/div[@class='count_group _countContainer']/div[@class='inner']/a[@class='_btnViewLikes' and not(@style)]")
|
||||
a = self.activity.find_element_by_xpath("div/div/div/a[@class='_btnViewLikes' and not(@style)]")
|
||||
except:
|
||||
return None
|
||||
self.enter_element(a)
|
||||
@@ -359,14 +360,15 @@ class KakaoBodyCrawler:
|
||||
|
||||
def find_share_users(self):
|
||||
try:
|
||||
a = self.activity.find_element_by_xpath("div/div[@class='comment']/div[@class='count_group _countContainer']/div[@class='inner']/a[@class='_btnViewShareList' and not(@style)]")
|
||||
#a = self.activity.find_element_by_xpath("div/div[@class='comment ']/div[@class='count_group _countContainer']/a[@class='_btnViewShareList' and not(@style)]")
|
||||
a = self.activity.find_element_by_xpath("div/div/div/a[@class='_btnViewStoryShareList' and not(@style)]")
|
||||
except:
|
||||
return None
|
||||
self.enter_element(a)
|
||||
# inner_layer = self.driver.find_element_by_css_selector("div[class='inner_story_layer _layerContainer']")
|
||||
inner_layer = WebDriverWait(self.driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "div[class='inner_story_layer _layerContainer']")))
|
||||
str_share = inner_layer.find_element_by_css_selector("strong[class='tit_story']")
|
||||
re_share = re.compile("\\(([\\d]+)\\)")
|
||||
re_share = re.compile("([\\d]+)")
|
||||
m = re_share.search(str_share.text)
|
||||
if m is None:
|
||||
share_num = 0
|
||||
@@ -666,15 +668,25 @@ class KakaoReplyCrawler:
|
||||
self.activity = activity
|
||||
|
||||
def has_more(self):
|
||||
more = self.activity.find_element_by_css_selector("p[class='more _commentMoreBtnContainer']")
|
||||
try:
|
||||
more = self.activity.find_element_by_css_selector("p[class='more _showMoreCommentContainer']")
|
||||
except:
|
||||
try:
|
||||
more = self.activity.find_element_by_css_selector("p[class='more _showPrevCommentContainer']")
|
||||
except:
|
||||
return False
|
||||
if more.get_attribute('style').find('block') != -1:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def read_more_reply(self):
|
||||
more = self.activity.find_element_by_css_selector("p[class='more _commentMoreBtnContainer']")
|
||||
a = more.find_element_by_css_selector("a[class='_btnCommentMore']")
|
||||
try:
|
||||
more = self.activity.find_element_by_css_selector("p[class='more _showMoreCommentContainer']")
|
||||
a = more.find_element_by_css_selector("a[class='_btnShowMoreComment']")
|
||||
except:
|
||||
more = self.activity.find_element_by_css_selector("p[class='more _showPrevCommentContainer']")
|
||||
a = more.find_element_by_css_selector("a[class='_btnShowPrevComment']")
|
||||
self.enter_element(a)
|
||||
|
||||
def read_all_reply(self):
|
||||
@@ -685,12 +697,12 @@ class KakaoReplyCrawler:
|
||||
raise WebDriverException
|
||||
|
||||
def get_reply_ul(self):
|
||||
ul = self.activity.find_element_by_xpath("div/div[@class='comment']/div/ul")
|
||||
ul = self.activity.find_element_by_xpath("div/div/div/ul[@class='list _listContainer']")
|
||||
return ul
|
||||
|
||||
def has_reply(self):
|
||||
try:
|
||||
ul = self.activity.find_element_by_xpath("div/div[@class='comment']/div/ul")
|
||||
ul = self.activity.find_element_by_xpath("div/div/div/ul[@class='list _listContainer']")
|
||||
lis = ul.find_elements_by_tag_name("li")
|
||||
if len(lis) > 0:
|
||||
return True
|
||||
@@ -969,9 +981,9 @@ class KakaoPageCrawler:
|
||||
return None
|
||||
while True:
|
||||
self.index += 1
|
||||
if self.index >= len(self.activities):
|
||||
if self.index > len(self.activities):
|
||||
self.activities = self.driver.find_elements_by_css_selector("div[class='section _activity']")
|
||||
if self.index >= len(self.activities):
|
||||
if self.index > len(self.activities):
|
||||
if self.load_more_activities() is False:
|
||||
self.activities = None
|
||||
return None
|
||||
@@ -982,7 +994,8 @@ class KakaoPageCrawler:
|
||||
time_modified_date = self.find_article_modified_date(self.activities[self.index - 1])
|
||||
if time_modified_date is not None:
|
||||
time_date = time_modified_date
|
||||
print_and_flush(str(time_date))
|
||||
print("number of post:", self.index, flush=True)
|
||||
print(str(time_date), flush=True)
|
||||
if type(time_date) == str:
|
||||
continue
|
||||
if self.is_earlier(time_date):
|
||||
@@ -1065,7 +1078,7 @@ class KakaoPageCrawler:
|
||||
self.driver.set_window_size(size['width'], size["height"])
|
||||
self.driver.set_window_position(position['x'], position['y'])
|
||||
return True
|
||||
if self.reload_count < 15:
|
||||
if self.reload_count < 10:
|
||||
print_and_flush("refresh")
|
||||
self.driver.refresh()
|
||||
wait(5)
|
||||
|
||||
Reference in New Issue
Block a user