git-svn-id: svn://192.168.0.12/source@348 8346c931-da38-4b9b-9d4c-e48b93cbd075

This commit is contained in:
admin
2017-05-30 03:32:11 +00:00
parent dad0365a79
commit b873412ef2
9 changed files with 616 additions and 30 deletions

View File

@@ -1,5 +1,11 @@
[database]
[#database]
user=root
pass=1234
host=192.168.0.82
name=bigbird
[database]
user=admin
pass=con2214lac!
host=182.162.171.147
name=bigbird

View File

@@ -0,0 +1,86 @@
class InstaUrlValidator:
    """Validate an Instagram post URL and rebuild it in canonical form.

    Accepts inputs like 'https://www.instagram.com/p/<key>/' (scheme is
    optional) and produces 'https://www.instagram.com/p/<key>/'.

    Raises:
        TypeError: when the input URL is not a string.
        ValueError: when the URL does not match the expected post format.
    """

    # Hosts accepted for an Instagram post URL. The original implementation
    # tested `input_host not in self.host`, a substring check that accepted
    # any fragment of 'www.instagram.com' (e.g. 'gram.com').
    ALLOWED_HOSTS = ('www.instagram.com', 'instagram.com')

    def __init__(self, input_url):
        self.protocol = 'https'
        self.host = 'www.instagram.com'
        self.path1 = 'p'
        self.input_user_key = ''
        self.input_url = input_url

    def preprocess_input_url(self):
        """Strip surrounding whitespace; raise TypeError for non-string input."""
        if not isinstance(self.input_url, str):
            raise TypeError('input url error')
        self.preprocessed_input_url = self.input_url.strip()

    def check_protocol(self):
        """Return the index just past 'scheme://', or 0 when no scheme is given."""
        url = self.preprocessed_input_url
        end_index = url.find(':')
        if end_index == -1:
            return 0
        # Slice comparison (instead of two single-char lookups) avoids an
        # IndexError when the URL ends right after the ':'.
        if url[end_index + 1:end_index + 3] != '//':
            raise ValueError('incorrect url format')
        return end_index + 3

    def check_host(self, start_index):
        """Validate the host segment; return the index just past its '/'."""
        end_index = self.preprocessed_input_url.find('/', start_index)
        if end_index == -1:
            raise ValueError('incorrect url format')
        input_host = self.preprocessed_input_url[start_index:end_index]
        # Exact match against the allowed host list (see ALLOWED_HOSTS note).
        if input_host not in self.ALLOWED_HOSTS:
            raise ValueError('incorrect host')
        return end_index + 1

    def check_path1(self, start_index):
        """Require the first path segment to be 'p'; return the index past it."""
        end_index = self.preprocessed_input_url.find('/', start_index)
        if end_index == -1:
            raise ValueError('incorrect path')
        input_path1 = self.preprocessed_input_url[start_index:end_index]
        if input_path1 != self.path1:
            raise ValueError('incorrect path (/p/)')
        return end_index + 1

    def check_path2(self, start_index):
        """Extract the post key: the segment up to the next '/' or end of string."""
        end_index = self.preprocessed_input_url.find('/', start_index)
        if end_index != -1:
            self.input_user_key = self.preprocessed_input_url[start_index:end_index]
        else:
            self.input_user_key = self.preprocessed_input_url[start_index:]

    def make_instagram_url(self):
        """Build the canonical post URL; raise ValueError when the key is empty."""
        if not self.input_user_key:
            raise ValueError('incorrect user key')
        return self.protocol + '://' + self.host + '/' + self.path1 + '/' + self.input_user_key + '/'

    def validate_url(self):
        """Run the whole validation pipeline on the stored input URL."""
        self.preprocess_input_url()
        start_index = self.check_protocol()
        start_index = self.check_host(start_index)
        start_index = self.check_path1(start_index)
        self.check_path2(start_index)

    def get_insta_url(self):
        """Validate the input and return the canonical Instagram post URL."""
        self.validate_url()
        return self.make_instagram_url()

View File

@@ -36,6 +36,20 @@ insta_tag_url = "https://www.instagram.com/explore/tags/"
insta_query = "https://www.instagram.com/query/"
insta_body_url = 'https://www.instagram.com/p/'
# Index positions into BUZZ_KEY for the buzz-record field names.
DATE = 0
REPLY_DAY = 1
REPLY_ACC = 2
LIKE_DAY = 3
LIKE_ACC = 4
DAY = 5
ACC = 6
# Buzz *type* tags used by get_buzzs() — NOT BUZZ_KEY indices
# (BUZZ_KEY has only 7 entries, so indexing with these would raise).
REPLY = 7
LIKE = 8
# Dict keys used in buzz records, addressed via the index constants above.
BUZZ_KEY = [
"date", "reply_day", "reply_acc", "like_day", "like_acc", "day", "acc"
]
def requests_get(req, timeout=requests_timeout):
body = []
@@ -136,10 +150,25 @@ class InstaContent:
class EffectInsta(object):
def __init__(self, event_num, event_code, url):
def __init__(self, event_num, event_code, url, start_date):
self.event_num = event_num
self.event_code = event_code
self.url = url
self.start_date = start_date.replace("-", "")
self.database = self.database_init()
def database_init(self):
    """Load effect.ini settings and return a connected ResultSender.

    Raises effect.effecterror.UnknownError when the settings cannot be read.
    """
    try:
        settings = get_settings()
    except Exception as e:
        raise effect.effecterror.UnknownError(str(e) + '\n' + 'effect.ini setting error')
    if settings:
        sender = ResultSender(settings['host'], settings['user'], settings['pass'], settings['name'])
    else:
        # No settings available: fall back to ResultSender's own defaults.
        sender = ResultSender()
    sender.connect()
    return sender
def start(self):
#content = insta.instacrawl.InstaContent(self.url, {}, self.url)
@@ -187,8 +216,12 @@ class EffectInsta(object):
result['replycount'] = int(body.get('article_order'), 0)
result['likecount'] = int(body.get('reply_url'), 0)
result['interactioncount'] = self.get_replycount(body, replies)
result['replybuzz'] = self.get_reply_buzz(body, replies)
replybuzz = self.get_reply_buzz(body, replies)
likebuzzs = self.get_like_buzz(int(body.get('reply_url'), 0))
totalbuzz = self.summary_reply_and_like(replybuzz, likebuzzs)
result['replybuzz'] = json.dumps(totalbuzz, sort_keys=True)
result['engagementcount'] = result.get('likecount', 0) + result.get('replycount', 0)
return result
def get_replycount(self, body, replies):
@@ -197,22 +230,171 @@ class EffectInsta(object):
set_reply_id.add(i.get('article_id', ''))
return len(set_reply_id) - 1 if body.get('article_id') in set_reply_id else len(set_reply_id)
# def get_reply_buzz(self, body, replies):
# start_date = datetime.datetime.strptime(body['article_date'], '%Y-%m-%d %H:%M:%S').date()
# end_date = datetime.datetime.now().date()
# date_dict = dict()
# while start_date <= end_date:
# date_dict[start_date.strftime('%Y%m%d')] = 0
# start_date = start_date + datetime.timedelta(days=1)
#
# for reply in replies:
# str_reply_date = reply.get('article_date', '1990-01-01 00:00:00')
# reply_date = datetime.datetime.strptime(str_reply_date, '%Y-%m-%d %H:%M:%S').date().strftime('%Y%m%d')
# print(reply_date)
# if reply_date in date_dict:
# date_dict[reply_date] = date_dict[reply_date] + 1
#
# print(date_dict)
#
# json_array = [{'date': k, 'value': v} for k, v in date_dict.items()]
#
# return json.dumps(json_array, sort_keys=True)
def get_reply_buzz(self, body, replies):
start_date = datetime.datetime.strptime(body['article_date'], '%Y-%m-%d %H:%M:%S').date()
end_date = datetime.datetime.now().date()
start_date = datetime.datetime.strptime(self.start_date, '%Y%m%d').date()
today = datetime.datetime.now().date()
date_dict = dict()
while start_date <= end_date:
while start_date <= today:
date_dict[start_date.strftime('%Y%m%d')] = 0
start_date = start_date + datetime.timedelta(days=1)
for reply in replies:
str_reply_date = reply.get('article_date', '1990-01-01 00:00:00')
reply_date = datetime.datetime.strptime(str_reply_date, '%Y-%m-%d %H:%M:%S').date().strftime('%m-%d-%Y')
str_reply_date = reply.get('article_date')
reply_date = datetime.datetime.strptime(str_reply_date, '%Y-%m-%d %H:%M:%S').date().strftime('%Y%m%d')
if reply_date in date_dict:
date_dict[reply_date] = date_dict[reply_date] + 1
json_array = [{'date': k, 'value': v} for k, v in date_dict.items()]
reply_buzzs = self.make_dummy_buzzs(self.start_date, datetime.datetime.today().strftime('%Y%m%d'))
reply_acc_count = 0
for reply_buzz in reply_buzzs:
date = reply_buzz[BUZZ_KEY[DATE]]
reply_count = date_dict[date]
reply_acc_count += reply_count
reply_buzz[BUZZ_KEY[DAY]] = date_dict[date]
reply_buzz[BUZZ_KEY[ACC]] = reply_acc_count
return json.dumps(json_array, sort_keys=True)
# json_array = [{'date': k, 'value': v} for k, v in date_dict.items()]
# return json.dumps(json_array, sort_keys=True)
return reply_buzzs
def get_like_buzz(self, like_count):
    """Build the per-day like buzz series from stored history plus today's total.

    like_count: the current accumulated like total from the crawled post.
    Returns a list of buzz dicts ({date, day, acc}) covering self.start_date
    ('YYYYMMDD') through today.
    Raises effect.effecterror.DBQueryError on any failure.
    """
    # Fixed: the original also parsed self.start_date into an unused local.
    today = datetime.datetime.today().strftime('%Y%m%d')
    try:
        stored = self.database.get_buzz(self.event_num)
        buzzs = json.loads(stored) if stored is not None else []
        buzzs = self.get_buzzs(buzzs, LIKE)
        like_dummy_buzzs = self.make_dummy_buzzs(self.start_date, today)
        like_buzzs = self.fill_buzzs_into_dummy(buzzs, like_dummy_buzzs)
        like_buzzs = self.put_today_buzz(like_buzzs, like_count)
    except Exception as e:
        # Any error in the pipeline (DB, JSON decode, date parsing) is
        # surfaced uniformly as a DB query error, matching the original.
        raise effect.effecterror.DBQueryError(str(e))
    return like_buzzs
def make_base_buzz_instance(self, values):
    """Build one {date, day, acc} buzz record from a [date, day, acc] triple."""
    date_value, day_value, acc_value = values[0], values[1], values[2]
    return {
        BUZZ_KEY[DATE]: date_value,
        BUZZ_KEY[DAY]: day_value,
        BUZZ_KEY[ACC]: acc_value,
    }
def make_summary_buzz_instance(self, values):
    """Merge a reply buzz and a like buzz for one date into a summary record.

    values: [date_string, reply_buzz_dict, like_buzz_dict].
    """
    date_value, reply_buzz, like_buzz = values[0], values[1], values[2]
    return {
        BUZZ_KEY[DATE]: date_value,
        BUZZ_KEY[REPLY_DAY]: reply_buzz[BUZZ_KEY[DAY]],
        BUZZ_KEY[REPLY_ACC]: reply_buzz[BUZZ_KEY[ACC]],
        BUZZ_KEY[LIKE_DAY]: like_buzz[BUZZ_KEY[DAY]],
        BUZZ_KEY[LIKE_ACC]: like_buzz[BUZZ_KEY[ACC]],
    }
def make_dummy_buzzs(self, start_date, end_date):
    """Return zero-filled buzz records for every day in [start_date, end_date].

    Both dates are 'YYYYMMDD' strings; the range is inclusive at both ends.
    """
    current = datetime.datetime.strptime(start_date, '%Y%m%d')
    last = datetime.datetime.strptime(end_date, '%Y%m%d')
    one_day = datetime.timedelta(days=1)
    dummy_buzzs = []
    while current <= last:
        dummy_buzzs.append(self.make_base_buzz_instance([current.strftime('%Y%m%d'), 0, 0]))
        current += one_day
    return dummy_buzzs
def put_today_buzz(self, buzzs, today_acc_buzz_count):
    """Fill in today's (last) entry from the current accumulated count.

    today_acc_buzz_count is the running total; today's per-day value is the
    delta against yesterday's accumulated value, clamped at 0 so a source
    reporting a lower total never yields a negative day count.

    NOTE(review): buzzs.copy() is a shallow copy — the contained dicts are
    shared, so the caller's last record is mutated too; confirm no caller
    relies on the input staying untouched.
    NOTE(review): the accumulated value adds the *unclamped* delta while the
    day value is clamped — preserved from the original; confirm intended.
    """
    # Fixed: the original also computed an unused `today` date string here.
    today_buzz_count = today_acc_buzz_count - buzzs[-2][BUZZ_KEY[ACC]]
    result_buzzs = buzzs.copy()
    result_buzzs[-1][BUZZ_KEY[DAY]] = today_buzz_count if today_buzz_count >= 0 else 0
    result_buzzs[-1][BUZZ_KEY[ACC]] = result_buzzs[-2][BUZZ_KEY[ACC]] + today_buzz_count
    return result_buzzs
def fill_buzzs_into_dummy(self, buzzs, dummy):
    """Overlay stored buzz records onto a zero-filled dummy series.

    Records are copied positionally (zip stops at the shorter list), then
    accumulated values are carried forward over zero-valued gaps.

    NOTE(review): .copy() is shallow — the dummy's dicts are mutated in
    place; confirm callers always pass a fresh dummy (make_dummy_buzzs does).
    """
    dummy_clone = dummy.copy()
    for dummy_buzz, real_buzz in zip(dummy_clone, buzzs.copy()):
        dummy_buzz[BUZZ_KEY[DATE]] = real_buzz[BUZZ_KEY[DATE]]
        dummy_buzz[BUZZ_KEY[DAY]] = real_buzz[BUZZ_KEY[DAY]]
        dummy_buzz[BUZZ_KEY[ACC]] = real_buzz[BUZZ_KEY[ACC]]
    # Carry the last known accumulated value forward across empty days.
    for index, dummy_buzz in enumerate(dummy_clone):
        if index == 0:
            # Guard first: the original checked 'previous_index >= 0' last,
            # so for index 0 it read dummy_clone[-1] (the final record)
            # before discarding the result — same outcome, now explicit.
            continue
        previous_acc_value = dummy_clone[index - 1][BUZZ_KEY[ACC]]
        if previous_acc_value > 0 and dummy_buzz[BUZZ_KEY[ACC]] == 0:
            dummy_buzz[BUZZ_KEY[ACC]] = previous_acc_value
    return dummy_clone
def get_buzzs(self, buzzs, buzz_type):
    """Project stored summary records down to {date, day, acc} triples.

    Only the LIKE type is implemented; any other buzz_type yields [].
    """
    if buzz_type != LIKE:
        return []
    return [
        self.make_base_buzz_instance([
            buzz[BUZZ_KEY[DATE]],
            buzz[BUZZ_KEY[LIKE_DAY]],
            buzz[BUZZ_KEY[LIKE_ACC]],
        ])
        for buzz in buzzs
    ]
def is_valid_data(self, reply_buzzs, like_buzzs):
    """Return True when both buzz series cover exactly the same dates.

    NOTE(review): relies on self.get_date_list, which is not visible in
    this file chunk — confirm it exists on this class.
    """
    # Return the comparison directly instead of an if/else True/False.
    return self.get_date_list(reply_buzzs) == self.get_date_list(like_buzzs)
def summary_reply_and_like(self, reply_buzzs, like_buzzs):
    """Zip the reply and like buzz series into per-date summary records.

    Pairing is positional and stops at the shorter series; each record's
    date is taken from the reply side. (Date-consistency validation via
    is_valid_data is currently disabled.)
    """
    return [
        self.make_summary_buzz_instance([
            reply_buzz[BUZZ_KEY[DATE]],
            reply_buzz,
            like_buzz,
        ])
        for reply_buzz, like_buzz in zip(reply_buzzs, like_buzzs)
    ]

View File

@@ -37,6 +37,17 @@ class ResultSender:
return query + ",".join(key_list) + ") values (" + ",".join(val_list) + ")" + " on duplicate key update " + \
','.join(map(lambda x:x[0] + '=' + x[1], zip(key_list, val_list)))
def get_buzz(self, event_num):
    """Fetch the stored replybuzz JSON for an event, or None when absent.

    Fixed: uses a parameterized query — the original concatenated event_num
    into the SQL string, which is injectable whenever event_num is not a
    trusted int (it ultimately comes from command-line input).
    NOTE(review): assumes a PyMySQL-style connection (%s paramstyle,
    dict rows, .open attribute) — confirm against ResultSender.connect().
    """
    query = 'select replybuzz from stats_s1_effect where event_num = %s'
    if not self.conn.open:
        self.connect()
    with self.conn.cursor() as cursor:
        cursor.execute(query, (event_num,))
        buzz = cursor.fetchone()
    return buzz['replybuzz'] if buzz is not None else buzz
def send(self, table_name, dictionary):
    """Build the upsert query for *dictionary* and execute it against *table_name*."""
    self._exec_query(self._make_query(table_name, dictionary))

View File

@@ -1,6 +1,7 @@
import effect.effectinstagram
import effect.effecterror
import effect.effectkakaostory
from effect.InstaUrlValidator import InstaUrlValidator
from base.baseclasses import printl
import sys
import base.baseclasses
@@ -33,9 +34,17 @@ def get_browser_info(platform_, file_name="browser.txt"):
return options.get(platform_, options['default'])
def get_effect_process(platform_, event_num, url):
def get_effect_process(platform_, event_num, url, start_date):
if platform_ == 'instagram':
return effect.effectinstagram.EffectInsta(int(event_num), int(event_num), url)
try:
insta_url_validator = InstaUrlValidator(url)
insta_url = insta_url_validator.get_insta_url()
except Exception as e:
printl("x!@#!@#!@#e010!@#check url")
exit(1)
# return effect.effectinstagram.EffectInsta(int(event_num), int(event_num), url, start_date)
return effect.effectinstagram.EffectInsta(int(event_num), int(event_num), insta_url, start_date)
else:
browser_info = get_browser_info(platform_)
@@ -52,14 +61,19 @@ if __name__ == '__main__':
sys.argv[1] instagram, kakaostory, facebook
sys.argv[2] event_num
sys.argv[3] url
sys.argv[4] start date
"""
if len(sys.argv) != 4:
# if len(sys.argv) != 4:
# printl("x!@#!@#!@#e010!@#check argument")
# exit(1)
if len(sys.argv) != 5:
printl("x!@#!@#!@#e010!@#check argument")
exit(1)
try:
effect_process = get_effect_process(sys.argv[1], sys.argv[2], sys.argv[3])
effect_process = get_effect_process(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
effect_process.start()
except effect.effecterror.EffectException as e:
printl("x!@#" + str(sys.argv[2]) + "!@#" + str(sys.argv[3]) + "!@#" + str(e))

View File

@@ -0,0 +1,91 @@
import time
from selenium.common.exceptions import WebDriverException
from base.baseclasses import find_element_by_css_selector
from base.baseclasses import find_elements_by_css_selector
from bs4 import BeautifulSoup
try:
import lxml
parser_opt = 'lxml'
except ImportError:
parser_opt = 'html.parser'
limit_reload = 5
list_tag_css_selector = "div#initial_browse_result"
list_page_css_selector = "div#pagelet_timeline_main_column"
list_group_css_selector = "div#pagelet_group_"
each_post_css_selector = "div._4-u2._4-u8"
wait_second_for_find_element = 30
class ListBase(object):
    """Base helper that walks a Facebook post list inside a Selenium page.

    Subclasses set self.list_css_selector to the container for their page
    type; post elements are collected into url_list and consumed one at a
    time via move_first/move_next, with current_post holding the active one.

    NOTE(review): indentation in this block was reconstructed from a diff
    view — verify control flow against the original file.
    """

    def __init__(self, driver):
        self.driver = driver
        self.url_list = []
        self.list_css_selector = None
        self.list_container_dom = None
        self.current_post = None

    def set_url_elements(self):
        """Collect post elements under the list container into url_list.

        NOTE(review): calls the singular find_element_* helper but extends
        with the result — confirm the helper returns a sequence.
        """
        elements = find_element_by_css_selector(
            self.driver,
            self.list_css_selector + " " + each_post_css_selector,
            wait_second_for_find_element)
        self.url_list.extend(elements)

    def move_first(self):
        """Pop the first queued post into current_post (None when empty).

        Fixed: the original swapped the two attributes — it popped from
        current_post (initially None → AttributeError) and assigned the
        result to url_list.
        """
        self.current_post = self.url_list.pop(0) if self.url_list else None

    def move_next(self):
        """Advance to the next post; identical to move_first."""
        self.move_first()

    def check_list_and_load(self):
        """Reload until at least two posts are queued or retries run out.

        Raises WebDriverException when nothing could be loaded at all.
        """
        num_of_list = len(self.url_list)
        for _ in range(limit_reload):
            num_of_list = len(self.url_list)
            if num_of_list < 2:
                self.load_more_list()
                num_of_list = self.get_num_of_list()
        if not num_of_list:
            raise WebDriverException("There is no data or ajax error")

    def load_more_list(self):
        """Nudge the window and scroll to trigger Facebook's lazy loading."""
        position = self.driver.get_window_position()
        size = self.driver.get_window_size()
        # Maximize then restore: forces a layout refresh before scrolling.
        self.driver.maximize_window()
        self.driver.set_window_size(size['width'], size["height"])
        self.driver.set_window_position(position['x'], position['y'])
        for _ in range(2):
            self.driver.execute_script("window.scrollBy(0, -400)")
            time.sleep(0.3)
        for _ in range(4):
            self.driver.execute_script("window.scrollBy(0, 800)")
            time.sleep(0.3)

    def has_next(self):
        raise NotImplementedError

    def get_url(self):
        raise NotImplementedError

    def get_date(self):
        raise NotImplementedError

    def remove_current_post(self):
        """Delete the current post's DOM node so it is not collected again."""
        css_selector = "div#" + self.current_post.id
        self.driver.execute_script('document.querySelector("' + css_selector + '").remove()')

    def get_num_of_list(self):
        raise NotImplementedError
class ListTag(ListBase):
    """Post-list walker for hashtag search result pages."""

    def __init__(self, driver):
        """Initialize base state and select the hashtag list container."""
        super().__init__(driver)
        self.list_css_selector = list_tag_css_selector
class ListPage(ListBase):
    """Post-list walker for timeline/page views.

    Fixed: now calls super().__init__ so the inherited attributes
    (url_list, current_post, list_container_dom) exist — the original only
    set driver and the selector, so base-class methods would fail with
    AttributeError.
    """

    def __init__(self, driver):
        super().__init__(driver)
        self.list_css_selector = list_page_css_selector

View File

@@ -0,0 +1,197 @@
#-*- coding: utf-8 -*-
import logging
import re
import json
import datetime
import time
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import WebDriverException
from bs4 import BeautifulSoup
from base.baseclasses import SendtoDB
from base.baseclasses import print_and_flush
from base.baseclasses import CrawlInit
from base.baseclasses import wait
from base.baseclasses import find_element_by_css_selector
from base.baseclasses import find_elements_by_css_selector
from base.baseclasses import find_elements_by_xpath
from base.baseclasses import enter_element
from base.baseclasses import Browser
facebook_url = "http://bigbird.iptime.org/fbtest.php"
facebook_tag_url = "https://www.facebook.com/hashtag/"
facebook_id = 'concepters22@gmail.com'
facebook_password = 'zjstpqxjtm'
class FacebookInit(CrawlInit):
    """Crawl-parameter holder for Facebook; maps platform codes to base URLs.

    Platform code 11 = hashtag search, 12 = page/profile crawl.
    NOTE(review): indentation reconstructed from a diff view — verify
    against the original file.
    """

    def __init__(self, before_day=0):
        super().__init__(before_day)
        self.urls = dict()
        self.urls[11] = facebook_tag_url
        self.urls[12] = facebook_url

    def split_searches(self):
        """Split the comma-separated search string into terms.

        Page crawls (platform 12) get whitespace-trimmed terms; hashtag
        crawls get utf8-processed terms.
        """
        splited_list = self.searches().split(',')
        return [x.strip() if self.platform() == 12 else self.utf8(x) for x in splited_list]

    def make_url(self):
        """Build one crawl URL per search term.

        Purely numeric terms are treated as profile ids
        ('profile.php?id=<n>'); all URLs get the '?fref=ts' suffix.
        """
        return [self.urls[self.platform()] + ('profile.php?id=' + x if x.isnumeric() else x) + "?fref=ts"
                for x in self.split_searches()]

    def get_begin_day(self):
        """Start of the crawl window.

        Realtime mode: today's midnight shifted by before_day; otherwise the
        configured start day.
        """
        if self.is_realtime():
            date_now = datetime.datetime.now()
            result = datetime.datetime(year=date_now.year, month=date_now.month, day=date_now.day)
            result += datetime.timedelta(days=self.before_day)
            return result
        return self.start_day()

    def get_end_day(self):
        """End of the crawl window: today's midnight in realtime mode."""
        if self.is_realtime():
            date_now = datetime.datetime.now()
            return datetime.datetime(year=date_now.year, month=date_now.month, day=date_now.day)
        return self.end_day()

    def is_hashtag(self):
        """True for hashtag crawls (any platform other than 12)."""
        return self.platform() != 12
class FacebookMainCrawler:
    """Top-level driver that logs in to Facebook and crawls each target URL.

    NOTE(review): indentation in this block was reconstructed from a diff
    view — verify control flow against the original file.
    """

    def __init__(self):
        self.crawl_init = FacebookInit()
        self.browser = Browser()
        self.driver = None
        self.keyword_id = None
        self.url = None
        self.db_num = None

    def set_driver(self, driver):
        self.driver = driver

    def set_keyword_id(self, keyword_id):
        self.keyword_id = keyword_id

    def start(self):
        """Entry point: run the crawl loop."""
        self.crawl_start()

    def set_arguments(self, browser, keyword_id, db_num, before_day, until_page):
        """Initialize driver, keyword, DB target and crawl window in one call."""
        self.init_browser(browser)
        self.init_keyword_id(keyword_id)
        self.init_db(db_num)
        self.init_before_day(before_day)
        self.init_until_page(until_page)

    def init_browser(self, browser):
        self.set_driver(self.browser.get_new_driver(browser))

    def init_keyword_id(self, keyword_id):
        # Accept both str and int keyword ids.
        self.keyword_id = int(keyword_id) if type(keyword_id) != int else keyword_id
        self.crawl_init.get_keyword_parameters(keyword_id)
        self.crawl_init.disconnect()

    def init_db(self, db_num):
        self.db_num = db_num

    def init_before_day(self, before_day):
        self.crawl_init.set_before_day(before_day)

    def init_until_page(self, until_page):
        self.crawl_init.set_until_page(until_page)

    def set_main_window_handler(self, window_handler):
        self.main_window_handler = window_handler

    def crawl_start(self):
        """Visit every target URL; on error, restart the browser and retry.

        Loops forever while the crawl is configured as realtime.
        NOTE(review): self.page_crawler and crawl_all_current_url are not
        defined in this class — confirm a subclass or external code
        provides them before start().
        """
        real_time = True
        while real_time:
            print_and_flush("Crawler Start")
            url_list = self.crawl_init.make_url()
            i = 0
            backup_set = set()
            while i < len(url_list):
                try:
                    self.set_main_window_handler(self.driver.window_handles[0])
                    print_and_flush(url_list[i] + "\n")
                    self.driver.get(url_list[i])
                    wait(5)
                    self.facebook_login()
                    body = self.driver.find_element_by_tag_name('body')
                    self.click_element(body)
                    self.page_crawler.set_date(begin_date=self.crawl_init.get_begin_day(),
                                               end_date=self.crawl_init.get_end_day())
                    self.crawl_all_current_url(backup_set)
                    i += 1
                    backup_set.clear()
                except Exception as e:
                    # Any failure: log, restart the browser, retry same URL.
                    logging.info(e)
                    self.driver.quit()
                    self.set_driver(self.browser.new_browser())
                    wait(5)
            real_time = self.crawl_init.is_realtime()
        print_and_flush("Finished Crawling :)")
        self.driver.quit()

    def go_bigbird(self, driver):
        driver.get(facebook_url)

    def click_facebook_login(self, driver):
        element_a = find_element_by_css_selector(driver, "a[href]", 15)
        enter_element(element_a)

    def login_facebook(self, driver, f_id, f_pw):
        """Fill the login form on the standalone login page and submit."""
        element_email = find_element_by_css_selector(driver, "input#email", 15)
        element_password = find_element_by_css_selector(driver, "input#pass", 15)
        element_button = find_element_by_css_selector(driver, "button#loginbutton", 15)
        element_email.send_keys(f_id)
        element_password.send_keys(f_pw)
        enter_element(element_button)

    def facebook_login(self):
        """Log in with the module-level account if a login form is present.

        Silently returns when no login form is found (already logged in).
        """
        try:
            element_email = find_element_by_css_selector(self.driver, '#email', 15)
            element_pwd = find_element_by_css_selector(self.driver, '#pass', 15)
        except Exception:
            # Narrowed from a bare except (which also swallowed SystemExit
            # and KeyboardInterrupt); no form means already logged in.
            return
        # Fixed: use the shared module-level credentials instead of
        # re-hardcoding the same values locally.
        element_email.send_keys(facebook_id)
        element_pwd.send_keys(facebook_password)
        label = self.driver.find_element_by_css_selector('#loginbutton')
        element_input = label.find_element_by_xpath('input')
        element_input.send_keys(Keys.NULL)
        element_input.send_keys(Keys.ENTER)
        wait(5)

    def click_element(self, element):
        """Move to *element* and click it, then pause for the page to settle."""
        ac = ActionChains(self.driver)
        ac.move_to_element(element).click().perform()
        wait(4)

View File

@@ -314,7 +314,6 @@ def crawl_content_process(qu, keyword_id, db_num):
break
ok = True
while ok:
time.sleep(2)
try:
# get a instance of InstaContent by do_no_proxy func.
# if element['url'] is invalid, content is None

View File

@@ -103,10 +103,10 @@ def parse_body_html(content):
start_cursor = None
has_previous = False
if postpage:
media = postpage[0]["media"]
media = postpage[0]["graphql"]["shortcode_media"]
body = {
"article_date": (old_date + datetime.timedelta(seconds=media["date"])).strftime("%Y-%m-%d %H:%M:%S"),
"article_data": media["caption"],
"article_date": (old_date + datetime.timedelta(seconds=media["taken_at_timestamp"])).strftime("%Y-%m-%d %H:%M:%S"),
"article_data": media["edge_media_to_caption"]["edges"][0]["node"]["text"],
"article_id": media["owner"]["username"],
"article_nickname": media["owner"]["username"],
"platform_id": media["owner"]["username"],
@@ -115,22 +115,22 @@ def parse_body_html(content):
"platform_title": media["owner"]["username"],
"article_form": "body",
"article_profileurl": media["owner"]["profile_pic_url"],
"article_order": str(media["comments"]["count"]),
"article_hit": str(media.get('video_views', 0)),
"reply_url": str(media["likes"]["count"])
"article_order": str(media["edge_media_to_comment"]["count"]),
"article_hit": str(0),
"reply_url": str(media["edge_media_preview_like"]["count"])
}
comments = postpage[0]["media"]["comments"]
has_previous = comments["page_info"]["has_previous_page"]
start_cursor = comments["page_info"]["start_cursor"]
nodes = comments["nodes"]
comments = postpage[0]["graphql"]["shortcode_media"]["edge_media_to_comment"]
has_previous = comments["page_info"]["has_next_page"]
start_cursor = comments["page_info"]["end_cursor"]
nodes = comments["edges"]
for node in nodes:
reply.append({
"article_data": node["text"],
"article_data": node["node"]["text"],
"article_date":
(old_date + datetime.timedelta(seconds=node["created_at"])).strftime("%Y-%m-%d %H:%M:%S"),
"article_id": node["user"]["username"],
"article_nickname": node["user"]["username"],
"article_profileurl": node["user"]["profile_pic_url"],
(old_date + datetime.timedelta(seconds=node["node"]["created_at"])).strftime("%Y-%m-%d %H:%M:%S"),
"article_id": node["node"]["owner"]["username"],
"article_nickname": node["node"]["owner"]["username"],
"article_profileurl": node["node"]["owner"]["profile_pic_url"],
"platform_name": "instagram",
"platform_form": "post",
"article_form": "reply"