|
|
|
|
@@ -5,6 +5,7 @@ import json
|
|
|
|
|
import requests
|
|
|
|
|
import requests.exceptions
|
|
|
|
|
import time
|
|
|
|
|
import bs4
|
|
|
|
|
|
|
|
|
|
import insta.instaheaders as instaheaders
|
|
|
|
|
import insta.instaparser as instaparser
|
|
|
|
|
@@ -73,18 +74,20 @@ class InstaContent:
|
|
|
|
|
self.has_previous = False
|
|
|
|
|
self.cookies = {}
|
|
|
|
|
self.proxies = proxies
|
|
|
|
|
self.query_id = ''
|
|
|
|
|
self.content = ''
|
|
|
|
|
self.load_url(url, cookies, referer, self.proxies)
|
|
|
|
|
|
|
|
|
|
def load_url(self, url, cookies, referer, proxies):
    """Fetch a post page and parse its body and first batch of replies.

    The raw page content is kept on ``self.content`` so that
    ``find_query_id`` can later scan it for script tags.

    :param url: page URL to request.
    :param cookies: cookies merged into this instance before the request.
    :param referer: stored and reused by later reply requests.
    :param proxies: proxies mapping handed to ``requests.get``.
    :return: tuple ``(self.body, self.reply)`` as produced by
        ``instaparser.parse_body_html``.
    :raises requests.exceptions.HTTPError: via ``raise_for_status`` on a
        4xx/5xx response.
    """
    self.__set_cookies(cookies)
    self.__r = requests.get(url, headers=instaheaders.get_headers_for_body_html(self.cookies), proxies=proxies,
                            timeout=requests_timeout, stream=True)
    try:
        # Read the streamed response exactly once and keep it.
        # NOTE(review): requests_get is defined elsewhere in this file; it
        # presumably drains the streamed body - confirm it is not re-readable.
        self.content = requests_get(self.__r)
        self.__r.raise_for_status()
        self.__referer = referer
        self.__code = self.__get_code(url)
        self.body, self.reply, self.start_cursor, self.has_previous = instaparser.parse_body_html(self.content)
        self.__set_cookies(self.__r.cookies)
    finally:
        # Release the connection even when the status check or parsing fails.
        self.__r.close()
    return self.body, self.reply
|
|
|
|
|
@@ -95,20 +98,52 @@ class InstaContent:
|
|
|
|
|
def get_reply(self):
    """Return the replies currently stored on this instance (``self.reply``)."""
    replies = self.reply
    return replies
|
|
|
|
|
|
|
|
|
|
def get_query_ids(self, html):
    """Collect candidate GraphQL query ids referenced by a page.

    Every ``<script>`` tag whose ``src`` contains ``_Commons.js`` is
    downloaded and scanned for 17-digit numbers that directly follow
    ``queryId:"``.

    :param html: page markup to scan for script tags.
    :return: list of query id strings in the order found (may be empty).
    :raises requests.exceptions.RequestException: if a script download fails.
    """
    # Imported locally so the block does not depend on a file-level import.
    from urllib.parse import urljoin

    # Compiled once; raw string so the quote needs no escaping.
    query_id_pattern = re.compile(r'(?<=queryId:")[0-9]{17}')

    doc = bs4.BeautifulSoup(html, "html.parser")
    query_ids = []
    for script in doc.find_all("script"):
        src = script.get("src")
        if not src or "_Commons.js" not in src:
            continue
        # urljoin copes with absolute and slash-less src values, which plain
        # string concatenation would turn into an invalid URL.
        script_url = urljoin('https://www.instagram.com', src)
        # Same proxies/timeout as the other requests in this file, so the
        # download neither bypasses the proxy nor hangs indefinitely.
        text = requests.get(script_url, proxies=self.proxies, timeout=requests_timeout).text
        query_ids.extend(query_id_pattern.findall(text))
    return query_ids
|
|
|
|
|
|
|
|
|
|
def find_query_id(self):
    """Return the first candidate query id that the GraphQL endpoint accepts.

    Candidates come from ``get_query_ids(self.content)``. Each one is tried
    against the reply query URL; a candidate is accepted when the response
    is JSON with ``status == 'ok'``.

    :return: the accepted query id, or ``''`` when no candidate works.
    """
    # Probe with the same headers, proxies and timeout as the real request
    # in load_reply_more, so a passing probe predicts a passing request.
    headers = instaheaders.get_headers_for_body_html(self.cookies)
    for potential_id in self.get_query_ids(self.content):
        url = 'https://www.instagram.com/graphql/query/?query_id={}&shortcode={}&first={}&after={}'.format(
            potential_id, self.__code, len(self.reply), self.start_cursor)
        try:
            data = requests.get(url, headers=headers, proxies=self.proxies,
                                timeout=requests_timeout).json()
            if data['status'] == 'ok':
                return potential_id
        except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
            # No valid JSON returned (or no usable 'status' field): most
            # likely a wrong query_id resulting in 'Oops, an error occurred.'
            # Move on to the next candidate.
            continue

    return ''
|
|
|
|
|
|
|
|
|
|
def load_reply_more(self):
    """Load the next batch of replies through the GraphQL query endpoint.

    The query id is resolved once with ``find_query_id`` and cached on
    ``self.query_id``. The new batch is appended to ``self.reply``, and
    ``self.start_cursor`` / ``self.has_previous`` are updated from the
    parsed response.

    :return: only the newly loaded batch of replies.
    :raises requests.exceptions.HTTPError: via ``raise_for_status`` on a
        4xx/5xx response.
    """
    if not self.query_id:
        self.query_id = self.find_query_id()

    url = 'https://www.instagram.com/graphql/query/?query_id={}&shortcode={}&first={}&after={}'.format(
        self.query_id, self.__code, len(self.reply), self.start_cursor)
    self.__r = requests.get(url, headers=instaheaders.get_headers_for_body_html(self.cookies), proxies=self.proxies,
                            timeout=requests_timeout, stream=True)
    try:
        content = requests_get(self.__r)
        self.__r.raise_for_status()
        reply, self.start_cursor, self.has_previous = instaparser.parse_reply_more(content)
        self.__set_cookies(self.__r.cookies)
    finally:
        # Release the connection even when the status check or parsing fails.
        self.__r.close()

    # Accumulate instead of overwriting, so len(self.reply) keeps reflecting
    # everything loaded so far.
    self.reply = self.reply + reply
    printl('{} - reply : {} (next : {})'.format(self.__referer, len(self.reply), self.start_cursor))

    return reply
|
|
|
|
|
|
|
|
|
|
def get_cookies(self):
    """Return the cookies currently stored on this instance (``self.cookies``)."""
    current_cookies = self.cookies
    return current_cookies
|
|
|
|
|
@@ -332,13 +367,19 @@ class EffectInsta(object):
|
|
|
|
|
|
|
|
|
|
def put_today_buzz(self, buzzs, today_acc_buzz_count):
    """Return a copy of ``buzzs`` whose last entry carries today's counts.

    * empty list: a new entry is appended with the accumulated and daily
      counts both set to ``today_acc_buzz_count`` and today's date
      (``%Y%m%d``).
    * one entry: its accumulated and daily counts are both set to
      ``today_acc_buzz_count``.
    * two or more entries: the last entry's accumulated count is set to
      ``today_acc_buzz_count`` and its daily count to the difference from
      the previous entry's accumulated count.

    A negative daily count is clamped to 0.

    :param buzzs: list of per-day entries indexed by the ``BUZZ_KEY`` keys.
    :param today_acc_buzz_count: accumulated count as of today.
    :return: new list; neither ``buzzs`` nor its entries are modified.
    """
    acc_key = BUZZ_KEY[ACC]
    day_key = BUZZ_KEY[DAY]

    # Copy each entry as well as the list: a shallow list copy would let the
    # writes below leak into the caller's entries.
    result_buzzs = [entry.copy() for entry in buzzs]

    if not result_buzzs:
        today = datetime.date.today().strftime('%Y%m%d')
        result_buzzs.append({acc_key: today_acc_buzz_count,
                             day_key: today_acc_buzz_count,
                             BUZZ_KEY[DATE]: today})
    else:
        latest = result_buzzs[-1]
        latest[acc_key] = today_acc_buzz_count
        if len(result_buzzs) == 1:
            # No previous entry to diff against.
            latest[day_key] = today_acc_buzz_count
        else:
            latest[day_key] = today_acc_buzz_count - result_buzzs[-2][acc_key]

    # The accumulated count can drop below the previous one; never report
    # a negative daily count.
    if result_buzzs[-1][day_key] < 0:
        result_buzzs[-1][day_key] = 0

    return result_buzzs
|
|
|
|
|
|
|
|
|
|
|