인스타그램 effect crawler 버그 수정
This commit is contained in:
@@ -23,7 +23,6 @@ from selenium.webdriver.support import expected_conditions as EC
|
|||||||
from selenium.webdriver.support.ui import WebDriverWait
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
||||||
|
|
||||||
is_debug = False
|
|
||||||
|
|
||||||
def is_debugger_attached():
|
def is_debugger_attached():
|
||||||
for frame in inspect.stack():
|
for frame in inspect.stack():
|
||||||
@@ -31,6 +30,8 @@ def is_debugger_attached():
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
is_debug = is_debugger_attached()
|
||||||
|
|
||||||
def printl(*objects, sep=' ', end='\n', file=None, flush=True):
|
def printl(*objects, sep=' ', end='\n', file=None, flush=True):
|
||||||
if is_debug:
|
if is_debug:
|
||||||
cur_frame = inspect.currentframe()
|
cur_frame = inspect.currentframe()
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import json
|
|||||||
import requests
|
import requests
|
||||||
import requests.exceptions
|
import requests.exceptions
|
||||||
import time
|
import time
|
||||||
|
import bs4
|
||||||
|
|
||||||
import insta.instaheaders as instaheaders
|
import insta.instaheaders as instaheaders
|
||||||
import insta.instaparser as instaparser
|
import insta.instaparser as instaparser
|
||||||
@@ -73,18 +74,20 @@ class InstaContent:
|
|||||||
self.has_previous = False
|
self.has_previous = False
|
||||||
self.cookies = {}
|
self.cookies = {}
|
||||||
self.proxies = proxies
|
self.proxies = proxies
|
||||||
|
self.query_id = ''
|
||||||
|
self.content = ''
|
||||||
self.load_url(url, cookies, referer, self.proxies)
|
self.load_url(url, cookies, referer, self.proxies)
|
||||||
|
|
||||||
def load_url(self, url, cookies, referer, proxies):
    """Fetch a post page and parse its body and first batch of replies.

    The raw page is kept on ``self.content`` because ``find_query_id``
    later scans it for script references.

    :param url: address of the post page to download.
    :param cookies: cookies to merge into ``self.cookies`` before the request.
    :param referer: value remembered for later log output.
    :param proxies: proxy mapping handed to ``requests.get``.
    :return: tuple ``(self.body, self.reply)`` as produced by
        ``instaparser.parse_body_html``.
    :raises requests.exceptions.HTTPError: when the response status is an
        error (via ``raise_for_status``).
    """
    self.__set_cookies(cookies)
    self.__r = requests.get(url, headers=instaheaders.get_headers_for_body_html(self.cookies), proxies=proxies,
                            timeout=requests_timeout, stream=True)
    try:
        self.content = requests_get(self.__r)
        self.__r.raise_for_status()
        self.__referer = referer
        self.__code = self.__get_code(url)
        self.body, self.reply, self.start_cursor, self.has_previous = instaparser.parse_body_html(self.content)
        self.__set_cookies(self.__r.cookies)
    finally:
        # The response is streamed, so it must be released even when the
        # status check or the parser raises.
        self.__r.close()
    return self.body, self.reply
|
||||||
@@ -95,20 +98,52 @@ class InstaContent:
|
|||||||
def get_reply(self):
|
def get_reply(self):
|
||||||
return self.reply
|
return self.reply
|
||||||
|
|
||||||
|
def get_query_ids(self, html):
    """Collect candidate GraphQL query ids referenced by a page.

    Every ``<script>`` tag whose ``src`` contains ``_Commons.js`` is
    downloaded and scanned for 17-digit numbers that follow ``queryId:"``.

    :param html: markup of the page to scan.
    :return: list of query id strings in the order they were found
        (may contain duplicates, may be empty).
    :raises requests.exceptions.RequestException: when a script download
        fails or times out.
    """
    from urllib.parse import urljoin

    doc = bs4.BeautifulSoup(html, "html.parser")

    query_ids = []
    for script in doc.find_all("script"):
        if not script.has_attr("src") or "_Commons.js" not in script['src']:
            continue
        # urljoin copes with absolute URLs and paths without a leading
        # slash, which plain string concatenation would corrupt.
        script_url = urljoin('https://www.instagram.com', script['src'])
        # Same proxies and timeout as the other requests of this class, so
        # the download can neither hang forever nor bypass the proxy.
        text = requests.get(script_url, proxies=self.proxies, timeout=requests_timeout).text
        query_ids.extend(re.findall(r'(?<=queryId:")[0-9]{17}', text))
    return query_ids
|
||||||
|
|
||||||
|
def find_query_id(self):
    """Return the first candidate query id the GraphQL endpoint accepts.

    Candidates come from ``get_query_ids(self.content)``. Each one is
    probed with a query for the current post; the first whose JSON answer
    has ``status == 'ok'`` wins.

    :return: the accepted query id, or ``''`` when no candidate works.
    """
    for potential_id in self.get_query_ids(self.content):
        url = 'https://www.instagram.com/graphql/query/?query_id={}&shortcode={}&first={}&after={}'.format(
            potential_id, self.__code, len(self.reply), self.start_cursor)
        try:
            data = requests.get(url, proxies=self.proxies, timeout=requests_timeout).json()
        except (requests.exceptions.RequestException, ValueError):
            # No valid JSON returned (or the request itself failed), most
            # likely a wrong query id resulting in 'Oops, an error occurred.'
            continue
        # Guard the lookup: an unexpected JSON shape just means "not this id".
        if isinstance(data, dict) and data.get('status') == 'ok':
            return potential_id

    return ''
|
||||||
|
|
||||||
def load_reply_more(self):
    """Fetch the next page of replies through the GraphQL query endpoint.

    The query id is looked up once via ``find_query_id`` and cached on
    ``self.query_id``. Newly fetched replies are appended to ``self.reply``;
    ``self.start_cursor`` and ``self.has_previous`` are advanced from the
    parsed response.

    :return: only the replies fetched by this call (not the accumulated list).
    :raises requests.exceptions.HTTPError: when the response status is an
        error (via ``raise_for_status``).
    """
    if not self.query_id:
        self.query_id = self.find_query_id()

    # NOTE(review): ``first`` is set to the number of replies already loaded;
    # confirm this is the intended page size.
    url = 'https://www.instagram.com/graphql/query/?query_id={}&shortcode={}&first={}&after={}'.format(
        self.query_id, self.__code, len(self.reply), self.start_cursor)
    self.__r = requests.get(url, headers=instaheaders.get_headers_for_body_html(self.cookies), proxies=self.proxies,
                            timeout=requests_timeout, stream=True)
    try:
        content = requests_get(self.__r)
        self.__r.raise_for_status()
        reply, self.start_cursor, self.has_previous = instaparser.parse_reply_more(content)
        self.__set_cookies(self.__r.cookies)
    finally:
        # The response is streamed, so it must be released even when the
        # status check or the parser raises.
        self.__r.close()

    self.reply = self.reply + reply
    printl('{} - reply : {} (next : {})'.format(self.__referer, len(self.reply), self.start_cursor))

    return reply
|
||||||
|
|
||||||
def get_cookies(self):
|
def get_cookies(self):
|
||||||
return self.cookies
|
return self.cookies
|
||||||
@@ -332,13 +367,19 @@ class EffectInsta(object):
|
|||||||
|
|
||||||
def put_today_buzz(self, buzzs, today_acc_buzz_count):
    """Return a copy of ``buzzs`` whose last entry reflects today's count.

    The last entry's accumulated value becomes ``today_acc_buzz_count``.
    Its daily value is the difference to the previous entry's accumulated
    value, or the accumulated value itself when there is no previous
    entry. A negative daily value is clamped to 0. When ``buzzs`` is empty
    a new entry dated today is appended.

    Neither the input list nor its entries are modified: the last entry is
    copied before it is updated.

    :param buzzs: list of buzz entries (mappings keyed by ``BUZZ_KEY`` values).
    :param today_acc_buzz_count: accumulated buzz count as of today.
    :return: new list with the updated (or appended) last entry.
    """
    acc_key = BUZZ_KEY[ACC]
    day_key = BUZZ_KEY[DAY]

    result_buzzs = buzzs.copy()
    if not result_buzzs:
        today = datetime.date.today().strftime('%Y%m%d')
        result_buzzs.append({acc_key: today_acc_buzz_count,
                             day_key: max(today_acc_buzz_count, 0),
                             BUZZ_KEY[DATE]: today})
        return result_buzzs

    # list.copy() is shallow, so copy the entry before writing to it;
    # otherwise the caller's original entry would be changed as well.
    latest = result_buzzs[-1].copy()
    previous_acc = result_buzzs[-2][acc_key] if len(result_buzzs) > 1 else 0
    latest[acc_key] = today_acc_buzz_count
    latest[day_key] = max(today_acc_buzz_count - previous_acc, 0)
    result_buzzs[-1] = latest

    return result_buzzs
|
||||||
|
|
||||||
|
|||||||
@@ -43,8 +43,11 @@ class ResultSender:
|
|||||||
if not self.conn.open:
|
if not self.conn.open:
|
||||||
self.connect()
|
self.connect()
|
||||||
with self.conn.cursor() as cursor:
|
with self.conn.cursor() as cursor:
|
||||||
|
try:
|
||||||
cursor.execute(query)
|
cursor.execute(query)
|
||||||
buzz = cursor.fetchone()
|
buzz = cursor.fetchone()
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
|
||||||
return buzz['replybuzz'] if buzz != None else buzz
|
return buzz['replybuzz'] if buzz != None else buzz
|
||||||
|
|
||||||
|
|||||||
@@ -731,9 +731,9 @@ class InstaContent:
|
|||||||
self.__set_cookies(self.__r.cookies)
|
self.__set_cookies(self.__r.cookies)
|
||||||
self.__r.close()
|
self.__r.close()
|
||||||
|
|
||||||
self.reply += reply
|
self.reply = self.reply + reply
|
||||||
printl('{} - reply : {} (next : {})'.format(self.__referer, len(self.reply), self.start_cursor))
|
printl('{} - reply : {} (next : {})'.format(self.__referer, len(self.reply), self.start_cursor))
|
||||||
return self.reply
|
return reply
|
||||||
|
|
||||||
def get_cookies(self):
|
def get_cookies(self):
|
||||||
return self.cookies
|
return self.cookies
|
||||||
|
|||||||
Reference in New Issue
Block a user