Skip to content
This repository has been archived by the owner on Aug 31, 2021. It is now read-only.

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
zaironjacobs committed Nov 28, 2020
1 parent 3e96d65 commit a55141e
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 9 deletions.
2 changes: 1 addition & 1 deletion instagram_scraper/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.1.9'
__version__ = '1.1.10'
1 change: 1 addition & 0 deletions instagram_scraper/actions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@
from .check_if_account_is_private import CheckIfAccountIsPrivate
from .grab_post_links import GrabPostLinks
from .go_to_link import GoToLink
from .get_id import GetId
7 changes: 2 additions & 5 deletions instagram_scraper/actions/accept_cookies.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,9 @@ def do(self):
try:
self._web_driver.find_element_by_css_selector(constants.ACCEPT_COOKIES_CSS).click()
except (NoSuchElementException, StaleElementReferenceException, ElementClickInterceptedException) as err:
logger.error('error accepting instagram cookies')
logger.error(err)
self.on_fail()
pass
else:
self._scraper.cookies_accepted = True

def on_fail(self):
print('error accepting instagram cookies')
self._scraper.stop()
pass
32 changes: 32 additions & 0 deletions instagram_scraper/actions/get_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import logging
import json

from bs4 import BeautifulSoup
from json.decoder import JSONDecodeError

from .. import constants
from .. import actions

# Module-level logger. Bug fix: the original passed the string literal
# '__name__', which creates a logger literally named "__name__" shared by
# every module making the same mistake. Passing the __name__ variable gives
# the conventional per-module logger (e.g. "instagram_scraper.actions.get_id").
logger = logging.getLogger(__name__)


class GetId(actions.Action):
    """ Action that resolves an Instagram username to its numeric user id. """

    def __init__(self, scraper, username):
        super().__init__(scraper)
        self.__username = username

    def do(self):
        """ Get the id of a username """

        # Navigate the shared web driver to the user-info endpoint for
        # this username, then parse the JSON payload out of the page body.
        profile_url = constants.INSTAGRAM_USER_INFO_URL_DEFAULT.format(self.__username)
        actions.GoToLink(self._scraper, profile_url).do()
        page_html = self._scraper.web_driver.page_source
        page_text = BeautifulSoup(page_html, 'html.parser').text
        try:
            user_info = json.loads(page_text)
        except JSONDecodeError as err:
            # Page body was not JSON (e.g. an error/login page was served).
            logger.error('could not retrieve user id: %s', str(err))
            return None
        try:
            return user_info['graphql']['user']['id']
        except KeyError as err:
            # JSON parsed but did not have the expected graphql layout.
            logger.error('could not retrieve user id: %s', str(err))
            return None

    def on_fail(self):
        pass
16 changes: 15 additions & 1 deletion instagram_scraper/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def __start_web_driver(self):

driver_options = ChromeOptions()
driver_options.add_experimental_option('excludeSwitches', ['enable-logging'])
driver_options.add_argument('--mute-audio')
driver_options.headless = not self.__headful

webdriver_path = None
Expand Down Expand Up @@ -130,6 +131,8 @@ def __init_scrape_stories(self, user):
+ str(stories_amount) + ' image(s)/video(s) will be downloaded from stories: '
+ self.__c_style.RESET_ALL)
actions.ScrapeStories(self, user, stories_amount).do()
else:
print('no stories found')

def __filter_post_links(self, user):
"""
Expand Down Expand Up @@ -158,9 +161,16 @@ def init_scrape_users(self, users):

user.create_user_output_directories()

# Retrieve the id using requests
userid = get_data.get_id_by_username_from_ig(user.username)

# Retrieve the id using actions if previous function has failed
if userid is None:
userid = actions.GetId(self, user.username).do()

# Continue if id not found (username does not exist)
if userid is None:
print(self.__c_fore.RED + 'username not found' + self.__c_style.RESET_ALL)
print(self.__c_fore.RED + 'could not load user profile' + self.__c_style.RESET_ALL)
continue

actions.ScrapeDisplay(self, user).do()
Expand Down Expand Up @@ -253,6 +263,10 @@ def cookies_accepted(self, accepted):
def web_driver(self):
return self.__web_driver

@property
def login_username(self):
    # Read-only accessor for the private login username attribute
    # (presumably the account used to sign in — confirm against __init__).
    return self.__login_username

@property
def database(self):
    # Read-only accessor for the scraper's private database handle.
    return self.__database
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
long_description = fh.read()

name = 'igscraper'
version = '1.1.9'
version = '1.1.10'

requires = [
'colorama>=0.4.3',
'requests>=2.24.0',
'selenium>=3.141.0',
'urllib3>=1.25.10',
'bs4>=0.0.1',
'get-chromedriver>=1.1.9'
'get-chromedriver>=1.1.10'
]

setup(
Expand Down

0 comments on commit a55141e

Please sign in to comment.