diff --git a/instagram_scraper/__init__.py b/instagram_scraper/__init__.py
index 86ee767..1bdaf47 100755
--- a/instagram_scraper/__init__.py
+++ b/instagram_scraper/__init__.py
@@ -1 +1 @@
-__version__ = '1.1.9'
+__version__ = '1.1.10'
diff --git a/instagram_scraper/actions/__init__.py b/instagram_scraper/actions/__init__.py
index 3570d6d..d91122f 100644
--- a/instagram_scraper/actions/__init__.py
+++ b/instagram_scraper/actions/__init__.py
@@ -15,3 +15,4 @@
 from .check_if_account_is_private import CheckIfAccountIsPrivate
 from .grab_post_links import GrabPostLinks
 from .go_to_link import GoToLink
+from .get_id import GetId
diff --git a/instagram_scraper/actions/accept_cookies.py b/instagram_scraper/actions/accept_cookies.py
index a4d416f..b123ca0 100644
--- a/instagram_scraper/actions/accept_cookies.py
+++ b/instagram_scraper/actions/accept_cookies.py
@@ -20,12 +20,9 @@ def do(self):
         try:
             self._web_driver.find_element_by_css_selector(constants.ACCEPT_COOKIES_CSS).click()
         except (NoSuchElementException, StaleElementReferenceException, ElementClickInterceptedException) as err:
-            logger.error('error accepting instagram cookies')
-            logger.error(err)
-            self.on_fail()
+            pass
         else:
             self._scraper.cookies_accepted = True

     def on_fail(self):
-        print('error accepting instagram cookies')
-        self._scraper.stop()
+        pass
diff --git a/instagram_scraper/actions/get_id.py b/instagram_scraper/actions/get_id.py
new file mode 100644
index 0000000..e280d8b
--- /dev/null
+++ b/instagram_scraper/actions/get_id.py
@@ -0,0 +1,32 @@
+import logging
+import json
+
+from bs4 import BeautifulSoup
+from json.decoder import JSONDecodeError
+
+from .. import constants
+from .. import actions
+
+logger = logging.getLogger(__name__)
+
+
+class GetId(actions.Action):
+    def __init__(self, scraper, username):
+        super().__init__(scraper)
+        self.__username = username
+
+    def do(self):
+        """ Get the id of a username """
+
+        link = constants.INSTAGRAM_USER_INFO_URL_DEFAULT.format(self.__username)
+        actions.GoToLink(self._scraper, link).do()
+        result = self._scraper.web_driver.page_source
+        soup = BeautifulSoup(result, 'html.parser')
+        try:
+            data = json.loads(soup.text)
+            return data['graphql']['user']['id']
+        except (JSONDecodeError, KeyError) as err:
+            logger.error('could not retrieve user id: %s', str(err))
+
+    def on_fail(self):
+        pass
diff --git a/instagram_scraper/scraper.py b/instagram_scraper/scraper.py
index a171c09..d354a5a 100755
--- a/instagram_scraper/scraper.py
+++ b/instagram_scraper/scraper.py
@@ -48,6 +48,7 @@ def __start_web_driver(self):

         driver_options = ChromeOptions()
         driver_options.add_experimental_option('excludeSwitches', ['enable-logging'])
+        driver_options.add_argument('--mute-audio')
         driver_options.headless = not self.__headful

         webdriver_path = None
@@ -130,6 +131,8 @@ def __init_scrape_stories(self, user):
                   + str(stories_amount) + ' image(s)/video(s) will be downloaded from stories: '
                   + self.__c_style.RESET_ALL)
             actions.ScrapeStories(self, user, stories_amount).do()
+        else:
+            print('no stories found')

     def __filter_post_links(self, user):
         """
@@ -158,9 +161,16 @@ def init_scrape_users(self, users):

             user.create_user_output_directories()

+            # Retrieve the id using requests
             userid = get_data.get_id_by_username_from_ig(user.username)
+
+            # Retrieve the id using actions if the previous function failed
+            if userid is None:
+                userid = actions.GetId(self, user.username).do()
+
+            # Continue if the id was not found (username does not exist)
             if userid is None:
-                print(self.__c_fore.RED + 'username not found' + self.__c_style.RESET_ALL)
+                print(self.__c_fore.RED + 'could not load user profile' + self.__c_style.RESET_ALL)
                 continue

             actions.ScrapeDisplay(self, user).do()
@@ -253,6 +263,10 @@ def cookies_accepted(self, accepted):
     def web_driver(self):
         return self.__web_driver

+    @property
+    def login_username(self):
+        return self.__login_username
+
     @property
     def database(self):
         return self.__database
diff --git a/setup.py b/setup.py
index e05e716..8e7b545 100755
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
     long_description = fh.read()

 name = 'igscraper'
-version = '1.1.9'
+version = '1.1.10'

 requires = [
     'colorama>=0.4.3',
@@ -13,7 +13,7 @@
     'selenium>=3.141.0',
     'urllib3>=1.25.10',
     'bs4>=0.0.1',
-    'get-chromedriver>=1.1.9'
+    'get-chromedriver>=1.1.10'
 ]

 setup(
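
Note: with this patch, init_scrape_users resolves a user's id in two stages: the lightweight requests-based lookup (get_data.get_id_by_username_from_ig) runs first, and the Selenium-backed GetId action is only tried when it returns None. A minimal sketch of that flow, assuming a started scraper instance; resolve_user_id is a hypothetical helper for illustration, not part of this patch:

    # Hypothetical helper sketching the two-stage id lookup added to scraper.py.
    # Assumes the modules scraper.py relies on are importable like this.
    from instagram_scraper import actions, get_data

    def resolve_user_id(scraper, username):
        # Stage 1: plain HTTP lookup, no browser involved.
        userid = get_data.get_id_by_username_from_ig(username)
        if userid is None:
            # Stage 2: load INSTAGRAM_USER_INFO_URL_DEFAULT in the web driver
            # and read data['graphql']['user']['id'] from the JSON page source.
            userid = actions.GetId(scraper, username).do()
        return userid  # still None -> scraper prints 'could not load user profile'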