Skip to content
This repository has been archived by the owner on Aug 31, 2021. It is now read-only.

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
zaironjacobs committed Nov 28, 2020
1 parent 3e96d65 commit a55141e
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 9 deletions.
2 changes: 1 addition & 1 deletion instagram_scraper/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.1.9'
__version__ = '1.1.10'
1 change: 1 addition & 0 deletions instagram_scraper/actions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@
from .check_if_account_is_private import CheckIfAccountIsPrivate
from .grab_post_links import GrabPostLinks
from .go_to_link import GoToLink
from .get_id import GetId
7 changes: 2 additions & 5 deletions instagram_scraper/actions/accept_cookies.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,9 @@ def do(self):
try:
self._web_driver.find_element_by_css_selector(constants.ACCEPT_COOKIES_CSS).click()
except (NoSuchElementException, StaleElementReferenceException, ElementClickInterceptedException) as err:
logger.error('error accepting instagram cookies')
logger.error(err)
self.on_fail()
pass
else:
self._scraper.cookies_accepted = True

def on_fail(self):
print('error accepting instagram cookies')
self._scraper.stop()
pass
32 changes: 32 additions & 0 deletions instagram_scraper/actions/get_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import logging
import json

from bs4 import BeautifulSoup
from json.decoder import JSONDecodeError

from .. import constants
from .. import actions

# Module-level logger. Bug fix: the original passed the string literal
# '__name__', which creates a logger literally named "__name__" shared by
# every module making the same mistake. Passing the __name__ variable gives
# the conventional per-module logger (e.g. "instagram_scraper.actions.get_id").
logger = logging.getLogger(__name__)


class GetId(actions.Action):
    """ Action that resolves an Instagram username to its numeric user id. """

    def __init__(self, scraper, username):
        super().__init__(scraper)
        self.__username = username

    def do(self):
        """ Get the id of a username """

        # Navigate the shared web driver to the user-info endpoint for
        # this username, then parse the JSON payload out of the page body.
        profile_url = constants.INSTAGRAM_USER_INFO_URL_DEFAULT.format(self.__username)
        actions.GoToLink(self._scraper, profile_url).do()
        page_html = self._scraper.web_driver.page_source
        page_text = BeautifulSoup(page_html, 'html.parser').text
        try:
            user_info = json.loads(page_text)
        except JSONDecodeError as err:
            # Page body was not JSON (e.g. an error/login page was served).
            logger.error('could not retrieve user id: %s', str(err))
            return None
        try:
            return user_info['graphql']['user']['id']
        except KeyError as err:
            # JSON parsed but did not have the expected graphql layout.
            logger.error('could not retrieve user id: %s', str(err))
            return None

    def on_fail(self):
        pass
16 changes: 15 additions & 1 deletion instagram_scraper/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def __start_web_driver(self):

driver_options = ChromeOptions()
driver_options.add_experimental_option('excludeSwitches', ['enable-logging'])
driver_options.add_argument('--mute-audio')
driver_options.headless = not self.__headful

webdriver_path = None
Expand Down Expand Up @@ -130,6 +131,8 @@ def __init_scrape_stories(self, user):
+ str(stories_amount) + ' image(s)/video(s) will be downloaded from stories: '
+ self.__c_style.RESET_ALL)
actions.ScrapeStories(self, user, stories_amount).do()
else:
print('no stories found')

def __filter_post_links(self, user):
"""
Expand Down Expand Up @@ -158,9 +161,16 @@ def init_scrape_users(self, users):

user.create_user_output_directories()

# Retrieve the id using requests
userid = get_data.get_id_by_username_from_ig(user.username)

# Retrieve the id using actions if previous function has failed
if userid is None:
userid = actions.GetId(self, user.username).do()

# Continue if id not found (username does not exist)
if userid is None:
print(self.__c_fore.RED + 'username not found' + self.__c_style.RESET_ALL)
print(self.__c_fore.RED + 'could not load user profile' + self.__c_style.RESET_ALL)
continue

actions.ScrapeDisplay(self, user).do()
Expand Down Expand Up @@ -253,6 +263,10 @@ def cookies_accepted(self, accepted):
def web_driver(self):
return self.__web_driver

@property
def login_username(self):
    # Read-only accessor for the private login username attribute
    # (presumably the account used to sign in — confirm against __init__).
    return self.__login_username

@property
def database(self):
    # Read-only accessor for the scraper's private database handle.
    return self.__database
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
long_description = fh.read()

name = 'igscraper'
version = '1.1.9'
version = '1.1.10'

requires = [
'colorama>=0.4.3',
'requests>=2.24.0',
'selenium>=3.141.0',
'urllib3>=1.25.10',
'bs4>=0.0.1',
'get-chromedriver>=1.1.9'
'get-chromedriver>=1.1.10'
]

setup(
Expand Down

0 comments on commit a55141e

Please sign in to comment.