From 1d690f109b5a86332b5a72ef5a5a8dd202082b3e Mon Sep 17 00:00:00 2001
From: strahi-linux
Date: Mon, 14 Oct 2024 13:06:03 +0200
Subject: [PATCH] Added script to check for index discrepancies.

---
 .github/workflows/checkIndexes.yaml  | 111 +++++++++++++++++++++
 scripts/check_indexes.py             |  47 +++++++++
 scripts/classes/class_es.py          | 138 +++++++++++++++++++++++++++
 scripts/classes/class_gh.py          |  82 ++++++++++++++++
 scripts/requirements/check_index.txt |   2 +
 5 files changed, 380 insertions(+)
 create mode 100644 .github/workflows/checkIndexes.yaml
 create mode 100644 scripts/check_indexes.py
 create mode 100644 scripts/classes/class_es.py
 create mode 100644 scripts/classes/class_gh.py
 create mode 100644 scripts/requirements/check_index.txt

diff --git a/.github/workflows/checkIndexes.yaml b/.github/workflows/checkIndexes.yaml
new file mode 100644
index 000000000..e506cd5cb
--- /dev/null
+++ b/.github/workflows/checkIndexes.yaml
@@ -0,0 +1,111 @@
+name: Check Indexed File Links
+
+on:
+  workflow_dispatch:
+    inputs:
+      select_index:
+        type: choice
+        description: Check Test and/or Live ES indexed items
+        options:
+          - Test
+          - Live
+          - Both
+      regex:
+        type: string
+        description: Regex to use when searching for indexed items
+        default: "arm_gcc_clang|arm_mikroc|clocks|database|dspic|^images$|mikroe_utils|pic|preinit|riscv|schemas|unit_test_lib"
+      fix:
+        type: boolean
+        description: Fix the broken links with new ones?
+        default: false
+
+  push:
+    branches:
+      - main # This will trigger on every push (merge) to the 'main' branch
+
+  schedule:
+    - cron: "*/30 * * * *" # This will run every 30 minutes
+
+jobs:
+  manual_run:
+    if: ${{ github.event_name == 'workflow_dispatch' }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.x'
+
+      - name: Install Dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r scripts/requirements/check_index.txt
+
+      - name: Check Indexed Links - Live
+        if: ${{ github.event.inputs.select_index == 'Live' || github.event.inputs.select_index == 'Both' }}
+        run: |
+          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_LIVE }} "--es_regex" "${{ github.event.inputs.regex }}" "--log_only" "${{ !inputs.fix }}"
+        continue-on-error: true # Ensure the workflow continues
+
+      - name: Check Indexed Links - Test
+        if: ${{ github.event.inputs.select_index == 'Test' || github.event.inputs.select_index == 'Both' }}
+        run: |
+          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_TEST }} "--es_regex" "${{ github.event.inputs.regex }}" "--log_only" "${{ !inputs.fix }}"
+        continue-on-error: true # Ensure the workflow continues
+
+  push_to_main_run:
+    if: ${{ github.event_name == 'push' }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.x'
+
+      - name: Install Dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r scripts/requirements/check_index.txt
+
+      - name: Check Indexed Links - Live
+        run: |
+          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_LIVE }} "--es_regex" "arm_gcc_clang|arm_mikroc|clocks|database|dspic|^images$|mikroe_utils|pic|preinit|riscv|schemas|unit_test_lib"
+        continue-on-error: true # Ensure the workflow continues
+
+      - name: Check Indexed Links - Test
+        run: |
+          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_TEST }} "--es_regex" "arm_gcc_clang|arm_mikroc|clocks|database|dspic|^images$|mikroe_utils|pic|preinit|riscv|schemas|unit_test_lib"
+        continue-on-error: true # Ensure the workflow continues
+
+  scheduled_run:
+    if: ${{ github.event_name == 'schedule' }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.x'
+
+      - name: Install Dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r scripts/requirements/check_index.txt
+
+      - name: Check Indexed Links - Live
+        run: |
+          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_LIVE }} "--es_regex" "arm_gcc_clang|arm_mikroc|clocks|database|dspic|^images$|mikroe_utils|pic|preinit|riscv|schemas|unit_test_lib"
+        continue-on-error: true # Ensure the workflow continues
+
+      - name: Check Indexed Links - Test
+        run: |
+          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_TEST }} "--es_regex" "arm_gcc_clang|arm_mikroc|clocks|database|dspic|^images$|mikroe_utils|pic|preinit|riscv|schemas|unit_test_lib"
+        continue-on-error: true # Ensure the workflow continues
diff --git a/scripts/check_indexes.py b/scripts/check_indexes.py
new file mode 100644
index 000000000..93e1b8131
--- /dev/null
+++ b/scripts/check_indexes.py
@@ -0,0 +1,47 @@
+import sys, json, argparse, requests
+
+import classes.class_gh as gh
+import classes.class_es as es
+
+if __name__ == "__main__":
+    # Get arguments
+    parser = argparse.ArgumentParser(description="Check indexed download links against GitHub release assets.")
+    parser.add_argument("gh_repo", help="GitHub repository name, e.g., 'username/repo'", type=str)
+    parser.add_argument("gh_token", help="GitHub Token", type=str)
+    parser.add_argument("es_host", help="ES instance host value", type=str)
+    parser.add_argument("es_user", help="ES instance user value", type=str)
+    parser.add_argument("es_password", help="ES instance password value", type=str)
+    parser.add_argument("es_index", help="ES instance index value", type=str)
+    parser.add_argument("--es_regex", help="Regex to use to fetch indexed items", type=str, default=".+")
+    parser.add_argument("--log_only", help="Only log broken links, do not re-index fixed ones", type=lambda flag: str(flag).lower() in ("1", "true", "yes"), default=False)
+    args = parser.parse_args()
+
+    es_instance = es.index(
+        es_host=args.es_host, es_user=args.es_user, es_password=args.es_password,
+        index=args.es_index, token=args.gh_token
+    )
+
+    gh_instance = gh.repo(args.gh_repo, args.gh_token)
+
+    es_instance.fetch(regex=args.es_regex)
+
+    headers = {
+        'Authorization': f'token {args.gh_token}'
+    }
+
+    err = False
+    for indexed_item in es_instance.indexed_items:
+        asset_status = requests.get(indexed_item['source']['download_link'], headers=headers)
+        if es_instance.Status.ERROR.value == asset_status.status_code:  ## code 404 - error, reindex with correct download link
+            err = True
+            print("%sERROR: Asset \"%s\" download link is incorrect. - %s" % (es_instance.Colors.FAIL, indexed_item['source']['name'], indexed_item['source']['download_link']))
+            if not args.log_only:
+                package_name = (json.loads(asset_status.text))['name']
+                url = gh_instance.asset_fetch_url_api(package_name, loose=False)
+                indexed_item['source']['download_link'] = url
+                es_instance.update(indexed_item['doc']['type'], indexed_item['doc']['id'], indexed_item['source'])
+        else:  ## code 200 - success, no need to reindex
+            print("%sOK: Asset \"%s\" download link is correct. - %s" % (es_instance.Colors.OKBLUE, indexed_item['source']['name'], indexed_item['source']['download_link']))
+
+    if err and args.log_only:
+        sys.exit(-1)
diff --git a/scripts/classes/class_es.py b/scripts/classes/class_es.py
new file mode 100644
index 000000000..149e4a1d6
--- /dev/null
+++ b/scripts/classes/class_es.py
@@ -0,0 +1,138 @@
+import re, time
+from elasticsearch import Elasticsearch
+from enum import Enum
+
+class index():
+    class Status(Enum):
+        SUCCESS = 200
+        ERROR = 404
+
+    class Colors:
+        HEADER = '\033[95m'
+        OKBLUE = '\033[94m'
+        OKCYAN = '\033[96m'
+        OKGREEN = '\033[92m'
+        WARNING = '\033[93m'
+        FAIL = '\033[91m'
+        ENDC = '\033[0m'
+        BOLD = '\033[1m'
+        UNDERLINE = '\033[4m'
+
+    @staticmethod
+    def init(es_host, es_user, es_password, retry=None):
+        retry_check = 10
+        if retry:
+            retry_check = retry
+        num_of_retries = 1
+        while True:
+            print(f"Trying to connect to ES. Connection retry: {num_of_retries}")
+            es = Elasticsearch([es_host], http_auth=(es_user, es_password))
+            if es.ping():
+                break
+            # Wait for 1 second and try again if the connection fails
+            if retry_check == num_of_retries:
+                # Exit if it fails 10 times, something is wrong with the server
+                raise ValueError("Connection to ES failed!")
+            num_of_retries += 1
+            es = None
+
+            time.sleep(1)
+
+        return es
+
+    @staticmethod
+    def response(es: Elasticsearch, index, query_size=5000):
+        # Search query to use
+        query_search = {
+            "size": query_size,
+            "query": {
+                "match_all": {}
+            }
+        }
+
+        # Search the base with the provided query
+        num_of_retries = 1
+        while num_of_retries <= 10:
+            try:
+                response = es.search(index=index, body=query_search)
+                if not response['timed_out']:
+                    break
+            except Exception:
+                print("Executing search query - retry number %i" % num_of_retries)
+            num_of_retries += 1
+
+        return response
+
+    @staticmethod
+    def find_item(items, check):
+        for item in items:
+            if 'name' in item:
+                if item['name'] == check:
+                    return True
+        return False
+
+    @staticmethod
+    def api_index(es: Elasticsearch, doc_index, doc_type, doc_id, doc_body):
+        return es.index(
+            index=doc_index,
+            doc_type=doc_type,
+            id=doc_id,
+            body=doc_body
+        )
+
+    def __init__(self, es_host, es_user, es_password, index, token, retry=None):
+        self.es_instance = self.init(es_host, es_user, es_password, retry)
+        self.index = index
+        self.token = token
+        self.indexed_items = []
+
+    def fetch(self, regex=r'.+', query_size=5000):
+        pattern = re.compile(regex)
+        response = self.response(self.es_instance, self.index, query_size)
+
+        for eachHit in response['hits']['hits']:
+            if 'name' in eachHit['_source']:
+                if pattern.match(eachHit['_source']['name']):
+                    self.indexed_items.append(
+                        {
+                            'doc': {
+                                'index': eachHit['_index'],
+                                'type': eachHit['_type'],
+                                'id': eachHit['_id']
+                            },
+                            'source': eachHit['_source']
+                        }
+                    )
+
+    def exists(self, check, query_size=5000):
+        ## Did not use the ES exists function as it requires doc_type and doc_id
+        ## For future use it is better to search by name only
+        response = self.response(self.es_instance, self.index, query_size)
+        return self.find_item([hit['_source'] for hit in response['hits']['hits']], check)
+
+    def create(self, doc_type, doc_id, doc_body):
+        response = self.api_index(self.es_instance, self.index, doc_type, doc_id, doc_body)
+        if not 'created' == response['result']:
+            raise ValueError("%s%s failed to index to %s!" % (self.Colors.FAIL, doc_id, self.index))
+        else:
+            print("%sINFO: Asset \"%s\" created. - %s" % (self.Colors.OKGREEN, doc_body['name'], doc_body['download_link']))
+
+    def update(self, doc_type, doc_id, doc_body):
+        response = self.api_index(self.es_instance, self.index, doc_type, doc_id, doc_body)
+        if 'created' == response['result']:
+            print("%sWARNING: Asset \"%s\" created instead of updated. - %s" % (self.Colors.WARNING, doc_body['name'], doc_body['download_link']))
+        elif not 'updated' == response['result']:
+            raise ValueError("%s%s failed to update on %s!" % (self.Colors.FAIL, doc_id, self.index))
+        else:
+            print("%sINFO: Asset \"%s\" updated. - %s" % (self.Colors.OKGREEN, doc_body['name'], doc_body['download_link']))
+
+    def delete(self, doc_type, doc_id):
+        response = self.es_instance.delete(
+            index=self.index,
+            doc_type=doc_type,
+            id=doc_id
+        )
+        if not 'deleted' == response['result']:
+            raise ValueError("%s%s failed to delete from %s!" % (self.Colors.FAIL, doc_id, self.index))
+        else:
+            print("%sINFO: Asset \"%s\" deleted." % (self.Colors.OKGREEN, doc_id))
diff --git a/scripts/classes/class_gh.py b/scripts/classes/class_gh.py
new file mode 100644
index 000000000..b102850ea
--- /dev/null
+++ b/scripts/classes/class_gh.py
@@ -0,0 +1,82 @@
+import requests
+from enum import Enum
+
+class repo():
+    class Status(Enum):
+        SUCCESS = 200
+
+    class Colors:
+        HEADER = '\033[95m'
+        OKBLUE = '\033[94m'
+        OKCYAN = '\033[96m'
+        OKGREEN = '\033[92m'
+        WARNING = '\033[93m'
+        FAIL = '\033[91m'
+        ENDC = '\033[0m'
+        BOLD = '\033[1m'
+        UNDERLINE = '\033[4m'
+
+    @staticmethod
+    def fetch_all_assets(url, token):
+        headers = {
+            'Authorization': f'token {token}'
+        }
+
+        asset_list = []
+
+        response = requests.get(url, headers=headers)
+        response.raise_for_status()
+        assets = response.json()
+
+        # The 'latest' release endpoint returns an object with an 'assets' list; the assets endpoint returns a bare list
+        if not assets:
+            return None
+
+        for asset in (assets.get('assets', []) if isinstance(assets, dict) else assets):
+            asset_list.append(asset)
+
+        return asset_list
+
+    @staticmethod
+    def fetch_asset(assets, asset_name, loose=False):
+        for asset in assets:
+            if loose:
+                if asset_name in asset['name']:
+                    return asset
+            else:
+                if asset['name'] == asset_name:
+                    return asset
+        return None
+
+    def __init__(self, repo, token, release_id='latest'):
+        self.repo = repo
+        self.token = token
+        if 'latest' == release_id:
+            self.repo_url = f'https://api.github.com/repos/{repo}/releases/latest'
+        else:
+            self.repo_url = f'https://api.github.com/repos/{repo}/releases/{release_id}/assets'
+        self.assets = self.fetch_all_assets(self.repo_url, self.token)
+
+    def asset_exists(self, asset_name, loose=False):
+        ## TODO - implement asset checking in the future
+        return
+
+    def asset_fetch_url_api(self, asset_name, loose=False):
+        asset = self.fetch_asset(self.assets, asset_name, loose)
+        if asset:
+            return asset['url']
+        return None
+
+    def asset_fetch_url_browser(self, asset_name, loose=False):
+        asset = self.fetch_asset(self.assets, asset_name, loose)
+        if asset:
+            return asset['browser_download_url']
+        return None
+
+    def asset_upload(self, asset_path, asset_name):
+        ## TODO - implement asset upload in the future
+        return
+
+    def asset_delete(self, asset_path, asset_name):
+        ## TODO - implement asset deletion in the future
+        return
diff --git a/scripts/requirements/check_index.txt b/scripts/requirements/check_index.txt
new file mode 100644
index 000000000..2268e07af
--- /dev/null
+++ b/scripts/requirements/check_index.txt
@@ -0,0 +1,2 @@
+requests
+elasticsearch==7.13.4
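
For context, the core check that scripts/check_indexes.py performs on every indexed item is a single authenticated GET against the stored download_link, with HTTP 404 treated as a broken link. A minimal standalone sketch of that check follows; the token and asset URL are placeholders, not values taken from this patch:

    import requests

    # Placeholders - substitute a real token and an indexed download link.
    GITHUB_TOKEN = "<personal-access-token>"
    download_link = "https://api.github.com/repos/<owner>/<repo>/releases/assets/<asset_id>"

    headers = {"Authorization": f"token {GITHUB_TOKEN}"}
    status_code = requests.get(download_link, headers=headers).status_code

    if status_code == 404:
        # check_indexes.py re-indexes such items with a fresh asset URL unless --log_only is set
        print(f"ERROR: broken download link - {download_link}")
    else:
        print(f"OK ({status_code}) - {download_link}")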