Added script to check for index discrepancies. #116

Merged 1 commit on Oct 14, 2024
111 changes: 111 additions & 0 deletions .github/workflows/checkIndexes.yaml
@@ -0,0 +1,111 @@
name: Check Indexed File Links

on:
  workflow_dispatch:
    inputs:
      select_index:
        type: choice
        description: Check Test and/or Live ES indexed items
        options:
          - Test
          - Live
          - Both
      regex:
        type: string
        description: Regex to use when searching for indexed items
        default: "arm_gcc_clang|arm_mikroc|clocks|database|dspic|^images$|mikroe_utils|pic|preinit|riscv|schemas|unit_test_lib"
      fix:
        type: boolean
        description: Fix the broken links with new ones?
        default: false

  push:
    branches:
      - main # This will trigger on every push (merge) to the 'main' branch

  schedule:
    - cron: "*/30 * * * *" # This will run every 30 minutes

jobs:
  manual_run:
    if: ${{ github.event_name == 'workflow_dispatch' }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install Dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r scripts/requirements/check_index.txt

      - name: Check Indexed Links - Live
        if: ${{ github.event.inputs.select_index == 'Live' || github.event.inputs.select_index == 'Both' }}
        run: |
          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_LIVE }} "--es_regex" "${{ github.event.inputs.regex }}" "--log_only" ${{ !github.event.inputs.fix }}
        continue-on-error: true # Ensure the workflow continues

      - name: Check Indexed Links - Test
        if: ${{ github.event.inputs.select_index == 'Test' || github.event.inputs.select_index == 'Both' }}
        run: |
          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_TEST }} "--es_regex" "${{ github.event.inputs.regex }}" "--log_only" ${{ !github.event.inputs.fix }}
        continue-on-error: true # Ensure the workflow continues

  push_to_main_run:
    if: ${{ github.event_name == 'push' }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install Dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r scripts/requirements/check_index.txt

      - name: Check Indexed Links - Live
        run: |
          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_LIVE }} "--es_regex" "arm_gcc_clang|arm_mikroc|clocks|database|dspic|^images$|mikroe_utils|pic|preinit|riscv|schemas|unit_test_lib"
        continue-on-error: true # Ensure the workflow continues

      - name: Check Indexed Links - Test
        run: |
          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_TEST }} "--es_regex" "arm_gcc_clang|arm_mikroc|clocks|database|dspic|^images$|mikroe_utils|pic|preinit|riscv|schemas|unit_test_lib"
        continue-on-error: true # Ensure the workflow continues

  scheduled_run:
    if: ${{ github.event_name == 'schedule' }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install Dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r scripts/requirements/check_index.txt

      - name: Check Indexed Links - Live
        run: |
          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_LIVE }} "--es_regex" "arm_gcc_clang|arm_mikroc|clocks|database|dspic|^images$|mikroe_utils|pic|preinit|riscv|schemas|unit_test_lib"
        continue-on-error: true # Ensure the workflow continues

      - name: Check Indexed Links - Test
        run: |
          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_TEST }} "--es_regex" "arm_gcc_clang|arm_mikroc|clocks|database|dspic|^images$|mikroe_utils|pic|preinit|riscv|schemas|unit_test_lib"
        continue-on-error: true # Ensure the workflow continues
47 changes: 47 additions & 0 deletions scripts/check_indexes.py
@@ -0,0 +1,47 @@
import sys, json, argparse, requests

import classes.class_gh as gh
import classes.class_es as es

if __name__ == "__main__":
    # Get arguments
    parser = argparse.ArgumentParser(description="Check that indexed download links point to valid GitHub release assets.")
    parser.add_argument("gh_repo", help="GitHub repository name, e.g., 'username/repo'", type=str)
    parser.add_argument("gh_token", help="GitHub token", type=str)
    parser.add_argument("es_host", help="ES instance host value", type=str)
    parser.add_argument("es_user", help="ES instance user value", type=str)
    parser.add_argument("es_password", help="ES instance password value", type=str)
    parser.add_argument("es_index", help="ES instance index value", type=str)
    parser.add_argument("--es_regex", help="Regex used to fetch indexed items", type=str, default=".+")
    # argparse's type=bool treats any non-empty string (including "false") as True,
    # so parse the flag's string value explicitly.
    parser.add_argument("--log_only", help="Only log broken links instead of reindexing them",
                        type=lambda v: v.lower() in ("true", "1", "yes"), default=False)
    args = parser.parse_args()

    es_instance = es.index(
        es_host=args.es_host, es_user=args.es_user, es_password=args.es_password,
        index=args.es_index, token=args.gh_token
    )

    gh_instance = gh.repo(args.gh_repo, args.gh_token)

    # Fetch all indexed items whose 'name' matches the provided regex
    es_instance.fetch(regex=args.es_regex)

    headers = {
        'Authorization': f'token {args.gh_token}'
    }

    err = False
    for indexed_item in es_instance.indexed_items:
        asset_status = requests.get(indexed_item['source']['download_link'], headers=headers)
        if es_instance.Status.ERROR.value == asset_status.status_code:  ## code 404 - error, reindex with correct download link
            err = True
            print("%sERROR: Asset \"%s\" download link is incorrect. - %s" % (es_instance.Colors.FAIL, indexed_item['source']['name'], indexed_item['source']['download_link']))
            if not args.log_only:
                # Look up the asset's current API URL in the latest release and reindex the item
                package_name = (json.loads(asset_status.text))['name']
                url = gh_instance.asset_fetch_url_api(package_name, loose=False)
                indexed_item['source']['download_link'] = url
                es_instance.update(indexed_item['doc']['type'], indexed_item['doc']['id'], indexed_item['source'])
        else:  ## code 200 - success, no need to reindex
            print("%sOK: Asset \"%s\" download link is correct. - %s" % (es_instance.Colors.OKBLUE, indexed_item['source']['name'], indexed_item['source']['download_link']))

    # Fail the run if broken links were found but not fixed
    if err and args.log_only:
        sys.exit(-1)
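
For reference, a sketch of the entry shape that the loop above consumes, as produced by index.fetch() in class_es.py below; the index name, document id, and URL are illustrative placeholders, not values from the real index:

# Illustrative shape of one element of es_instance.indexed_items
# (all values below are placeholders).
indexed_item = {
    'doc': {
        'index': 'live_packages',   # hypothetical ES index name
        'type': '_doc',
        'id': 'clocks'              # hypothetical document id
    },
    'source': {
        'name': 'clocks',
        'download_link': 'https://api.github.com/repos/owner/repo/releases/assets/123456'
        # any other indexed fields are carried along unchanged by the update
    }
}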
138 changes: 138 additions & 0 deletions scripts/classes/class_es.py
@@ -0,0 +1,138 @@
import re, time
from elasticsearch import Elasticsearch
from enum import Enum

class index():
    class Status(Enum):
        SUCCESS = 200
        ERROR = 404

    class Colors:
        HEADER = '\033[95m'
        OKBLUE = '\033[94m'
        OKCYAN = '\033[96m'
        OKGREEN = '\033[92m'
        WARNING = '\033[93m'
        FAIL = '\033[91m'
        ENDC = '\033[0m'
        BOLD = '\033[1m'
        UNDERLINE = '\033[4m'

    @staticmethod
    def init(es_host, es_user, es_password, retry=None):
        retry_check = 10
        if retry:
            retry_check = retry
        num_of_retries = 1
        while True:
            print(f"Trying to connect to ES. Connection retry: {num_of_retries}")
            es = Elasticsearch([es_host], http_auth=(es_user, es_password))
            if es.ping():
                break
            # Wait for 1 second and try again if the connection fails
            if retry_check == num_of_retries:
                # Exit if it fails 10 times, something is wrong with the server
                raise ValueError("Connection to ES failed!")
            num_of_retries += 1
            es = None

            time.sleep(1)

        return es

    @staticmethod
    def response(es: Elasticsearch, index, query_size=5000):
        # Search query to use
        query_search = {
            "size": query_size,
            "query": {
                "match_all": {}
            }
        }

        # Search the base with the provided query
        response = None
        num_of_retries = 1
        while num_of_retries <= 10:
            try:
                response = es.search(index=index, body=query_search)
                if not response['timed_out']:
                    break
            except Exception:
                print("Executing search query - retry number %i" % num_of_retries)
            num_of_retries += 1

        return response

    @staticmethod
    def find_item(items, check):
        for item in items:
            if 'name' in item:
                if item['name'] == check:
                    return True
        return False

    @staticmethod
    def api_index(es: Elasticsearch, doc_index, doc_type, doc_id, doc_body):
        return es.index(
            index=doc_index,
            doc_type=doc_type,
            id=doc_id,
            body=doc_body
        )

    def __init__(self, es_host, es_user, es_password, index, token, retry=None):
        self.es_instance = self.init(es_host, es_user, es_password, retry)
        self.index = index
        self.token = token
        self.indexed_items = []

    def fetch(self, regex=r'.+', query_size=5000):
        pattern = re.compile(regex)
        response = self.response(self.es_instance, self.index, query_size)

        for eachHit in response['hits']['hits']:
            if 'name' in eachHit['_source']:
                if pattern.match(eachHit['_source']['name']):
                    self.indexed_items.append(
                        {
                            'doc': {
                                'index': eachHit['_index'],
                                'type': eachHit['_type'],
                                'id': eachHit['_id']
                            },
                            'source': eachHit['_source']
                        }
                    )

    def exists(self, check, query_size=5000):
        ## Did not use the ES exists function as it requires doc_type and doc_id
        ## For future use it is better to search by name only
        response = self.response(self.es_instance, self.index, query_size)
        return self.find_item([hit['_source'] for hit in response['hits']['hits']], check)

    def create(self, doc_type, doc_id, doc_body):
        response = self.api_index(self.es_instance, self.index, doc_type, doc_id, doc_body)
        if not 'created' == response['result']:
            raise ValueError("%s%s failed to index to %s!" % (self.Colors.FAIL, doc_id, self.index))
        else:
            print("%sINFO: Asset \"%s\" created. - %s" % (self.Colors.OKGREEN, doc_body['name'], doc_body['download_link']))

    def update(self, doc_type, doc_id, doc_body):
        response = self.api_index(self.es_instance, self.index, doc_type, doc_id, doc_body)
        # The index API reports result == 'created' if the document did not exist yet
        if 'created' == response['result']:
            print("%sWARNING: Asset \"%s\" created instead of updated. - %s" % (self.Colors.WARNING, doc_body['name'], doc_body['download_link']))
        elif not 'updated' == response['result']:
            raise ValueError("%s%s failed to update on %s!" % (self.Colors.FAIL, doc_id, self.index))
        else:
            print("%sINFO: Asset \"%s\" updated. - %s" % (self.Colors.OKGREEN, doc_body['name'], doc_body['download_link']))

    def delete(self, doc_type, doc_id):
        response = self.es_instance.delete(
            index=self.index,
            doc_type=doc_type,
            id=doc_id
        )
        if not 'deleted' == response['result']:
            raise ValueError("%s%s failed to delete from %s!" % (self.Colors.FAIL, doc_id, self.index))
        else:
            print("%sINFO: Asset \"%s\" deleted." % (self.Colors.OKGREEN, doc_id))
82 changes: 82 additions & 0 deletions scripts/classes/class_gh.py
@@ -0,0 +1,82 @@
import requests
from enum import Enum

class repo():
    class Status(Enum):
        SUCCESS = 200

    class Colors:
        HEADER = '\033[95m'
        OKBLUE = '\033[94m'
        OKCYAN = '\033[96m'
        OKGREEN = '\033[92m'
        WARNING = '\033[93m'
        FAIL = '\033[91m'
        ENDC = '\033[0m'
        BOLD = '\033[1m'
        UNDERLINE = '\033[4m'

    @staticmethod
    def fetch_all_assets(url, token):
        headers = {
            'Authorization': f'token {token}'
        }

        asset_list = []

        response = requests.get(url, headers=headers)
        response.raise_for_status()
        assets = response.json()

        # If there are no assets, there is nothing to collect
        if not assets:
            return None

        # The .../releases/latest endpoint returns a release object with an 'assets'
        # field, while the .../releases/{id}/assets endpoint returns a bare list.
        if isinstance(assets, dict):
            assets = assets.get('assets', [])

        for asset in assets:
            asset_list.append(asset)

        return asset_list

    @staticmethod
    def fetch_asset(assets, asset_name, loose=False):
        # Loose matching treats asset_name as a substring; strict matching requires an exact name
        for asset in assets:
            if loose:
                if asset_name in asset['name']:
                    return asset
            else:
                if asset['name'] == asset_name:
                    return asset
        return None

    def __init__(self, repo, token, release_id='latest'):
        self.repo = repo
        self.token = token
        if 'latest' == release_id:
            self.repo_url = f'https://api.github.com/repos/{repo}/releases/latest'
        else:
            self.repo_url = f'https://api.github.com/repos/{repo}/releases/{release_id}/assets'
        self.assets = self.fetch_all_assets(self.repo_url, self.token)

    def asset_exists(self, asset_name, loose=False):
        ## TODO - implement asset checking in the future
        return

    def asset_fetch_url_api(self, asset_name, loose=False):
        asset = self.fetch_asset(self.assets, asset_name, loose)
        if asset:
            return asset['url']
        return None

    def asset_fetch_url_browser(self, asset_name, loose=False):
        asset = self.fetch_asset(self.assets, asset_name, loose)
        if asset:
            return asset['browser_download_url']
        return None

    def asset_upload(self, asset_path, asset_name):
        ## TODO - implement asset upload in the future
        return

    def asset_delete(self, asset_path, asset_name):
        ## TODO - implement asset deletion in the future
        return
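
A minimal usage sketch of the repo class above; the repository name, token, and asset name are placeholders:

# Minimal usage sketch (placeholder repository, token, and asset name).
import classes.class_gh as gh

gh_repo = gh.repo("username/repo", "ghp_placeholder")                 # defaults to the latest release
api_url = gh_repo.asset_fetch_url_api("clocks", loose=False)          # exact-name match, API asset URL
browser_url = gh_repo.asset_fetch_url_browser("clocks", loose=True)   # substring match, browser download URL
print(api_url, browser_url)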
2 changes: 2 additions & 0 deletions scripts/requirements/check_index.txt
@@ -0,0 +1,2 @@
requests
elasticsearch==7.13.4