-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #116 from MikroElektronika/improvement/fixed-indexing
Added script to check for index discrepancies.
- Loading branch information
Showing
5 changed files
with
380 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
# Verifies that the download links stored in the Test/Live Elasticsearch
# indexes still resolve, and (unless running in log-only mode) re-indexes the
# broken ones. Runs manually, on every push to main, and on a schedule.
name: Check Indexed File Links

on:
  workflow_dispatch:
    inputs:
      select_index:
        type: choice
        description: Check Test and/or Live ES indexed items
        options:
          - Test
          - Live
          - Both
      regex:
        type: string
        description: Regex to use when searching for indexed items
        default: "arm_gcc_clang|arm_mikroc|clocks|database|dspic|^images$|mikroe_utils|pic|preinit|riscv|schemas|unit_test_lib"
      fix:
        type: boolean
        description: Fix the broken links with new ones?
        default: false

  push:
    branches:
      - main # This will trigger on every push (merge) to the 'main' branch

  schedule:
    - cron: "*/30 * * * *" # This will run every 30 minutes

jobs:
  manual_run:
    if: ${{ github.event_name == 'workflow_dispatch' }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install Dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r scripts/requirements/check_index.txt

      - name: Check Indexed Links - Live
        if: ${{ github.event.inputs.select_index == 'Live' || github.event.inputs.select_index == 'Both' }}
        env:
          # Passed through the environment so quotes or shell metacharacters in
          # the user-supplied regex cannot alter the command line.
          ES_REGEX: ${{ github.event.inputs.regex }}
        # `inputs.fix` is a real boolean; `github.event.inputs.fix` is a string,
        # and negating a non-empty string always yields `false`.
        run: |
          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_LIVE }} "--es_regex" "$ES_REGEX" "--log_only" ${{ !inputs.fix }}
        continue-on-error: true # Ensure the workflow continues

      - name: Check Indexed Links - Test
        if: ${{ github.event.inputs.select_index == 'Test' || github.event.inputs.select_index == 'Both' }}
        env:
          ES_REGEX: ${{ github.event.inputs.regex }}
        run: |
          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_TEST }} "--es_regex" "$ES_REGEX" "--log_only" ${{ !inputs.fix }}
        continue-on-error: true # Ensure the workflow continues

  push_to_main_run:
    if: ${{ github.event_name == 'push' }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install Dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r scripts/requirements/check_index.txt

      # The regex is single-quoted so the shell never tries to expand the `$`.
      - name: Check Indexed Links - Live
        run: |
          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_LIVE }} "--es_regex" 'arm_gcc_clang|arm_mikroc|clocks|database|dspic|^images$|mikroe_utils|pic|preinit|riscv|schemas|unit_test_lib'
        continue-on-error: true # Ensure the workflow continues

      - name: Check Indexed Links - Test
        run: |
          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_TEST }} "--es_regex" 'arm_gcc_clang|arm_mikroc|clocks|database|dspic|^images$|mikroe_utils|pic|preinit|riscv|schemas|unit_test_lib'
        continue-on-error: true # Ensure the workflow continues

  scheduled_run:
    if: ${{ github.event_name == 'schedule' }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install Dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r scripts/requirements/check_index.txt

      - name: Check Indexed Links - Live
        run: |
          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_LIVE }} "--es_regex" 'arm_gcc_clang|arm_mikroc|clocks|database|dspic|^images$|mikroe_utils|pic|preinit|riscv|schemas|unit_test_lib'
        continue-on-error: true # Ensure the workflow continues

      - name: Check Indexed Links - Test
        run: |
          python -u scripts/check_indexes.py ${{ github.repository }} ${{ secrets.GITHUB_TOKEN }} ${{ secrets.ES_HOST }} ${{ secrets.ES_USER }} ${{ secrets.ES_PASSWORD }} ${{ secrets.ES_INDEX_TEST }} "--es_regex" 'arm_gcc_clang|arm_mikroc|clocks|database|dspic|^images$|mikroe_utils|pic|preinit|riscv|schemas|unit_test_lib'
        continue-on-error: true # Ensure the workflow continues
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import sys, json, argparse, requests | ||
|
||
import classes.class_gh as gh | ||
import classes.class_es as es | ||
|
||
def str2bool(value):
    """Convert a command-line token such as ``"true"``/``"false"`` to a bool.

    ``argparse`` with ``type=bool`` calls ``bool("false")``, which is ``True``
    because the string is non-empty; this converter parses the text instead.

    Args:
        value: A bool (returned unchanged) or a string token.

    Returns:
        The boolean the token stands for.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ("true", "1", "yes", "y", "on"):
        return True
    if token in ("false", "0", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected, got %r" % (value,))


if __name__ == "__main__":
    # Get arguments
    parser = argparse.ArgumentParser(
        description="Check indexed download links and optionally re-index the broken ones."
    )
    parser.add_argument("gh_repo", help="Github repository name, e.g., 'username/repo'", type=str)
    parser.add_argument("gh_token", help="GitHub Token", type=str)
    parser.add_argument("es_host", help="ES instance host value", type=str)
    parser.add_argument("es_user", help="ES instance user value", type=str)
    parser.add_argument("es_password", help="ES instance password value", type=str)
    parser.add_argument("es_index", help="ES instance index value", type=str)
    parser.add_argument("--es_regex", help="Regex to use to fetch indexed items", type=str, default=".+")
    # nargs='?' keeps "--log_only true|false" working and also allows a bare "--log_only".
    parser.add_argument(
        "--log_only",
        help="Only report broken links; do not update the index",
        type=str2bool, nargs="?", const=True, default=False
    )
    args = parser.parse_args()

    es_instance = es.index(
        es_host=args.es_host, es_user=args.es_user, es_password=args.es_password,
        index=args.es_index, token=args.gh_token
    )

    gh_instance = gh.repo(args.gh_repo, args.gh_token)

    es_instance.fetch(regex=args.es_regex)

    headers = {
        'Authorization': f'token {args.gh_token}'
    }

    err = False
    for indexed_item in es_instance.indexed_items:
        source = indexed_item['source']
        asset_status = requests.get(source['download_link'], headers=headers)
        if es_instance.Status.ERROR.value == asset_status.status_code:  ## code 404 - error, reindex with correct download link
            err = True
            print("%sERROR: Asset \"%s\" download link is incorrect. - %s" % (es_instance.Colors.FAIL, source['name'], source['download_link']))
            if not args.log_only:
                # Prefer the name reported by the response body; an error body may
                # not be JSON or may carry no 'name' at all.
                try:
                    payload = json.loads(asset_status.text)
                except ValueError:
                    payload = {}
                package_name = payload.get('name') if isinstance(payload, dict) else None
                if not package_name:
                    # NOTE(review): assumes the indexed name equals the release
                    # asset name - confirm against the indexing script.
                    package_name = source['name']
                url = gh_instance.asset_fetch_url_api(package_name, loose=False)
                if url is None:
                    # Never overwrite the stored link with None.
                    print("%sERROR: Asset \"%s\" not found in release, link left unchanged." % (es_instance.Colors.FAIL, package_name))
                    continue
                source['download_link'] = url
                es_instance.update(indexed_item['doc']['type'], indexed_item['doc']['id'], source)
        else:  ## any other status code is treated as success, no need to reindex
            print("%sOK: Asset \"%s\" download link is correct. - %s" % (es_instance.Colors.OKBLUE, source['name'], source['download_link']))

    # In log-only mode nothing was repaired, so surface the failure to the caller.
    if err and args.log_only:
        sys.exit(-1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
import re, time | ||
from elasticsearch import Elasticsearch | ||
from enum import Enum | ||
|
||
class index():
    """Helper around one Elasticsearch index that stores downloadable items.

    An instance connects on construction, can fetch the indexed documents whose
    ``name`` matches a regex, and can create, update or delete documents.
    """

    class Status(Enum):
        # HTTP status codes used by callers to classify link checks.
        SUCCESS = 200
        ERROR = 404

    class Colors:
        # ANSI escape sequences used to colour console output.
        HEADER = '\033[95m'
        OKBLUE = '\033[94m'
        OKCYAN = '\033[96m'
        OKGREEN = '\033[92m'
        WARNING = '\033[93m'
        FAIL = '\033[91m'
        ENDC = '\033[0m'
        BOLD = '\033[1m'
        UNDERLINE = '\033[4m'

    @staticmethod
    def init(es_host, es_user, es_password, retry=None):
        """Connect to Elasticsearch, retrying once per second until it pings.

        Args:
            es_host: Host passed to the ``Elasticsearch`` client.
            es_user: User name for HTTP authentication.
            es_password: Password for HTTP authentication.
            retry: Maximum number of attempts; a falsy value means 10.

        Returns:
            The connected ``Elasticsearch`` client.

        Raises:
            ValueError: If no attempt managed to ping the server.
        """
        max_attempts = retry if retry else 10
        attempt = 1
        while True:
            print(f"Trying to connect to ES. Connection retry: {attempt}")
            es = Elasticsearch([es_host], http_auth=(es_user, es_password))
            if es.ping():
                return es
            # '>=' (not '==') so a negative retry value cannot loop forever.
            if attempt >= max_attempts:
                raise ValueError("Connection to ES failed!")
            attempt += 1
            # Wait for 1 second and try again
            time.sleep(1)

    @staticmethod
    def response(es: "Elasticsearch", index, query_size=5000, max_retries=10):
        """Run a ``match_all`` search and return the raw response.

        Both a raised error and a ``timed_out`` response count as a failed
        attempt, so the loop always terminates.

        Args:
            es: Connected client exposing ``search(index=..., body=...)``.
            index: Name of the index to search.
            query_size: Maximum number of hits requested.
            max_retries: Number of attempts before giving up.

        Returns:
            The search response of the first attempt that did not time out.

        Raises:
            ValueError: If every attempt failed or timed out.
        """
        query_search = {
            "size": query_size,
            "query": {
                "match_all": {}
            }
        }

        last_error = None
        for attempt in range(1, max_retries + 1):
            try:
                result = es.search(index=index, body=query_search)
            except Exception as exc:  # client/transport errors: retry
                last_error = exc
            else:
                if not result.get('timed_out'):
                    return result
            print("Executing search query - retry number %i" % attempt)

        raise ValueError(
            "Search query on %s failed after %i attempts!" % (index, max_retries)
        ) from last_error

    @staticmethod
    def find_item(items, check):
        """Return True if any mapping in ``items`` has ``name`` equal to ``check``."""
        return any('name' in item and item['name'] == check for item in items)

    @staticmethod
    def api_index(es: "Elasticsearch", doc_index, doc_type, doc_id, doc_body):
        """Index (create or overwrite) one document and return the API response."""
        return es.index(
            index=doc_index,
            doc_type=doc_type,
            id=doc_id,
            body=doc_body
        )

    def __init__(self, es_host, es_user, es_password, index, token, retry=None):
        """Connect to ES and remember the target index and GitHub token."""
        self.es_instance = self.init(es_host, es_user, es_password, retry)
        self.index = index
        self.token = token
        self.indexed_items = []

    def fetch(self, regex=r'.+', query_size=5000):
        """Append to ``indexed_items`` every hit whose name matches ``regex``.

        The regex is applied with ``re.match`` (anchored at the start of the
        name). Hits without a ``name`` field are ignored. Results accumulate
        across calls; the list is not cleared first.

        Args:
            regex: Pattern matched against each hit's ``_source['name']``.
            query_size: Maximum number of hits requested from ES.
        """
        pattern = re.compile(regex)
        response = self.response(self.es_instance, self.index, query_size)

        for hit in response['hits']['hits']:
            source = hit['_source']
            if 'name' not in source or not pattern.match(source['name']):
                continue
            self.indexed_items.append(
                {
                    'doc': {
                        'index': hit['_index'],
                        'type': hit['_type'],
                        'id': hit['_id']
                    },
                    'source': source
                }
            )

    def exists(self, check, query_size=5000):
        """Return True if a document named ``check`` is present in the index."""
        ## Did not use ES exists function as it requires doc_type and doc_id
        ## For future use it is better to search by name only
        response = self.response(self.es_instance, self.index, query_size)
        return self.find_item([hit['_source'] for hit in response['hits']['hits']], check)

    def create(self, doc_type, doc_id, doc_body):
        """Index a new document.

        Raises:
            ValueError: If ES does not report the document as created.
        """
        response = self.api_index(self.es_instance, self.index, doc_type, doc_id, doc_body)
        if response['result'] != 'created':
            raise ValueError("%s%s failed to index to %s!" % (self.Colors.FAIL, doc_id, self.index))
        print("%sINFO: Asset \"%s\" created. - %s" % (self.Colors.OKGREEN, doc_body['name'], doc_body['download_link']))

    def update(self, doc_type, doc_id, doc_body):
        """Overwrite an existing document, warning if it was created instead.

        Only the ``result`` field is inspected; the legacy boolean ``created``
        field is not present in newer ES responses.

        Raises:
            ValueError: If ES reports neither ``updated`` nor ``created``.
        """
        response = self.api_index(self.es_instance, self.index, doc_type, doc_id, doc_body)
        result = response.get('result')
        if result == 'created':
            print("%sWARNING: Asset \"%s\" created instead of updating. - %s" % (self.Colors.WARNING, doc_body['name'], doc_body['download_link']))
        elif result != 'updated':
            raise ValueError("%s%s failed to update on %s!" % (self.Colors.FAIL, doc_id, self.index))
        else:
            print("%sINFO: Asset \"%s\" updated. - %s" % (self.Colors.OKGREEN, doc_body['name'], doc_body['download_link']))

    def delete(self, doc_type, doc_id):
        """Delete a document.

        Raises:
            ValueError: If ES does not report the document as deleted.
        """
        response = self.es_instance.delete(
            index=self.index,
            doc_type=doc_type,
            id=doc_id
        )
        if response['result'] != 'deleted':
            raise ValueError("%s%s failed to delete from %s!" % (self.Colors.FAIL, doc_id, self.index))
        print("%sINFO: Asset \"%s\" deleted." % (self.Colors.OKGREEN, doc_id))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
import requests | ||
from enum import Enum | ||
|
||
class repo():
    """Read-only view of the assets attached to one GitHub release."""

    class Status(Enum):
        SUCCESS = 200

    class Colors:
        # ANSI escape sequences used to colour console output.
        HEADER = '\033[95m'
        OKBLUE = '\033[94m'
        OKCYAN = '\033[96m'
        OKGREEN = '\033[92m'
        WARNING = '\033[93m'
        FAIL = '\033[91m'
        ENDC = '\033[0m'
        BOLD = '\033[1m'
        UNDERLINE = '\033[4m'

    @staticmethod
    def fetch_all_assets(url, token):
        """Download the asset descriptions found at ``url``.

        Handles both payload shapes built by ``__init__``: a release object
        (a dict with an ``assets`` list) and an asset listing (a JSON list).
        Follows the ``next`` link of a paginated listing until it is exhausted.

        Args:
            url: GitHub API URL of a release or of a release's asset list.
            token: GitHub token sent in the ``Authorization`` header.

        Returns:
            A list of asset dicts, or ``None`` if the first response was empty.

        Raises:
            requests.HTTPError: If any request returns an error status.
        """
        headers = {
            'Authorization': f'token {token}'
        }

        asset_list = None
        next_url = url
        while next_url:
            response = requests.get(next_url, headers=headers)
            response.raise_for_status()
            payload = response.json()

            # If no more assets, break out of loop
            if not payload:
                break

            if asset_list is None:
                asset_list = []
            if isinstance(payload, dict):
                asset_list.extend(payload.get('assets', []))
            else:
                asset_list.extend(payload)

            # Absent on single-page responses, which ends the loop.
            next_url = response.links.get('next', {}).get('url')

        return asset_list

    @staticmethod
    def fetch_asset(assets, asset_name, loose=False):
        """Return the first asset whose name matches, or ``None``.

        Args:
            assets: Iterable of asset dicts; ``None`` is treated as empty.
            asset_name: Name to look for.
            loose: If true, match when ``asset_name`` is a substring of the
                asset's name; otherwise require equality.
        """
        for asset in assets or []:
            name = asset['name']
            matched = (asset_name in name) if loose else (name == asset_name)
            if matched:
                return asset
        return None

    def __init__(self, repo, token, release_id='latest'):
        """Fetch and cache the assets of ``release_id`` (default: latest)."""
        self.repo = repo
        self.token = token
        if 'latest' == release_id:
            self.repo_url = f'https://api.github.com/repos/{repo}/releases/latest'
        else:
            self.repo_url = f'https://api.github.com/repos/{repo}/releases/{release_id}/assets'
        self.assets = self.fetch_all_assets(self.repo_url, self.token)

    def asset_exists(self, asset_name, loose=False):
        ## TODO - implement asset checking in the future
        return

    def asset_fetch_url_api(self, asset_name, loose=False):
        """Return the matching asset's API ``url``, or ``None`` if not found."""
        asset = self.fetch_asset(self.assets, asset_name, loose)
        if asset:
            return asset['url']
        return None

    def asset_fetch_url_browser(self, asset_name, loose=False):
        """Return the matching asset's ``browser_download_url``, or ``None``."""
        asset = self.fetch_asset(self.assets, asset_name, loose)
        if asset:
            return asset['browser_download_url']
        return None

    def asset_upload(self, asset_path, asset_name):
        ## TODO - implement asset upload in the future
        return

    def asset_delete(self, asset_path, asset_name):
        ## TODO - implement asset deletion in the future
        return
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
requests | ||
elasticsearch==7.13.4 |