diff --git a/aws_tools/dynamodb_handler.py b/aws_tools/dynamodb_handler.py index 19cd143a..fa3cab85 100644 --- a/aws_tools/dynamodb_handler.py +++ b/aws_tools/dynamodb_handler.py @@ -73,9 +73,24 @@ def delete_item(self, keys): Key=keys ) - def query_items(self, query=None, only_fields_with_values=True): + def get_item_count(self): + """ + get number of items in table - one caveat is that this value may be off since AWS only updates it every 6 hours + :return: + """ + return self.table.item_count + + def query_items(self, query=None, only_fields_with_values=True, queryChunkLimit=-1): + """ + gets items from database + :param query: + :param only_fields_with_values: + :param queryChunkLimit: not an absolute count, but a threshold where we stop fetching more chunks + (if negative then no limit, but will read all chunks) + :return: + """ filter_expression = None - if query and len(query) > 1: + if query and len(query) >= 1: for field, value in iteritems(query): value2 = None if isinstance(value, dict) and 'condition' in value and 'value' in value: @@ -124,11 +139,33 @@ def query_items(self, query=None, only_fields_with_values=True): else: response = self.table.scan() - if response and 'Items' in response: - return response['Items'] - else: + if not response or not('Items' in response): return None + # finished if there is no more data to read + if not('LastEvaluatedKey' in response): + return response['Items'] + + items = response['Items'] + + # read chunks until end or threshold is reached + while 'LastEvaluatedKey' in response: + if filter_expression is not None: + response = self.table.scan( + FilterExpression=filter_expression, + ExclusiveStartKey = response['LastEvaluatedKey'] + ) + else: + response = self.table.scan(ExclusiveStartKey = response['LastEvaluatedKey']) + + if response and ('Items' in response): + items += response['Items'] + + itemCount = len(items) + if (queryChunkLimit >= 0) and (itemCount >= queryChunkLimit): + break + + return items RESERVED_WORDS = [ 'ABORT', diff --git a/lambda_handlers/dashboard_handler.py b/lambda_handlers/dashboard_handler.py index bdf61586..86751e3a 100644 --- a/lambda_handlers/dashboard_handler.py +++ b/lambda_handlers/dashboard_handler.py @@ -24,4 +24,13 @@ def _handle(self, event, context): 'job_table_name': self.retrieve(event['vars'], 'job_table_name', 'Environment Vars'), 'module_table_name': self.retrieve(event['vars'], 'module_table_name', 'Environment Vars') } - return TxManager(**env_vars).generate_dashboard() + + max_failures = TxManager.MAX_FAILURES + try: + querystring = event['api-gateway']['params']['querystring'] + max_failures = int(querystring['failures']) + + except: + pass + + return TxManager(**env_vars).generate_dashboard(max_failures) diff --git a/manager/manager.py b/manager/manager.py index 7c042f6a..8dc419bf 100644 --- a/manager/manager.py +++ b/manager/manager.py @@ -11,10 +11,10 @@ from job import TxJob from module import TxModule - class TxManager(object): JOB_TABLE_NAME = 'tx-job' MODULE_TABLE_NAME = 'tx-module' + MAX_FAILURES = 10 def __init__(self, api_url=None, gogs_url=None, cdn_url=None, cdn_bucket=None, quiet=False, aws_access_key_id=None, aws_secret_access_key=None, @@ -151,6 +151,13 @@ def setup_job(self, data): ], } + def get_job_count(self): + """ + get number of jobs in database - one caveat is that this value may be off since AWS only updates it every 6 hours + :return: + """ + return self.job_db_handler.get_item_count() + def list_jobs(self, data, must_be_authenticated=True): if must_be_authenticated: if 'gogs_user_token' not in data: @@ -452,7 +459,7 @@ def update_module(self, module): def delete_module(self, module): return self.module_db_handler.delete_item({'name': module.name}) - def generate_dashboard(self): + def generate_dashboard(self, max_failures = MAX_FAILURES): """ Generate page with metrics indicating configuration of tx-manager. @@ -469,12 +476,21 @@ def generate_dashboard(self): } items = sorted(self.module_db_handler.query_items(), key=lambda k: k['name']) - totalJobs = self.list_jobs({},False) - if items and len(items): + moduleNames = [] + for item in items: + moduleNames.append(item["name"]) + + registeredJobs = self.list_jobs({ "convert_module" : { "condition" : "is_in", "value" : moduleNames} + }, False) + totalJobCount = self.get_job_count() + registeredJobCount = len(registeredJobs) + if registeredJobCount > totalJobCount: # sanity check since AWS can be slow to update job count reported in table (every 6 hours) + totalJobCount = registeredJobCount + self.logger.info(" Found: " + str(len(items)) + " item[s] in tx-module") - body = BeautifulSoup('

TX-Manager Dashboard

Module Attributes


', + body = BeautifulSoup('

TX-Manager Dashboard

Module Attributes


', 'html.parser') for item in items: # self.logger.info(json.dumps(item)) @@ -484,7 +500,7 @@ def generate_dashboard(self): '' + str(moduleName) + '', 'html.parser')) - jobs = self.get_jobs_for_module(totalJobs, moduleName) + jobs = self.get_jobs_for_module(registeredJobs, moduleName) self.get_jobs_counts(jobs) # TBD the following code almosts walks the db record replacing next 11 lines @@ -549,7 +565,7 @@ def generate_dashboard(self): str(self.jobs_total) + '', 'html.parser')) - self.get_jobs_counts(totalJobs) + self.get_jobs_counts(registeredJobs) body.table.append(BeautifulSoup( 'Total Jobs', 'html.parser')) @@ -565,11 +581,65 @@ def generate_dashboard(self): 'Failures:' + str(self.jobs_failures) + '', 'html.parser')) + body.table.append(BeautifulSoup( + 'Unregistered:' + + str(totalJobCount - self.jobs_total) + '', + 'html.parser')) body.table.append(BeautifulSoup( 'Total:' + - str(self.jobs_total) + '', + str(totalJobCount) + '', + 'html.parser')) + + # build job failures table + + jobFailures = self.get_job_failures(registeredJobs) + body.append(BeautifulSoup('

Failed Jobs

', 'html.parser')) + failureTable = BeautifulSoup('
','html.parser') + failureTable.table.append(BeautifulSoup( + '' + 'Time' + 'Errors' + 'Repo' + 'PreConvert' + 'Converted' + 'Destination' + 'Job ID', 'html.parser')) + gogs_url = self.gogs_url + if gogs_url == None : + gogs_url = 'https://git.door43.org' + + for i in range(0, max_failures): + if i >= len(jobFailures): + break + + item = jobFailures[i] + + try : + identifier = item['identifier'] + owner_name, repo_name, commit_id = identifier.split('/') + sourceSubPath = 'u/{0}/{1}'.format(owner_name, repo_name) + cdn_bucket = item['cdn_bucket'] + destinationUrl = 'https://{0}/u/{1}/{2}/{3}/build_log.json'.format(cdn_bucket, owner_name, repo_name, commit_id) + repoUrl = gogs_url + "/" + sourceSubPath + preconvertedUrl = item['source'] + convertedUrl = item['output'] + failureTable.table.append(BeautifulSoup( + '' + + '' + item['created_at'] + '' + + '' + str(item['errors']) + '' + + '' + repoUrl + '' + + '' + preconvertedUrl + '' + + '' + convertedUrl + '' + + '' + destinationUrl + '' + + '' + item['job_id'] + '' + + '', + 'html.parser')) + except: + pass + + body.append(failureTable) dashboard['body'] = body.prettify('UTF-8') else: self.logger.info("No modules found.") @@ -592,20 +662,26 @@ def get_jobs_counts(self, jobs): self.jobs_failures = 0 self.jobs_success = 0 for job in jobs: - try: - errors = job['errors'] - if len(errors) > 0: - self.jobs_failures+=1 - continue + errors = job['errors'] + if len(errors) > 0: + self.jobs_failures+=1 + continue - warnings = job['warnings'] - if len(warnings) > 0: - self.jobs_warnings+=1 - continue + warnings = job['warnings'] + if len(warnings) > 0: + self.jobs_warnings+=1 + continue - self.jobs_success+=1 + self.jobs_success+=1 - except: - self.jobs_failures+=1 + def get_job_failures(self, jobs): + failedJobs = [] + for job in jobs: + errors = job['errors'] + if len(errors) > 0: + failedJobs.append(job) + + failedJobs = sorted(failedJobs, key=lambda k: k['created_at'], reverse=True) + return failedJobs diff --git a/setup.py b/setup.py index 057983c5..401baa6d 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ def read(f_name): setup( name='tx-manager', - version='0.2.62', + version='0.2.63', packages=[ 'client', 'manager', diff --git a/test-setup.py b/test-setup.py index 10e10f27..a7e7f076 100644 --- a/test-setup.py +++ b/test-setup.py @@ -2,7 +2,7 @@ setup( name='tx-manager', - version='0.2.62', + version='0.2.63', packages=[ 'client', 'manager', diff --git a/tests/lambda_handlers_tests/test_dashboardHandler.py b/tests/lambda_handlers_tests/test_dashboardHandler.py index e96e9edc..413bcb58 100644 --- a/tests/lambda_handlers_tests/test_dashboardHandler.py +++ b/tests/lambda_handlers_tests/test_dashboardHandler.py @@ -2,14 +2,18 @@ import mock from unittest import TestCase from lambda_handlers.dashboard_handler import DashboardHandler +from manager.manager import TxManager +def new_generate_dashboard( max_failures): + return max_failures; # return the parameter for testing -class TestListJobsHandler(TestCase): +class DashboardHandlerTest(TestCase): @mock.patch('manager.manager.TxManager.setup_resources') @mock.patch('manager.manager.TxManager.generate_dashboard') def test_handle(self, mock_generate_dashboard, mock_setup_resources): - mock_generate_dashboard.return_value = None + mock_generate_dashboard.side_effect=new_generate_dashboard + expectedMaxFailures = TxManager.MAX_FAILURES event = { 'data': {}, 'body-json': {}, @@ -23,5 +27,33 @@ def test_handle(self, mock_generate_dashboard, mock_setup_resources): } } handler = DashboardHandler() - self.assertIsNone(handler.handle(event, None)) + maxFailures = handler.handle(event, expectedMaxFailures) + self.assertEqual(maxFailures, expectedMaxFailures) + + @mock.patch('manager.manager.TxManager.setup_resources') + @mock.patch('manager.manager.TxManager.generate_dashboard') + def test_dashboard_handler_max_two(self, mock_generate_dashboard, mock_setup_resources): + mock_generate_dashboard.side_effect=new_generate_dashboard + expectedMaxFailures = 2 + event = { + "vars" : { + 'data': {}, + 'body-json': {}, + 'api_url': 'https://test-api.door43.org', + 'gogs_url': 'https://git.door43.org', + 'cdn_url': 'https://test-cdn.door43.org', + 'job_table_name': 'test-tx-job', + 'module_table_name': 'test-tx-module' + }, + "api-gateway" : { + "params" : { + 'querystring': { + 'failures': str(expectedMaxFailures) + } + } + } + } + handler = DashboardHandler() + maxFailures = handler.handle(event, expectedMaxFailures) + self.assertEqual(maxFailures, expectedMaxFailures) diff --git a/tests/manager_tests/mock_utils.py b/tests/manager_tests/mock_utils.py index a24c7437..1d4a8891 100644 --- a/tests/manager_tests/mock_utils.py +++ b/tests/manager_tests/mock_utils.py @@ -19,12 +19,16 @@ def get_item(keys): return data[key] return None - def query_items(*ignored): + def get_item_count(): + return len(data.values()) + + def query_items(query=None, only_fields_with_values=True, queryChunkLimit=-1): return data.values() handler = MagicMock() handler.get_item = MagicMock(side_effect=get_item) handler.query_items = MagicMock(side_effect=query_items) + handler.get_item_count = MagicMock(side_effect=get_item_count) handler.setup_resources = MagicMock(side_effects=setup_resources) handler.mock_data = data return handler diff --git a/tests/manager_tests/test_manager.py b/tests/manager_tests/test_manager.py index dd7d1b2a..f80b78c9 100644 --- a/tests/manager_tests/test_manager.py +++ b/tests/manager_tests/test_manager.py @@ -57,7 +57,12 @@ def setUpClass(cls): "input_format": "md", "output_format": "html", "convert_module": "module1", - "errors" : [ "error" ] + "errors" : [ "error" ], + "cdn_bucket" : "cdn.door43.org", + "identifier" : "tx-manager-test-data/en-ulb-jud/6778aa89bd", + "output" : "https://test-cdn.door43.org/tx-manager-test-data/en-ulb-jud/6778aa89bd.zip", + "source" : "https://s3-us-west-2.amazonaws.com/tx-webhook-client/preconvert/e8eb91750d.zip", + "created_at": "2017-04-12T17:03:06Z" }, "2": { "job_id": "2", @@ -102,6 +107,42 @@ def setUpClass(cls): "input_format": "md", "output_format": "html", "convert_module": "module2" + }, + "8": { + "job_id": "8", + "status": "requested", + "resource_type": "obs", + "input_format": "html", + "output_format": "pdf", + "convert_module": "module4" + }, + "9": { + "job_id": "9", + "status": "requested", + "resource_type": "obs", + "input_format": "md", + "output_format": "html", + "convert_module": "module2", + "identifier" : "tx-manager-test-data/en-ulb-jud/6778aa89bZ", + "output" : "https://test-cdn.door43.org/tx-manager-test-data/en-ulb-jud/6778aa89bdZ.zip", + "source" : "https://s3-us-west-2.amazonaws.com/tx-webhook-client/preconvert/e8eb91750dZ.zip", + "errors" : [ "error1", "error2" ], + "cdn_bucket" : "cdn.door43.org", + "created_at": "2017-03-12T17:03:076Z" + }, + "10": { + "job_id": "10", + "status": "requested", + "resource_type": "obs", + "input_format": "md", + "output_format": "html", + "convert_module": "module2", + "identifier" : "tx-manager-test-data/en-ulb-jud/6778aa89bZZ", + "output" : "https://test-cdn.door43.org/tx-manager-test-data/en-ulb-jud/6778aa89bdZZ.zip", + "source" : "https://s3-us-west-2.amazonaws.com/tx-webhook-client/preconvert/e8eb91750dZZ.zip", + "errors" : [ "error1","error2","error3" ], + "cdn_bucket" : "cdn.door43.org", + "created_at": "2017-05-12T17:03:04Z" } }, keyname="job_id") cls.mock_module_db = mock_utils.mock_db_handler(data={ @@ -591,23 +632,23 @@ def test_generate_dashboard(self): # the title should be tX-Manager Dashboard self.assertEqual(dashboard['title'], 'tX-Manager Dashboard') soup = BeautifulSoup(dashboard['body'], 'html.parser') - # there should be a table tag - self.assertIsNotNone(soup.find('table')) + # there should be a status table tag + statusTable = soup.find('table', id="status") moduleName = 'module1' expectedRowCount = 12 expectedSuccessCount = 2 expectedWarningCount = 2 expectedFailureCount = 1 - self.validateModule(soup, moduleName, expectedRowCount, expectedSuccessCount, expectedFailureCount, + self.validateModule(statusTable, moduleName, expectedRowCount, expectedSuccessCount, expectedFailureCount, expectedWarningCount) moduleName = 'module2' expectedRowCount = 11 expectedSuccessCount = 2 expectedWarningCount = 0 - expectedFailureCount = 0 - self.validateModule(soup, moduleName, expectedRowCount, expectedSuccessCount, expectedFailureCount, + expectedFailureCount = 2 + self.validateModule(statusTable, moduleName, expectedRowCount, expectedSuccessCount, expectedFailureCount, expectedWarningCount) moduleName = 'module3' @@ -615,37 +656,111 @@ def test_generate_dashboard(self): expectedSuccessCount = 0 expectedWarningCount = 0 expectedFailureCount = 0 - self.validateModule(soup, moduleName, expectedRowCount, expectedSuccessCount, expectedFailureCount, + self.validateModule(statusTable, moduleName, expectedRowCount, expectedSuccessCount, expectedFailureCount, + expectedWarningCount) + + moduleName = 'module4' + expectedRowCount = 0 + expectedSuccessCount = 0 + expectedWarningCount = 0 + expectedFailureCount = 0 + self.validateModule(statusTable, moduleName, expectedRowCount, expectedSuccessCount, expectedFailureCount, expectedWarningCount) moduleName = 'totals' - expectedRowCount = 4 - expectedSuccessCount = 4 + expectedRowCount = 5 + expectedSuccessCount = 5 + expectedWarningCount = 2 + expectedFailureCount = 3 + expectedUnregistered = 0 + self.validateModule(statusTable, moduleName, expectedRowCount, expectedSuccessCount, expectedFailureCount, + expectedWarningCount, expectedUnregistered) + + failureTable = soup.find('table', id="failed") + expectedFailureCount = 3 + self.validateFailureTable(failureTable, expectedFailureCount) + + def test_generate_dashboard_max_two(self): + expectedMaxFailures = 2 + manager = TxManager() + dashboard = manager.generate_dashboard(expectedMaxFailures) + + # the title should be tX-Manager Dashboard + self.assertEqual(dashboard['title'], 'tX-Manager Dashboard') + soup = BeautifulSoup(dashboard['body'], 'html.parser') + # there should be a status table tag + statusTable = soup.find('table', id="status") + + moduleName = 'module1' + expectedRowCount = 12 + expectedSuccessCount = 2 expectedWarningCount = 2 expectedFailureCount = 1 - self.validateModule(soup, moduleName, expectedRowCount, expectedSuccessCount, expectedFailureCount, + self.validateModule(statusTable, moduleName, expectedRowCount, expectedSuccessCount, expectedFailureCount, expectedWarningCount) + moduleName = 'module2' + expectedRowCount = 11 + expectedSuccessCount = 2 + expectedWarningCount = 0 + expectedFailureCount = 2 + self.validateModule(statusTable, moduleName, expectedRowCount, expectedSuccessCount, expectedFailureCount, + expectedWarningCount) + + moduleName = 'module3' + expectedRowCount = 9 + expectedSuccessCount = 0 + expectedWarningCount = 0 + expectedFailureCount = 0 + self.validateModule(statusTable, moduleName, expectedRowCount, expectedSuccessCount, expectedFailureCount, + expectedWarningCount) + + moduleName = 'totals' + expectedRowCount = 5 + expectedSuccessCount = 5 + expectedWarningCount = 2 + expectedFailureCount = 3 + expectedUnregistered = 0 + self.validateModule(statusTable, moduleName, expectedRowCount, expectedSuccessCount, expectedFailureCount, + expectedWarningCount, expectedUnregistered) + + failureTable = soup.find('table', id="failed") + expectedFailureCount = expectedMaxFailures + self.validateFailureTable(failureTable, expectedFailureCount) + # helper methods # - def validateModule(self, soup, moduleName, expectedRowCount, expectedSuccessCount, expectedFailureCount, - expectedWarningCount): - module = soup.table.findAll('tr', id=lambda x: x and x.startswith(moduleName + '-')) + def validateFailureTable(self, table, expectedFailureCount): + self.assertIsNotNone(table) + module = table.findAll('tr', id=lambda x: x and x.startswith('failure-')) + rowCount = len(module) + self.assertEquals(rowCount, expectedFailureCount) + + def validateModule(self, table, moduleName, expectedRowCount, expectedSuccessCount, expectedFailureCount, + expectedWarningCount, expectedUnregistered = 0): + self.assertIsNotNone(table) + module = table.findAll('tr', id=lambda x: x and x.startswith(moduleName + '-')) rowCount = len(module) self.assertEquals(rowCount, expectedRowCount) - successCount = self.getCountFromRow(soup, moduleName + '-job-success') - self.assertEquals(successCount, expectedSuccessCount) - warningCount = self.getCountFromRow(soup, moduleName + '-job-warning') - self.assertEquals(warningCount, expectedWarningCount) - failureCount = self.getCountFromRow(soup, moduleName + '-job-failure') - self.assertEquals(failureCount, expectedFailureCount) - expectedTotalCount = expectedFailureCount + expectedSuccessCount + expectedWarningCount - totalCount = self.getCountFromRow(soup, moduleName + '-job-total') - self.assertEquals(totalCount, expectedTotalCount) - - def getCountFromRow(self, soup, rowID): - success = soup.table.findAll('tr', id=lambda x: x == rowID) - dataFields = success[0].findAll("td") + if expectedRowCount > 0: + successCount = self.getCountFromRow(table, moduleName + '-job-success') + self.assertEquals(successCount, expectedSuccessCount) + warningCount = self.getCountFromRow(table, moduleName + '-job-warning') + self.assertEquals(warningCount, expectedWarningCount) + failureCount = self.getCountFromRow(table, moduleName + '-job-failure') + self.assertEquals(failureCount, expectedFailureCount) + unregisteredCount = self.getCountFromRow(table, moduleName + '-job-unregistered') + self.assertEquals(unregisteredCount, expectedUnregistered) + expectedTotalCount = expectedFailureCount + expectedSuccessCount + expectedWarningCount + expectedUnregistered + totalCount = self.getCountFromRow(table, moduleName + '-job-total') + self.assertEquals(totalCount, expectedTotalCount) + + def getCountFromRow(self, table, rowID): + rows = table.findAll('tr', id=lambda x: x == rowID) + if len(rows) == 0: + return 0 + + dataFields = rows[0].findAll("td") strings = dataFields[1].stripped_strings # get data from second column count = -1 for string in strings: