diff --git a/README-tXArchitecture.rst b/README-tXArchitecture.rst new file mode 100644 index 00000000..cf941a44 --- /dev/null +++ b/README-tXArchitecture.rst @@ -0,0 +1,494 @@ +tX Development Architecture +=========================== + +This document explains the layout of the translationConvertor (tX) +conversion platform and how the components of the system should interact +with one another. + +If you just want to *use* the tX API, see `tX API Example +Usage `__ + +Keep reading if you want to contribute to tX. + +Goals +----- + +tX is intended to be a conversion tool for the content in the `Door43 +Platform `__. The goal is to support several +different input formats, output formats, and resource types. + +Development goals are: + +- Keep the system modular, in order to: +- Encourage others to contribute and make it simple to do so +- Contain development, testing, and deployment to each individual + component +- Constrain feature, bugfixes, and security issues to a smaller + codebase for each component +- Continuous Deployment, which means +- Automated testing is required +- Continuous integration is required +- Checks and balances on our *process* +- RESTful API utilizing JSON + +Infrastructure +-------------- + +Overview +~~~~~~~~ + +All code for tX is run by `AWS Lambda `__. The `AWS API +Gateway `__ service is what +provides routing from URL requests to Lambda functions. Data and any +required persistent metadata are stored in `AWS S3 `__ buckets. +This is a "serverless" API. + +Developers use `Apex `__, `Travis +CI `__, and +`Coveralls `__. + +Permissions (mostly for accessing S3 buckets) are managed by the +``role`` assigned to each Lambda function. + +Modules may be written in any language supported by AWS Lambda +(including some that are available via "shimming"). 
As of July 2016, +this list includes: + +- Java (v8) +- Python (v2.7) +- Node.js (v0.10 or v4.3) +- Go (any version) + +Modules MUST all present an API endpoint that the other components of +the system can use. Modules MAY present API endpoints that the public +can use. + + +Separating Production from Development and Test +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We want our code to not know/care if it is running in production, +development or test environments. Yet there are plenty of variables and +locations where data and files are stored that vary among the three environments, such as +different bucket names between our two AWS accounts, since all bucket +names on AWS must be unique. + +So that the clients, tx-manager and convert modules don't have to worry +about this, everything that varies by environment will be set up in +the API Gateway Stage Variables. These are variables we set up in AWS +for a particular API's URL. Along with the payload sent by the +requesting client, these variables will also be put into the "event" +variable in the Lambda handler function. + +For example, such variables may be (test \| development \| production): + +- cdn\_bucket = "test-cdn.door43.org" \| "dev-cdn.door43.org" \| "cdn.door43.org" +- api\_bucket = "test-api.door43.org" \| "dev-api.door43.org" \| "api.door43.org" +- door43\_bucket = "test-door43.org" \| "dev-door43.org" \| "door43.org" +- api\_url = "https://test-api.door43.org" \| "https://dev-api.door43.org" \| "https://api.door43.org" +- gogs\_user\_token = "" \| "" \| "" +- gogs\_username = "" +- env = "test" or "dev" or "prod" (just in case you want to still do something different based on environment in your code) + +Test Environment +~~~~~~~~~~~~~~~~~~~~~~~ + +The test environment should use the WA AWS account. 
There are 3 +test buckets that have been created that mirror the production buckets: + +- test-api.door43.org - for tx-manager to manage data for tX (only /tx + namespace should be used) (public access disabled on this) +- test-cdn.door43.org - for conversion modules to upload their output + to (only /tx namespace should be used) (public access enabled on + this) +- test-door43.org - For Jekyll and /u generated files to upload to + (public access enabled on this) + +Use ``apex deploy`` to upload code for Lambda functions to the test environment. + +Development Environment +~~~~~~~~~~~~~~~~~~~~~~~ + +The development environment should use the Door43 AWS account. There are 3 +development buckets that have been created that mirror the production buckets: + +- dev-api.door43.org - for tx-manager to manage data for tX (only /tx + namespace should be used) (public access disabled on this) +- dev-cdn.door43.org - for conversion modules to upload their output + to (only /tx namespace should be used) (public access enabled on + this) +- dev-door43.org - For Jekyll and /u generated files to upload to + (public access enabled on this) + +The ``develop`` branch for each repo should automatically deploy to this +account and make use of the above buckets. + +Production Environment +~~~~~~~~~~~~~~~~~~~~~~ + +The production environment should use the Door43 AWS account. The +production buckets are: + +- api.door43.org - for tx-manager to manage data for tX (only /tx + namespace should be used) (public access disabled on this) +- cdn.door43.org - for conversion modules to upload their output to + (only /tx namespace should be used) (public access enabled on this) +- door43.org - For Jekyll and /u generated files to upload to (public + access enabled on this) + +The ``master`` branch for each repo should automatically deploy to this +account and make use of the above buckets. + +Modules +------- + +Every part of tX is broken into components referred to as +``tX modules``. 
Each tX module has one or more functions that it +provides to the overall system. The list of tX modules is given here, +each with a full description under its respective heading below. + +- `tX Webhook Client <#tx-webhook-client>`__ - Handles webhooks from + git.door43.org (Gogs) to format the repo files, massaging them based + on resource and format into a flat directory structure and zipping it up + to invoke a job request with the `tX Manager + Module <#tx-manager-module>`__. +- `tX Manager Module <#tx-manager-module>`__ - Manages the registration + of `conversion modules <#tx-conversion-modules>`__ and handles job + requests for conversions. Makes a callback to the + `client <#tx-webhook-client>`__ when a conversion job is complete. +- `tX Authorization Module <#tx-authorization-module>`__ (actually just + the python-gogs-client) +- `tX Conversion Modules <#tx-conversion-modules>`__ - modules that + handle the conversion from one file format to another of one or more + resources +- `tX Door43 Module <#tx-door43-module>`__ - When a conversion job is + completed, it is invoked to make the converted file accessible + through the door43.org site, setting up a new revision page for the + corresponding Gogs repository. It also maintains stats on the + particular project or project revision, such as views and stars + +tX Manager Module +~~~~~~~~~~~~~~~~~ + +The `tX Manager +Module `__ provides +access to three functions: + +- Maintains the registry for all `tX Conversion + Modules <#tx-conversion-modules>`__ +- Authorization for requests via the ```tx-auth`` + module <#tx-authorization-module>`__ +- Accepts user credentials via ``HTTP Basic Auth`` (over HTTPS) to + verify the calling client is a Gogs user +- Counts requests made by each token [not implemented] +- Blocks access if requests per minute reach a certain threshold [not + implemented] +- Handles the public API paths that tX Conversion modules register +- Job queue management. 
Accepts job requests with parameters given to + it, the most important being a URL to a zip file of the source files, + the resource type, input format, and output format. These files must + be in a flat ZIP file (no sub-directories, at least not for the files + of the input format), conforming to what the tX Converter expects +- Makes a callback to the client when the job is completed or has failed, if a + callback URL was given by the client when the job was requested + +The tX manager does NOT concern itself with, nor has knowledge of, +``git.door43.org`` repositories or ``door43.org`` pages. + +tX Authorization Module +~~~~~~~~~~~~~~~~~~~~~~~ + +The `tX Authorization +Module `__ is +an ``authorization`` module for the tX system. In reality, this is just +the python-gogs-client. The ``tx-manager`` module uses it to perform +authorization of requests. The module handles the following: + +- Grants access to the API based on a Gogs user token + +tX Conversion Modules +~~~~~~~~~~~~~~~~~~~~~ + +Conversion modules include (some are still to be implemented): + +- `tx-md2html `__ - Converts Markdown to HTML (obs, ta, tn, tw, tq) +- `tx-md2pdf `__ - Converts Markdown to PDF (obs, ta, tn, tw, tq) +- `tx-md2docx `__ - Converts Markdown to DOCX (obs, ta, tn, tw, tq) +- `tx-md2epub `__ - Converts Markdown to ePub (obs, ta, tn, tw, tq) +- `tx-usfm2html `__ - Converts USFM to HTML (bible) +- `tx-usfm2pdf `__ - Converts USFM to PDF (bible) +- `tx-usfm2docx `__ - Converts USFM to DOCX (bible) +- `tx-usfm2epub `__ - Converts USFM to ePub (bible) + +Each conversion module accepts a specific type of text format as its +input and the module returns a specific type of output document. For +example, there is a `md2pdf `__ module +that converts Markdown text into a rendered PDF. The conversion modules +also require that you specify the resource type (e.g. obs, ta, tn, tw or +tq), which affects the formatting of the output document. 
+ +Input Format Types +^^^^^^^^^^^^^^^^^^ + +There are currently two accepted input format types: + +- Markdown - ``md`` +- Unified Standard Format Markers - ``usfm`` + +A few notes on input formatting: + +- Conversion modules *do not do pre-processing of the text*. The data + supplied must be well formed. +- Conversion modules expect *a single file*, either: +- A plaintext file of the appropriate format (``md`` or ``usfm``). +- A zip file with multiple plaintext files of the appropriate format. + +In the case of a zip file, the conversion module should process the +files in *alphabetical* order. According to our ``obs`` file naming +convention and the ``usfm`` standard, this process should yield the +correct output in both cases. + +Output Format Types +^^^^^^^^^^^^^^^^^^^ + +For each type of input format, the following output formats are +supported: + +- PDF - ``pdf`` +- DOCX - ``docx`` +- HTML - ``html`` + +Resource Types +^^^^^^^^^^^^^^ + +Each of these resource types affects the expected input and the rendered +output of the text. The recognized resource types are: + +- Open Bible Stories - ``obs`` +- Scripture/Bible - ``bible`` +- translationNotes - ``tn`` +- translationWords - ``tw`` +- translationQuestions - ``tq`` +- translationAcademy - ``ta`` + +Available Conversion Options +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Conversion modules specify a list of ``options`` that they accept to +help format the output document. Every conversion module MUST support +these options: + +- ``"language": "en"`` - Defaults to ``en`` if not provided, MUST be a + valid IETF language tag, may affect font used +- ``"css": "http://some.url/your_custom_css"`` - A CSS file that you + provide. You can override or extend any of the CSS in the templates + with your own values. 
+ +Conversion modules MAY support these options: + +- ``"columns": [1, 2, 3, 4]`` - Not available for ``obs`` input +- ``"page_size": ["A4", "A5", "Letter", "Statement"]`` - Not available + for HTML output +- ``"line_spacing": "100%"`` +- ``"toc_levels": [1, 2, 3, 4, ...]`` - To specify how many heading + levels you want to appear in your TOC. +- ``"page_margins": { "top": ".5in","right": ".5in","bottom": ".5in","left": ".5in" }`` + - If you want to override the default page margins for PDF or DOCX + output. + +Deploying Modules +^^^^^^^^^^^^^^^^^ + +Each module is initially deployed to AWS Lambda via the ``apex`` +command. After this, Travis CI is configured to manage continuous +deployment of the module (see `Deploying to AWS from Travis +CI `__). + +Continuous deployment of the module should be set up such that: + +- the ``master`` branch is deployed to ``production`` whenever it is + updated +- the ``develop`` branch is deployed to ``development`` whenever it is + updated + +The deployment process looks like this: + +- Code in progress lives in a feature-named branch until the developer + is happy and automated tests pass. +- Code is peer-reviewed, then +- Merged into ``develop`` until automated testing passes and it + integrates correctly in ``development``. +- Merged into ``master``, which triggers the auto-deployment + +Registering a Module +^^^^^^^^^^^^^^^^^^^^ + +Every module (except ``tx-manager``) MUST register itself with +``tx-manager``. 
A module MUST provide the following information to +``tx-manager``: + +- Public endpoints (for ``tx-manager`` to present) +- Private endpoints (will not be published by ``tx-manager``) +- Module type (one of ``conversion``, ``authorization``, ``utility``) + +A conversion module MUST also provide: + +- Input format types accepted +- Output format types accepted +- Resource types accepted +- Conversion options accepted + +Example registration for ``md2pdf``: + +Request: + +:: + + POST https://api.door43.org/tx/module + + { + "name": "tx-md2pdf_convert", + "version": "1", + "type": "conversion", + "resource_types": [ "obs", "bible" ], + "input_format": [ "md" ], + "output_format": [ "pdf" ], + "options": [ "language", "css", "line_spacing" ], + "private_links": [ ], + "public_links": [ + { + "href": "/md2pdf", + "rel": "list", + "method": "GET" + }, + { + "href": "/md2pdf", + "rel": "create", + "method": "POST" + } + ] + } + +Response: + +:: + + 201 Created + + { + "name": "md2pdf", + "version": "1", + "type": "conversion", + "resource_types": [ "obs", "bible" ], + "input_format": [ "md" ], + "output_format": [ "pdf" ], + "options": [ "language", "css", "line_spacing" ], + "private_links": [ ], + "public_links": [ + { + "href": "/md2pdf", + "rel": "list", + "method": "GET" + }, + { + "href": "/md2pdf", + "rel": "create", + "method": "POST" + } + ] + } + +tX Webhook Client +~~~~~~~~~~~~~~~~~ + +The `tX Webhook +Client `__ is a client +to tX. The purpose of this client is to pre-process the ``git`` repos +from Gogs' webhook notifications, send them through tX, and upload the +resulting HTML files to the ``cdn.door43.org`` bucket. 
The process looks +like this: + +When a Gogs webhook is triggered: \* Accepts the default webhook +notification from ``git.door43.org`` \* Gets the data from the +repository for the given commit (via HTTPS request that returns a zip +file) \* Identifies the Resource Type (via name of repo or +``manifest.json`` file) \* Formats the request (turns the repo into +valid Markdown or USFM file(s), then creates a zip file with the files +being in the root of the archive) \* Sends the valid data (in zip +format) through an API call to the `tX Manager +Module <#tx-manager-module>`__, requesting HTML output, which it then +should get a confirmation (JSON) that the job has been queued ('status' += 'requested') \* Uploads an initial ``build_log.json`` file to the +``cdn.door43.org`` bucket as +``u////build_log.json`` with information returned +from the call to the tX Manager (this file will be updated when job is +completed) \* Uploads the repo's ``manifest.json`` file to the +``cdn.door43.org`` bucket as ``u////manifest.json`` +\* Returns its own JSON response which will be seen in the Gogs' webhook +request results, stating the request was made, the source ZIP and the +expected output ZIP locations + +When callback is made: \* Extract each file from the resulting output +ZIP file to the ``cdn.door43.org`` bucket with the prefix key of +``u////`` \* Updates the +``u////build_log.json`` in the ``cdn.door43.org`` +bucket with the information given by tX Manager through the callback +(e.g. conversion status, log, warnings, errors, timestamps, etc.) + +tX Webhook Client does NOT concern itself with: \* Converting files for +presentation on ``door43.org`` + +tX Door43 Module +~~~~~~~~~~~~~~~~ + +The `tX Door43 +Module `__ contains +processes that will update the ``door43.org`` bucket/site when +conversion jobs are completed. It works behind the scenes, so is not an +API. 
Its tasks include: + +- convert the files for presentation on ``door43.org`` when a + conversion job is completed and files have been deployed to the + ``cdn.door43.org`` bucket, applying a template and other styling and + JavaScript, and deploy them to the ``door43.org`` bucket, prefixed + with ``u///`` +- Update stats of a project or revision such as views, followers and + stars from ``git.door43.org`` + +Including Python Packages in a Lambda Function +---------------------------------------------- + +Requirements for a Python script need to reside within the function's +directory that calls them. A requirement for the ``convert`` function +should exist within ``functions/convert/``. + +The list of requirements for a function should be in a +``requirements.txt`` file within that function's directory, for example: +``functions/convert/requirements.txt``. + +Requirements *must* be installed before deploying to Lambda. For +example: + +``pip install -r functions/convert/requirements.txt -t functions/convert/`` + +The ``-t`` option tells pip to install the files into the specified +target directory. This ensures that the Lambda environment has direct +access to the dependency. + +If you have any Python files in subdirectories that also have +dependencies, you can import the ones available in the main function by +using ``sys.path.append('/var/task/')``. + +Lastly, if you install dependencies for a function you need to include +the following in an ``.apexignore`` file: + +:: + + *.dist-info + +For Reference +------------- + +There is a similar API that has good documentation at +https://developers.zamzar.com/docs. This can be consulted if we run into +blockers or need examples of how to implement tX. 
diff --git a/README.rst b/README.rst index 340175ed..5788e215 100644 --- a/README.rst +++ b/README.rst @@ -19,14 +19,19 @@ develop: :target: https://coveralls.io/github/unfoldingWord-dev/tx-manager?branch=develop + +**NOTE: High level Architecture documentation is here\:** `tX Architecture`_. + + + tx-manager ========== -This is a python module used with tx-manager-lambda +This is a python module used with **tx-manager-lambda**. See the `tx-manager-lambda documentation `_ for details on setting up and deploying the Lambda function. -Project description at https://github.com/unfoldingWord-dev/door43.org/wiki/tX-Development-Architecture#tx-manager-module. +Project description at `tX Manager Module`_. -Issue for its creation at https://github.com/unfoldingWord-dev/door43.org/issues/53 +Issue for its creation at https://github.com/unfoldingWord-dev/door43.org/issues/53. tX Pipeline @@ -56,73 +61,74 @@ Request Conversion Job Using the Pipeline and the corresponding numbers above, this describes each part of the pipeline and how each are integrated, both with each other as well as the AWS Services that are used. -NOTE: This gives URLs and bucket names for development. For production, remove the `test-` prefix from domain or bucket name +NOTE: This gives URLs and bucket names for test. For development, replace the `test-` prefix from domain or bucket name with `dev-`. For production, remove the `test-` prefix from domain or bucket name. 1. Gogs (Git website) -When a repository is updated on `Gogs `_, the commit triggers all webhooks in the repo's settings. One of those webhooks, which our copy of Gogs sets up automatically for every new repo, is a call to https://test-api.door43.org/client/webhook (API Gateway -> Lambda function). + When a repository is updated on `Gogs `_, the commit triggers all webhooks in the repo's settings. 
One of those webhooks, which our copy of Gogs sets up automatically for every new repo, is a call to https://test-api.door43.org/client/webhook (API Gateway -> Lambda function). 2. Webhook (Lambda function - API Gateway triggered) -The webhook triggered in Gogs (#1) sends the commit payload to the AWS API Gateway `client` stage and the `webhook` method which triggers the `client_webhook Lambda function `_. + The webhook triggered in Gogs (#1) sends the commit payload to the AWS API Gateway `client` stage and the `webhook` method which triggers the `client_webhook Lambda function `_. -The webhook function expects the following variables in the payload: + The webhook function expects the following variables in the payload: -* data - the commit payload from Gogs -* api_url* - the base URL to the tX Manager API (e.g. https://test-api.door43.org) -* pre_convert_bucket* - the S3 bucket in which to put the zip file of the files to be converted (e.g. tx-webhook) -* cdn_bucket* - the S3 bucket in which the zip file of the converted files is to be found in client_callback (e.g. cdn.door43.org) -* gogs_url* - the URL to the Gogs site to verify user token (e.g. https://test.door43.org:3000) -* gogs_user_token* - a user token of a valid user to prove they are a user so we can track job requests (for the webclient we just have one user token for all requests, given by the API Gateway) + * data - the commit payload from Gogs + * api_url* - the base URL to the tX Manager API (e.g. https://test-api.door43.org) + * pre_convert_bucket* - the S3 bucket in which to put the zip file of the files to be converted (e.g. tx-webhook) + * cdn_bucket* - the S3 bucket in which the zip file of the converted files is to be found in client_callback (e.g. cdn.door43.org) + * gogs_url* - the URL to the Gogs site to verify user token (e.g. 
https://git.door43.org) + * gogs_user_token* - a user token of a valid user to prove they are a user so we can track job requests (for the webclient we just have one user token for all requests, given by the API Gateway) -*these variables are set up in the `'client' Stage Variables `_, so dev and prod gateways can have different variables + *these variables are set up in the `client` Stage Variables , so dev and prod gateways can have different variables* -The client_webhook function is responsible for standardizing both a manifest.json file and the resource containers from all types of repos committed to Gogs. Converters (#4) expect the files to be converted to be in a flat-level zip file, where all files to be converted (with the input file extension) are one file per chapter (Bible, OBS) and in alphabetical order for logging and display purposes. Once the files are zipped up and the zip file put at https://test-cdn.door43.org/temp//, the client webhook function requests a job by posting a request to https://test-api.door43.org/tx/job and exits. + The client_webhook function is responsible for standardizing both a manifest.json file and the resource containers from all types of repos committed to Gogs, and it will call a preprocessor (e.g. TsObsMarkdownPreprocessor) to handle this. Converters (#4) expect the files to be converted to be in a flat-level zip file, where all files to be converted (with the input file extension) are one file per chapter (Bible, OBS) and in alphabetical order for logging and display purposes. Once the files are zipped up and the zip file put at https://test-cdn.door43.org/temp//, the client webhook function requests a job by posting a request to https://test-api.door43.org/tx/job and exits. 3. Request Job (Lamdbda function - API Gateway triggered) -Request Job is triggered through a call to the AWS API Gateway, running the `request_job lambda function `_. 
This function expects the following variables in the payload: + Request Job is triggered through a call to the AWS API Gateway, running the `request_job lambda function `_. This function expects the following variables in the payload: -* gogs_url* - the URL to the Gogs site to verify user token (e.g. https://git.door43.org) -* api_url* - the base URL to the tX Manager API (e.g. https://test-api.door43.org) -* data - information about the job to performed. It contains the following variables: + * gogs_url* - the URL to the Gogs site to verify user token (e.g. https://git.door43.org) + * api_url* - the base URL to the tX Manager API (e.g. https://test-api.door43.org) + * data - information about the job to performed. It contains the following variables: - * gogs_user_token - a user token of a valid Gogs user - * cdn_bucket - the S3 bucket in which the zip file of the converted files is to placed - * source - The URL of the archive of files to convert (e.g. https://s3-us-west-2.amazonaws.com/test-tx-webhook/preconvert/0038b1d1-bf3b-11e6-8481-ed2b5603783b.zip) - * resource_type - The resource type (e.g. obs, ulb, udb, etc.) - * input_format - The input format of the files (e.g. md) - * output_format - The desired output format (e.g. html) + * gogs_user_token - a user token of a valid Gogs user + * cdn_bucket - the S3 bucket in which the zip file of the converted files is to placed + * source - The URL of the archive of files to convert (e.g. https://s3-us-west-2.amazonaws.com/test-tx-webhook/preconvert/0038b1d1-bf3b-11e6-8481-ed2b5603783b.zip) + * resource_type - The resource type (e.g. obs, ulb, udb, etc.) + * input_format - The input format of the files (e.g. md) + * output_format - The desired output format (e.g. 
html) -*these variables are set up in the `'tx' Stage Variables `_, so dev and prod gateways can have different variables + *these variables are set up in the* `'tx' Stage Variables `_ *, so dev and prod gateways can have different variables.* -From the above information, tX Manager's setup_job function will determine what converter to use for this job and will save this job request to the `tx-job table `_. Inserting this job into the DynamoDB will trigger the `tX Manager Start Job lambda function `_. + From the above information, tX Manager's setup_job function will determine what converter to use for this job and will save this job request to the `tx-job table `_. Inserting this job into the DynamoDB will trigger the `tX Manager Start Job lambda function `_. 4. Start Job (a) (Lamdbda function - DynamoDB tx-job table insert triggered) -The `Start Job lambda function `_ (Thus is NOT triggered through a call through the API. This is to separate the Request Job from the Start Job due to the 5 minute limit of execution time of a Lambda function) + The `Start Job lambda function `_ is triggered by a job being inserted into the DynamoDB `tx-job table `_ (Thus is NOT triggered through a call through the API. This is to separate the Request Job from the Start Job due to the 5 minute limit of execution time of a Lambda function) -This function will load the given record from the DB and populate a TxJob object. It will then send this to the converter determined in #3 from its input and output formats. A call to the converter is then made. + This function will load the given record from the DB and populate a TxJob object. It will then send this to the converter determined in #3 from its input and output formats. A call to the converter is then made. 5. [CONVERTER] (Lamdbda function - tX Manager triggered) -Each converter is responsible for converting a given input file type to a given output file type. It also can have one or more resource types it converts. 
It expects the URL of a zip file which it then downloads and unzips. It then converts all the files to another zip file, converting the files of the given input type to the given output type, and copies all other files as they are to the new archive. It uploads the archive to the given S3 bucket and file path. + Each converter is responsible for converting a given input file type to a given output file type. It also can have one or more resource types it converts. It expects the URL of a zip file which it then downloads and unzips. It then converts all the files to another zip file, converting the files of the given input type to the given output type, and copies all other files as they are to the new archive. It uploads the archive to the given S3 bucket and file path. -It also can perform checks at this point if there any warnings or errors and return those in the JSON object returned to the Start Job function (#4) + It also can perform checks at this point if there any warnings or errors and return those in the JSON object returned to the Start Job function (#4) 4. Start Job (b) (Lamdbda function - Return from [CONVERTER] #5) -Once the CONVERTER returns a status of warnings and errors (if any), the Start Job function calls the call back URL if one was given so the client can know the job was completed and if it was successful or not. + Once the CONVERTER returns a status of warnings and errors (if any), the Start Job function calls the call back URL if one was given so the client can know the job was completed and if it was successful or not. 6. Callback (Lamdbda function - API Gateway triggered) -When the `callback function `_ is called, the client looks to see if the job was a success and if it was, unzips the new archive and puts its contents in the test-cdn.door43.org bucket with the key prefix of `u///`. It puts the status of the build into a file and uploads to the same bucket with the key `u////build_log.json`. 
+ When the `callback function `_ is called, the client looks to see if the job was a success and if it was, unzips the new archive and puts its contents in the test-cdn.door43.org bucket with the key prefix of `u///`. It puts the status of the build into a file and uploads to the same bucket with the key `u////build_log.json`. 7. Deploy to Door43 (Lamdbda function - S3 modified file triggered) -The uploading of build_log.json in #6 triggers the `Door43 Deploy function <`_. + The uploading of build_log.json in #6 triggers the `Door43 Deploy function `_. + + The Door43 Deploy function is what moves the HTML files converted by #5 and placed in the CDN bucket in #6 to live.door43.org and templates it based on the `door43.org layouts `_. It also generates header, status and navigation portions of the pages for each revision. -The Door43 Deploy function is what moves the HTML files converted by #5 and placed in the CDN bucket in #6 to live.door43.org and templates it based on the `door43.org layouts `_. It also generates header, status and navigation portions of the pages for each revision. Register Conversion Module -------------------------- @@ -136,4 +142,9 @@ the following variables: * output_format - the output format of the files to be generated, which is the extension of the file, such as "html" * resource_types - the resource type(s) accepted by the converter, such as "obs" -See `tx-md2html_register Lambda function `_. for an exmaple of a module registering itself. +See `tx-md2html_register Lambda function `_. for an example of a module registering itself. + + +.. _tX Architecture: + +.. 
include:: README-tXArchitecture.rst diff --git a/manager/manager.py b/manager/manager.py index 6ddd1025..7c042f6a 100644 --- a/manager/manager.py +++ b/manager/manager.py @@ -469,6 +469,7 @@ def generate_dashboard(self): } items = sorted(self.module_db_handler.query_items(), key=lambda k: k['name']) + totalJobs = self.list_jobs({},False) if items and len(items): self.logger.info(" Found: " + str(len(items)) + " item[s] in tx-module") @@ -477,11 +478,15 @@ def generate_dashboard(self): 'html.parser') for item in items: # self.logger.info(json.dumps(item)) - self.logger.info(item["name"]) + moduleName = item["name"] + self.logger.info(moduleName) body.table.append(BeautifulSoup( - '' + str(item["name"]) + '', + '' + str(moduleName) + '', 'html.parser')) + jobs = self.get_jobs_for_module(totalJobs, moduleName) + self.get_jobs_counts(jobs) + # TBD the following code almosts walks the db record replacing next 11 lines # for attr, val in item: # if (attr != 'name') and (len(attr) > 0): @@ -489,45 +494,118 @@ def generate_dashboard(self): # rec += '' body.table.append(BeautifulSoup( - 'Type:' + + 'Type:' + str(item["type"]) + '', 'html.parser')) body.table.append(BeautifulSoup( - 'Input Format:' + + 'Input Format:' + json.dumps(item["input_format"]) + '', 'html.parser')) body.table.append(BeautifulSoup( - 'Output Format:' + + 'Output Format:' + json.dumps(item["output_format"]) + '', 'html.parser')) body.table.append(BeautifulSoup( - 'Resource Types:' + + 'Resource Types:' + json.dumps(item["resource_types"]) + '', 'html.parser')) body.table.append(BeautifulSoup( - 'Version:' + + 'Version:' + str(item["version"]) + '', 'html.parser')) if len(item["options"]) > 0: body.table.append(BeautifulSoup( - 'Options:' + + 'Options:' + json.dumps(item["options"]) + '', 'html.parser')) if len(item["private_links"]) > 0: body.table.append(BeautifulSoup( - 'Private Links:' + + 'Private Links:' + json.dumps(item["private_links"]) + '', 'html.parser')) if len(item["public_links"]) > 0: 
body.table.append(BeautifulSoup( - 'Public Links:' + + 'Public Links:' + json.dumps(item["public_links"]) + '', 'html.parser')) + + body.table.append(BeautifulSoup( + 'Job Successes:' + + str(self.jobs_success) + '', + 'html.parser')) + body.table.append(BeautifulSoup( + 'Job Warnings:' + + str(self.jobs_warnings) + '', + 'html.parser')) + body.table.append(BeautifulSoup( + 'Job Failures:' + + str(self.jobs_failures) + '', + 'html.parser')) + body.table.append(BeautifulSoup( + 'Jobs Total:' + + str(self.jobs_total) + '', + 'html.parser')) + + self.get_jobs_counts(totalJobs) + body.table.append(BeautifulSoup( + 'Total Jobs', + 'html.parser')) + body.table.append(BeautifulSoup( + 'Success:' + + str(self.jobs_success) + '', + 'html.parser')) + body.table.append(BeautifulSoup( + 'Warnings:' + + str(self.jobs_warnings) + '', + 'html.parser')) + body.table.append(BeautifulSoup( + 'Failures:' + + str(self.jobs_failures) + '', + 'html.parser')) + body.table.append(BeautifulSoup( + 'Total:' + + str(self.jobs_total) + '', + 'html.parser')) + dashboard['body'] = body.prettify('UTF-8') else: self.logger.info("No modules found.") return dashboard + + def get_jobs_for_module(self, jobs, moduleName): + jobsInModule = [] + for job in jobs: + if "convert_module" in job: + name = job["convert_module"] + if name == moduleName: + jobsInModule.append(job) + + return jobsInModule + + def get_jobs_counts(self, jobs): + self.jobs_total = len(jobs) + self.jobs_warnings = 0 + self.jobs_failures = 0 + self.jobs_success = 0 + for job in jobs: + try: + errors = job['errors'] + if len(errors) > 0: + self.jobs_failures+=1 + continue + + warnings = job['warnings'] + if len(warnings) > 0: + self.jobs_warnings+=1 + continue + + self.jobs_success+=1 + + except: + self.jobs_failures+=1 + + diff --git a/setup.py b/setup.py index b2d720a7..057983c5 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ def read(f_name): setup( name='tx-manager', - version='0.2.61', + version='0.2.62', packages=[ 
'client', 'manager', diff --git a/test-setup.py b/test-setup.py index 218a1c82..10e10f27 100644 --- a/test-setup.py +++ b/test-setup.py @@ -2,7 +2,7 @@ setup( name='tx-manager', - version='0.2.61', + version='0.2.62', packages=[ 'client', 'manager', diff --git a/tests/manager_tests/test_manager.py b/tests/manager_tests/test_manager.py index f938ce4d..dd7d1b2a 100644 --- a/tests/manager_tests/test_manager.py +++ b/tests/manager_tests/test_manager.py @@ -46,14 +46,18 @@ def setUpClass(cls): "status": "started", "resource_type": "obs", "input_format": "md", - "output_format": "html" + "output_format": "html", + "convert_module": "module1", + "errors" : [] }, "1": { "job_id": "1", "status": "requested", "resource_type": "obs", "input_format": "md", - "output_format": "html" + "output_format": "html", + "convert_module": "module1", + "errors" : [ "error" ] }, "2": { "job_id": "2", @@ -62,34 +66,42 @@ def setUpClass(cls): "input_format": "usfm", "output_format": "html", "callback": ManagerTest.MOCK_CALLBACK_URL, + "convert_module": "module1", + "warnings" : [] }, "3": { "job_id": "3", "status": "requested", "resource_type": "other", "input_format": "md", - "output_format": "html" + "output_format": "html", + "convert_module": "module1", + "warnings" : [ "warning" ] }, "4": { "job_id": "4", "status": "requested", "resource_type": "unsupported", "input_format": "md", - "output_format": "html" + "output_format": "html", + "convert_module": "module1", + "warnings" : [ "warning1", "warning2" ] }, "6": { "job_id": "6", "status": "requested", "resource_type": "obs", "input_format": "md", - "output_format": "html" + "output_format": "html", + "convert_module": "module2" }, "7": { "job_id": "7", "status": "requested", "resource_type": "obs", "input_format": "md", - "output_format": "html" + "output_format": "html", + "convert_module": "module2" } }, keyname="job_id") cls.mock_module_db = mock_utils.mock_db_handler(data={ @@ -312,7 +324,7 @@ def test_start_job1(self, 
mock_requests_post): data = args[1] self.assertIsInstance(data, dict) self.assertIn("errors", data) - self.assertEqual(len(data["errors"]), 0) + self.assertEqual(len(data["errors"]), 1) self.assertIn("warnings", data) self.assertTrue(len(data["warnings"]) > 0) @@ -581,15 +593,67 @@ def test_generate_dashboard(self): soup = BeautifulSoup(dashboard['body'], 'html.parser') # there should be a table tag self.assertIsNotNone(soup.find('table')) - # module1 should have 8 rows of info - self.assertEquals(len(soup.table.findAll('tr', id=lambda x: x and x.startswith('module1-'))), 8) - # module2 should have 7 rows of info - self.assertEquals(len(soup.table.findAll('tr', id=lambda x: x and x.startswith('module2-'))), 7) - # module3 should have 5 rows of info - self.assertEquals(len(soup.table.findAll('tr', id=lambda x: x and x.startswith('module3-'))), 5) + + moduleName = 'module1' + expectedRowCount = 12 + expectedSuccessCount = 2 + expectedWarningCount = 2 + expectedFailureCount = 1 + self.validateModule(soup, moduleName, expectedRowCount, expectedSuccessCount, expectedFailureCount, + expectedWarningCount) + + moduleName = 'module2' + expectedRowCount = 11 + expectedSuccessCount = 2 + expectedWarningCount = 0 + expectedFailureCount = 0 + self.validateModule(soup, moduleName, expectedRowCount, expectedSuccessCount, expectedFailureCount, + expectedWarningCount) + + moduleName = 'module3' + expectedRowCount = 9 + expectedSuccessCount = 0 + expectedWarningCount = 0 + expectedFailureCount = 0 + self.validateModule(soup, moduleName, expectedRowCount, expectedSuccessCount, expectedFailureCount, + expectedWarningCount) + + moduleName = 'totals' + expectedRowCount = 4 + expectedSuccessCount = 4 + expectedWarningCount = 2 + expectedFailureCount = 1 + self.validateModule(soup, moduleName, expectedRowCount, expectedSuccessCount, expectedFailureCount, + expectedWarningCount) # helper methods # + def validateModule(self, soup, moduleName, expectedRowCount, expectedSuccessCount, 
expectedFailureCount, + expectedWarningCount): + module = soup.table.findAll('tr', id=lambda x: x and x.startswith(moduleName + '-')) + rowCount = len(module) + self.assertEquals(rowCount, expectedRowCount) + successCount = self.getCountFromRow(soup, moduleName + '-job-success') + self.assertEquals(successCount, expectedSuccessCount) + warningCount = self.getCountFromRow(soup, moduleName + '-job-warning') + self.assertEquals(warningCount, expectedWarningCount) + failureCount = self.getCountFromRow(soup, moduleName + '-job-failure') + self.assertEquals(failureCount, expectedFailureCount) + expectedTotalCount = expectedFailureCount + expectedSuccessCount + expectedWarningCount + totalCount = self.getCountFromRow(soup, moduleName + '-job-total') + self.assertEquals(totalCount, expectedTotalCount) + + def getCountFromRow(self, soup, rowID): + success = soup.table.findAll('tr', id=lambda x: x == rowID) + dataFields = success[0].findAll("td") + strings = dataFields[1].stripped_strings # get data from second column + count = -1 + for string in strings: + count = int(string) + break + + return count + def call_args(self, mock_object, num_args, num_kwargs=0): """ :param mock_object: mock object that is expected to have been called