diff --git a/.gitignore b/.gitignore index 780906a..c4c1e88 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,6 @@ BUILD **/__pycache__ -/tests/*/*.out +/test/*/*.out /src/cells_index/*.csv /src/cells_index/*.tsv \ No newline at end of file diff --git a/VERSION b/VERSION index 2165f8f..50aea0e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0.4 +2.1.0 \ No newline at end of file diff --git a/hs-ontology-api-spec.yaml b/hs-ontology-api-spec.yaml index 1bea432..04fa162 100644 --- a/hs-ontology-api-spec.yaml +++ b/hs-ontology-api-spec.yaml @@ -10,33 +10,123 @@ servers: - url: https://ontology.api.hubmapconsortium.org/ description: Production server paths: + /assayclasses: + get: + operationId: assayclass_get + summary: REPLACEMENT for datasets endpoint. Returns metadata on "assay classes"--the rule-based classifications of datasets managed by the Rules Engine. An assay class describes characteristics that are common to all datasets that are processed with a particular workflow. + parameters: + - name: application_context + in: query + required: true + description: Filter to indicate application context + schema: + type: string + default: HUBMAP + enum: + - HUBMAP + - SENNET + - hubmap + - sennet + - HuBMAP + - SenNet + - name: process_state + in: query + required: false + description: optional filter to only assay classes that correspond to a particular process state + schema: + type: string + enum: + - primary + - derived + - epic + - name: assaytype + in: query + required: false + description: optional filter to a particular assaytype. Multiple assay classes can have the same assaytype. 
+ schema: + type: string + example: AF + responses: + '200': + description: A JSON array of assay classification objects + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/AssayClass' + '400': + description: Missing parameter; invalid parameter value + '404': + description: No assay classes matching parameters + '5XX': + description: Unknown error + /assayclasses/{class}: + get: + operationId: assayclass_name_get + summary: REPLACEMENT for datasets endpoint. Returns metadata on the specified "assay class"--the rule-based classification of a dataset managed by the Rules Engine. An assay class describes characteristics that are common to all datasets that are processed with a particular workflow. + parameters: + - name: class + in: path + required: true + description: An identifier for an assay class. Can be EITHER the string that corresponds to the "rule_description" key in the testing_rules_chain.json file (e.g., "non-DCWG primary AF") OR the code from the UBKG for the assay class (e.g., "C200001"). Note that HUBMAP and SENNET have equivalent codes for each assay class for which the SAB differs--e.g., the "non-DCWG primary AF" assay class has full code "HUBMAP:C200001" in HUBMAP and "SENNET:C200001" in SENNET. If using the code, do not include the application context (HUBMAP or SENNET). 
+ schema: + type: string + example: non-DCWG primary AF + - name: application_context + in: query + required: true + description: Filter to indicate application context + schema: + type: string + default: HUBMAP + enum: + - HUBMAP + - SENNET + - hubmap + - sennet + - HuBMAP + - SenNet + - name: process_state + in: query + required: false + description: optional filter to only assay classes that correspond to a particular process state + schema: + type: string + enum: + - primary + - derived + - epic + responses: + '200': + description: An assay classification object + content: + application/json: + schema: + $ref: '#/components/schemas/AssayClass' + '400': + description: Missing parameter; invalid parameter value + '404': + description: No assay classes matching parameters + '5XX': + description: Unknown error /assayname: # CPK MAR 2023 - replacement for search-api endpoint of the same name post: operationId: assayname_post - summary: Replacement for the same endpoint in search-api + summary: Replacement for assayname endpoint in the search-api requestBody: required: true content: application/json: schema: $ref: "#/components/schemas/AssayNameRequest" - example: - name: [ PAS, Image Pyramid ] - application_context: HUBMAP responses: '200': - description: A JSON array of the assay type name + description: A JSON array of the assaytype with the specified name. 
content: application/json: schema: $ref: '#/components/schemas/AssayTypePropertyInfo' - example: - name: bulk-RNA - primary: true - description: Bulk RNA-seq - vitessce-hints: ["pyramid","anndata"] - contains-pii: true - vis-only: true '400': description: Invalid request - missing request body; missing key *name* in request body; value for key *name* incorrectly specified '404': @@ -76,13 +166,6 @@ paths: application/json: schema: $ref: '#/components/schemas/AssayTypePropertyInfo' - example: - name: bulk-RNA - primary: true - description: Bulk RNA-seq - vitessce-hints: ["pyramid","anndata"] - contains-pii: true - vis-only: true /assaytype/{name}: # CPK MAR 2023 - replacement for search-src endpoint of the same name get: operationId: assaytype_name_get @@ -123,111 +206,6 @@ paths: vitessce-hints: ["pyramid","anndata"] contains-pii: true vis-only: true - /datasets: # JAS FEB 2023 - note that parameters use underscores, even when the corresponding key in the response uses dashes--e.g., alt_names, alt-names - get: - operationId: dataset_get - summary: Returns information on a set of HuBMAP or SenNet dataset types, with options to filter the list to those with specific property values. Filters are additive (i.e., boolean AND) - parameters: - - name: application_context - in: query - required: true - description: Required filter to indicate application context. - schema: - type: string - enum: - - HUBMAP - - SENNET - - hubmap - - sennet - - HuBMAP - - SenNet - - name: data_type - in: query - required: false - description: Optional filter for data_type - schema: - type: string - example: AF - - name: description - in: query - required: false - description: Optional filter for display name. Use URL-encoding (space = %20). 
- schema: - type: string - example: Autofluorescence%20Microscopy - - name: alt_name - in: query - required: false - description: Optional filter for a single element in the alt-names list--i.e., return datasets for which alt-names includes a value that matches the parameter. Although the field is named 'alt-names', the parameter uses 'alt_name'. Use URL-encoding (space = %20) - schema: - type: string - example: Light%20Sheet - - name: primary - in: query - required: false - description: Optional filter to filter to primary (true) or derived (false) assay. - schema: - type: string - enum: - - true - - false - - name: contains_pii - in: query - required: false - description: Optional filter for whether the dataset contains Patient Identifying Information (PII). Although the field is named 'contains-pii', use 'contains_pii' as an argument. - schema: - type: string - enum: - - true - - false - - name: vis_only - in: query - required: false - description: Optional filter for whether datasets are visualization only (true). Although the field is named 'vis-only', use 'vis_only' as an argument. - schema: - type: string - enum: - - true - - false - - name: vitessce_hint - in: query - required: false - description: Optional filter for a single element in the vitessce_hint list--i.e., return datasets for which vitessce_hints includes a value that matches the parameter. Although the field is named 'vitessce-hints', use 'vitessce_hint' as an argument. 
- schema: - type: string - enum: - - pyramid - - anndata - - is_image - - is_tiled - - is_sc - - rna - - atac - - sprm - - codex - - maldi - - name: dataset_provider - in: query - required: false - description: Optional filter to identify the dataset provider - IEC (iec) or external (lab) - schema: - type: string - enum: - - iec - - IEC - - external - - External - - lab - - Lab - responses: - '200': - description: A JSON array of dataset types with properties, optionally filtered by parameters - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/DatasetPropertyInfo' /organs: get: operationId: get_organ_types @@ -828,29 +806,15 @@ paths: /field-assays: get: operationId: field_assays_get - summary: Return associations between ingest metadata fields and the "assays" (dataset data types). Replacement for field-assays.yaml. NOTE only those CEDAR fields that are also in legacy field-assays.yaml can be mapped to assays. In addition, the response from this endpoint is reliable only for datasets that existed prior to the deployment in 2024 of the assay classifier (aka Rules Engine, aka "soft assay types"). + summary: Return associations between ingest metadata fields and the "assays" (dataset data types). Replacement for field-assays.yaml. The response from this endpoint is reliable only for datasets that existed prior to the deployment in 2024 of the assay classifier (aka Rules Engine, aka "soft assay types"). parameters: - - name: assay_identifier + - name: assaytype in: query required: false - description: the legacy idenetifier for the assay dataset in field_assays.yaml. This may be a data_type, alt-name, or description of the dataset. + description: an identifier that corresponds to an assaytype in field_assays.yaml. Although field_assays also identifies with descriptions and alt-names, these are no longer current. 
schema: type: string example: scRNAseq-10xGenomics - - name: data_type - in: query - required: false - description: the legacy *data_type*--i.e., the key for ingest workflows - schema: - type: string - example: scRNAseq-10xGenomics-v3 - - name: dataset_type - in: query - required: false - description: the *soft assay* dataset type--i.e., the value used by the Rules Engine - schema: - type: string - example: RNASeq responses: '200': description: Array of ingest metadata fields; each field with its associations with assays/dataset types. @@ -876,34 +840,20 @@ paths: schema: type: string example: acquisition_instrument_model - - name: assay_identifier + - name: assaytype in: query required: false - description: the legacy idenetifier for the assay dataset in field_assays.yaml. This may be a data_type, alt-name, or description of the dataset. + description: an identifier that corresponds to an assaytype in field_assays.yaml. Although field_assays also identifies with descriptions and alt-names, these are no longer current. schema: type: string example: scRNAseq-10xGenomics - - name: data_type - in: query - required: false - description: the legacy *data_type*--i.e., the key for ingest workflows - schema: - type: string - example: scRNAseq-10xGenomics-v3 - - name: dataset_type - in: query - required: false - description: the *soft assay* dataset type--i.e., the value used by the Rules Engine - schema: - type: string - example: RNASeq responses: '200': description: Specified ingest metadata field with its associations with assays/dataset types. content: application/json: schema: - $ref: '#/components/schemas/FieldAssaysResponseSingle' + $ref: '#/components/schemas/FieldAssaysResponse' '400': description: Invalid parameter '404': @@ -989,11 +939,11 @@ components: schemas: AssayTypePropertyInfo: type: object - description: Properties of a HuBMAP/SenNet AssayType. 
+ description: Properties of a HuBMAP/SenNet assaytype properties: name: type: string - description: AssayType name; used to characterize dataset entities in provenance hierarchy + description: assaytype hierarchy example: bulk-RNA primary: type: boolean @@ -1008,60 +958,15 @@ components: description: Flags for Vitessce visualization items: type: string - example: ["pyramid","anndata","is_image","is_tiled","is_sc","rna","atac","sprm","codex","maldi"] - contains-pii: - type: boolean - description: Whether the dataset contains Patient Identifying Information (PII) (true or false) - example: true - vis-only: - type: boolean - description: Indicates whether for visualization only (true or false) - items: - type: boolean - example: true - DatasetPropertyInfo: # Schema name JAS FEB 2023. Note that GET parameters use underscores, even when the corresponding key in the response uses dashes--e.g., alt_names, alt-names. - type: object - description: Properties of a HuBMAP/SenNet Dataset. Where applicable, key names match those found in the original assay_types.yaml. - properties: - alt-names: - type: array - description: Alternative/deprecated synonyms of data_type that may still be associated with dataset entities - items: - type: string - example: [ "MALDI-IMS-pos", "MALDI-IMS-neg" ] + example: ["pyramid","anndata"] contains-pii: type: string - description: Whether the dataset contains Patient Identifying Information (PII) - example: true - data_type: - type: string - description: Data type for the dataset; used to characterize dataset entities in provenance hierarchy - example: bulk-RNA - dataset_provider: - type: string - description: Identifies the provider of the dataset. 'External Provider' also referred to as 'lab-processed'. - example: HuBMAP IEC - description: - type: string - description: How datasets of the data type are named in the Data Portal. 
- example: Bulk RNA-seq - primary: - type: string - description: Indicates whether the assay is primary (true) or derived (false) - example: true + description: DEPRECATED + example: deprecated vis-only: type: string - description: Indicates whether for visualization only - items: - type: string - example: true - vitessce-hints: - type: array - description: Flags for Vitessce visualization - items: - type: string - example: ["pyramid","anndata","is_image","is_tiled","is_sc","rna","atac","sprm","codex","maldi"] - example: Microscopy + description: DEPRECATED + example: deprecated RuiCodeTerm: type: object description: Respoonse body for organs/by-code GET request @@ -1096,7 +1001,7 @@ components: example: "UT" organ_uberon: type: string - description: UBERON code + description: UBERON or FMA code. organ_cui: type: string description: The organ CUI @@ -1144,8 +1049,8 @@ components: - type: array items: type: string - description: AssayType name which can be a string or an array of strings - example: [ PAS, Image Pyramid ] + description: AssayType name which can be a string or an array of strings. This is an array for downward compatibility. + example: [AF] application_context: type: string default: HUBMAP @@ -1773,66 +1678,16 @@ components: items: type: object properties: - code_ids: - type: array - description: Codes for the field in different ontologies. HMFIELD codes are from the legacy field_*.yaml files; CEDAR codes are from CEDAR template schemas. - items: - type: string - example: ["HMFIELD:1008","CEDAR:9f654d25-4de7-4eda-899b-417f05e5d5c3"] - name: + field_name: type: string description: name of the metadata field example: acquisition_instrument_model - assays: + assaytypes: type: array - description: array of assays/datasets associated with the field + description: array of assaytypes associated with the field items: - type: object - properties: - assay_identifier: - type: string - description: The legacy identifier used in field_assays.yaml. 
This can be the data_type, an alt-name, or the description of the data_type used in Portal. - example: scRNAseq-10xGenomics - data_type: - type: string - description: The legacy data_type--i.e., the key for ingest workflows - example: scRNAseq-10xGenomics-v3 - dataset_type: - type: string - description: The "soft assay" dataset type--e.g., used by the Rules Engine - example: RNASeq - FieldAssaysResponseSingle: - type: object - description: Associations between single ingest meatadata field and "assays" (or datasets generated by assays) - properties: - code_ids: - type: array - description: Codes for the field in different ontologies. HMFIELD codes are from the legacy field_*.yaml files; CEDAR codes are from CEDAR template schemas. - items: - type: string - example: ["HMFIELD:1008","CEDAR:9f654d25-4de7-4eda-899b-417f05e5d5c3"] - name: - type: string - description: name of the metadata field - example: acquisition_instrument_model - assays: - type: array - description: array of assays/datasets associated with the field - items: - type: object - properties: - assay_identifier: - type: string - description: The legacy identifier used in field_assays.yaml. This can be the data_type, an alt-name, or the description of the data_type used in Portal. 
- example: scRNAseq-10xGenomics - data_type: - type: string - description: The legacy data_type--i.e., the key for ingest workflows - example: scRNAseq-10xGenomics-v3 - dataset_type: - type: string - description: The "soft assay" dataset type--e.g., used by the Rules Engine - example: RNASeq + type: string + example: MIBI FieldSchemasResponse: type: object description: Associations between ingest metadata fields and metadata schemas @@ -1895,3 +1750,125 @@ components: type: string description: name of schema example: imc3d + AssayClass: + type: object + description: A set of assay classifications + properties: + rule_description: + type: object + description: information on the rule in the Rules Engine's testing_rule_chain.json + properties: + code: + type: string + description: UBKG code for the rule. + example: C200150 + application_context: + type: string + description: the application context for the code--i.e., the SAB + example: HUBMAP + name: + type: string + description: corresponds to the "rule_description" key in the assay classification object in the testing_rule_chain.json. + example: non-DCWG primary IMC2D + value: + type: object + description: corresponds to the "value" key in the assay classification object in the testing_rule_chain.json. Contains configuration metadata common to all datasets that are generated by a particular processing workflow. + properties: + active_status: + type: string + description: whether the assay classification corresponds to an active ingestion workflow--i.e., whether datasets can be processed. + example: active + assaytype: + type: string + description: unique identifier for an ingestion processing workflow. + example: IMC2D + dataset_type: + type: object + description: categorization of datasets that are generated by the assay class. Allows for multiple analytical hierarchies. + properties: + dataset_type: + type: string + description: corresponds to the "dataset_type" key in the testing_rule_chain.json. 
+ example: 2D Imaging Mass Cytometry + PDR_category: + type: string + description: category for the dataset_type per the HuBMAP Data Coordination Working Group's "Pipeline Decision Rules" (PDR) document. + example: MxNF + fig2: + type: object + description: categorization of the dataset_type that conforms to that used in Figure 2 of the 2023 Jain et al paper in Nature Cell Biology (https://www.nature.com/articles/s41556-023-01194-w). + properties: + aggregated_assaytype: + type: string + description: corresponds to "assay type" category in Figure 2. + example: LC-MS + modality: + type: string + description: corresponds to "modality" category in Figure 2. + example: Proteomics + category: + type: string + description: corresponds to "category" category in Figure 2. + example: bulk + description: + type: string + description: the string used to describe datasets generated by the processing workflow associated with the assay classification + example: 2D Imaging Mass Cytometry + dir_schema: + type: string + description: identifier for the directory schema for files in datasets generated by the processing workflow associated with the assay classification + example: imc-v0 + tbl_schema: + type: string + description: identifier for the table schema for files in datasets generated by the processing workflow associated with the assay classification + example: imc-v + measurement_assay: + type: object + description: corresponds to the generic experiment or assay modality used to generate a "raw" dataset from a tissue sample. 
+ properties: + codes: + type: array + description: set of codes from biomedical vocabularies for the measurement assay + items: + type: object + properties: + code: + type: string + description: code from a biomedical vocabulary or ontology + example: OBI:0003096 + term: + type: string + description: preferred term for the code in the biomedical vocabulary or ontology + example: imaging mass cytometry assay + contains_full_genetic_sequences: + type: boolean + description: whether the measurement assay generates full genetic sequences from the sample. Full genetic sequencing information from human sources is considered Patient Identifying Information (PII), which is protected. + example: false + is_multiassay: + type: boolean + description: indicates whether the assay class is for a "multi-assay". Multi-assays use more than one experimental modality on samples to generate datasets--e.g., both RNAseq and ATACseq. + must_contain: + type: array + description: if the assay class is for a multi-assay, a list of the dataset types that are considered components of the multi-assay. + items: + type: string + example: + [RNASeq,ATACSeq] + pipeline_shorthand: + type: string + description: name that summarizes a multi-factor processing workflow + example: Salmon + SPRM + process_state: + type: string + description: whether datasets generated by the processing workflow associated with the assay class are "primary" (from the original experiment); "derived"/"processed" (from post-experimental data processing); or "epic" (Externally Processed Integrated Collections) + example: primary + provider: + type: string + description: the provider of datasets generated by the processing workflow associated with the assay class. Possible values are "IEC" (corresponding to datasets generated by the HuBMAP/SenNet Infrastructure and Engagement Component) or "External" (complete datasets coming directly from a HuBMAP/SenNet lab--e.g., a Tissue Mapping Center). 
+ example: IEC + vitessce_hints: + type: array + description: set of keys used to identify the type of visualization that the Vitessce application should use to represent datasets associated with the assay class. + items: + type: string + example: [anndata,rna] diff --git a/src/hs_ontology_api/cypher/assayclass.cypher b/src/hs_ontology_api/cypher/assayclass.cypher new file mode 100644 index 0000000..7260033 --- /dev/null +++ b/src/hs_ontology_api/cypher/assayclass.cypher @@ -0,0 +1,215 @@ +// Called by the assayclassifier endpoint. + +// Return information on rule-based datasets--i.e., the datasets specified in the Rule Engine's testing rule chain. + +// Obtain identifiers for rule-based datasets (assay classes) for the application context. +// The assayclass_filter allows filtering by either the UBKG code or term (rule_description) +// for the assay class. +WITH '$context' AS context +CALL +{ + WITH context + MATCH (p:Concept)<-[:isa]-(pRBD:Concept)-[:CODE]->(cRBD:Code)-[r:PT]->(tRBD:Term) + WHERE p.CUI = context+':C000004 CUI' + AND r.CUI=pRBD.CUI + $assayclass_filter + RETURN pRBD.CUI AS CUIRBD,cRBD.CODE AS CodeRBD,tRBD.name AS NameRBD + ORDER BY pRBD.CUI +} +// assaytype +CALL +{ + WITH CUIRBD, context + MATCH (pRBD:Concept)-[:has_assaytype]->(passaytype:Concept)-[:CODE]->(cassaytype:Code)-[r:PT]->(tassaytype:Term) + WHERE pRBD.CUI=CUIRBD + $assaytype_filter + AND r.CUI=passaytype.CUI and cassaytype.SAB=context + RETURN DISTINCT REPLACE(tassaytype.name,'_assaytype','') AS assaytype +} +// dir-schema +CALL +{ + WITH CUIRBD,context + OPTIONAL MATCH (pRBD:Concept)-[:has_dir_schema]->(pdir_schema:Concept)-[:CODE]-(cdir_schema:Code)-[r:PT]->(tdir_schema:Term) + WHERE pRBD.CUI=CUIRBD AND r.CUI=pdir_schema.CUI AND cdir_schema.SAB=context + RETURN DISTINCT tdir_schema.name AS dir_schema +} +// tbl-schema +CALL +{ + WITH CUIRBD,context + OPTIONAL MATCH (pRBD:Concept)-[:has_tbl_schema]->(ptbl_schema:Concept)-[:CODE]->(ctbl_schema:Code)-[r:PT]->(ttbl_schema:Term) + WHERE 
pRBD.CUI=CUIRBD AND r.CUI=ptbl_schema.CUI AND ctbl_schema.SAB=context + RETURN DISTINCT ttbl_schema.name AS tbl_schema +} +// vitessce_hints +// Strip the optional suffix '_vitessce_hint' from terms such as 'rna_vitessce_hint'. +CALL +{ + WITH CUIRBD,context + OPTIONAL MATCH (pRBD:Concept)-[:has_vitessce_hint]->(pvitessce_hint:Concept)-[:CODE]->(cvitessce_hint:Code)-[r:PT]->(tvitessce_hint:Term) + WHERE pRBD.CUI=CUIRBD AND r.CUI=pvitessce_hint.CUI AND cvitessce_hint.SAB=context + RETURN COLLECT(DISTINCT REPLACE(tvitessce_hint.name,'_vitessce_hint','')) AS vitessce_hints +} +// process state. The process_state_filter allows for filtering to just primary or derived assay classes. +CALL +{ + WITH CUIRBD,context + MATCH (pRBD:Concept)-[:has_process_state]->(pdsProcess:Concept)-[:isa]->(pProcessParent:Concept), + (pdsProcess:Concept)-[:CODE]->(cdsProcess:Code)-[r:PT]->(tdsProcess:Term) + WHERE pRBD.CUI=CUIRBD + AND pProcessParent.CUI = context+':C004002 CUI' + AND r.CUI=pdsProcess.CUI + AND cdsProcess.SAB=context + $process_state_filter + RETURN tdsProcess.name as process_state +} +// dataset_type +// The dataset_type concepts in HUBMAP are cross-referenced to HRAVS concepts; however, the terms for the HRAVS concepts +// are enclosed in a list, so use the HUBMAP terms. 
+CALL +{ + WITH CUIRBD,context + OPTIONAL MATCH (pRBD:Concept)-[:has_dataset_type]->(pdataset_type:Concept)-[:CODE]->(cdataset_type:Code)-[r:PT]->(tdataset_type:Term) + WHERE pRBD.CUI=CUIRBD AND r.CUI=pdataset_type.CUI AND cdataset_type.SAB=context + RETURN DISTINCT tdataset_type.name AS dataset_type, pdataset_type.CUI AS CUIDatasetType +} +// Pipeline Decision Rules category +CALL +{ + WITH CUIDatasetType,context + OPTIONAL MATCH (pDatasetType:Concept)-[:has_pdr_category]->(pPDRCategory:Concept)-[:CODE]->(cPDRCategory:Code)-[r:PT]->(tPDRCategory:Term) + WHERE pDatasetType.CUI=CUIDatasetType AND r.CUI=pPDRCategory.CUI AND cPDRCategory.SAB=context + RETURN DISTINCT tPDRCategory.name AS pdr_category +} +// Fig 2 aggregated assay type +CALL +{ + WITH CUIDatasetType,context + OPTIONAL MATCH (pDatasetType:Concept)-[:has_fig2_agg_assay_type]->(pFig2agg:Concept)-[:CODE]-(cFig2agg:Code)-[r:PT]->(tFig2agg:Term) + WHERE pDatasetType.CUI=CUIDatasetType AND r.CUI=pFig2agg.CUI AND cFig2agg.SAB=context + RETURN DISTINCT tFig2agg.name AS fig2_aggregated_assaytype +} +// Fig2 modality +CALL +{ + WITH CUIDatasetType,context + OPTIONAL MATCH (pDatasetType:Concept)-[:has_fig2_modality]->(pFig2modality:Concept)-[:CODE]-(cFig2modality:Code)-[r:PT]->(tFig2modality:Term) + WHERE pDatasetType.CUI=CUIDatasetType AND r.CUI=pFig2modality.CUI AND cFig2modality.SAB=context + RETURN DISTINCT tFig2modality.name AS fig2_modality +} +// Fig2 category +CALL +{ + WITH CUIDatasetType,context + OPTIONAL MATCH (pDatasetType:Concept)-[:has_fig2_category]->(pFig2category:Concept)-[:CODE]->(cFig2category:Code)-[r:PT]->(tFig2category:Term) + WHERE pDatasetType.CUI=CUIDatasetType AND r.CUI=pFig2category.CUI AND cFig2category.SAB=context + RETURN DISTINCT tFig2category.name AS fig2_category +} +// description +CALL +{ + WITH CUIRBD,context + OPTIONAL MATCH (pRBD:Concept)-[:has_description]->(pdescription:Concept)-[:CODE]->(cdescription:Code)-[r:PT]->(tdescription:Term) + WHERE pRBD.CUI=CUIRBD AND 
r.CUI=pdescription.CUI AND cdescription.SAB=context + RETURN DISTINCT REPLACE(tdescription.name,'_description','') AS description +} +// pipeline-shorthand +CALL +{ + WITH CUIRBD,context + OPTIONAL MATCH (pRBD:Concept)-[:has_pipeline_shorthand]->(pshorthand:Concept)-[:CODE]->(cshorthand:Code)-[r:PT]->(tshorthand:Term) + WHERE pRBD.CUI=CUIRBD and r.CUI=pshorthand.CUI AND cshorthand.SAB=context + RETURN DISTINCT tshorthand.name AS pipeline_shorthand +} +// is multi-assay +CALL +{ + WITH CUIRBD, context + OPTIONAL MATCH (pRBD:Concept)-[:isa]->(pMulti:Concept) + WHERE pRBD.CUI=CUIRBD + AND pMulti.CUI = context+':C004033 CUI' + RETURN DISTINCT CASE WHEN pMulti.CUI IS NOT NULL THEN True ELSE False END AS is_multiassay +} +// must_contain +CALL +{ + WITH CUIRBD,context + OPTIONAL MATCH (pRBD:Concept)-[:must_contain]->(pDT:Concept)-[:CODE]-(cDT:Code)-[r:PT]->(tDT:Term) + WHERE pRBD.CUI=CUIRBD AND r.CUI=pDT.CUI AND cDT.SAB=context + RETURN COLLECT(DISTINCT tDT.name) AS must_contain +} +// measurement assay CUI +CALL +{ + WITH CUIRBD + OPTIONAL MATCH (pRBD:Concept)-[:has_measurement_assay]->(pMeas:Concept) + WHERE pRBD.CUI=CUIRBD + RETURN DISTINCT pMeas.CUI as CUIMeas +} +// Optional measurement codes +CALL +{ + WITH CUIMeas + MATCH (pMeas:Concept)-[:CODE]->(cMeas:Code)-[:PT]->(tMeas:Term) + WHERE pMeas.CUI = CUIMeas + RETURN COLLECT(DISTINCT {code:cMeas.CodeID,term:tMeas.name}) AS MeasCodes +} +// whether the measurement assay contains full_genetic_sequences +CALL +{ + WITH CUIMeas,context + OPTIONAL MATCH (pRBD:Concept)-[:contains]->(ppii:Concept) + WHERE pRBD.CUI=CUIMeas + AND ppii.CUI = context+':C004009 CUI' + RETURN DISTINCT CASE WHEN NOT ppii.CUI IS null THEN true ELSE false END AS contains_full_genetic_sequences +} +// provider +CALL +{ + WITH CUIRBD,context + OPTIONAL MATCH (pRBD:Concept)-[:has_provider]->(pProvider:Concept)-[:CODE]-(cProvider:Code)-[r:PT]->(tProvider:Term) + WHERE pRBD.CUI=CUIRBD AND r.CUI=pProvider.CUI AND cProvider.SAB=context + RETURN DISTINCT 
tProvider.name AS provider +} +// active status +CALL +{ + WITH CUIRBD,context + OPTIONAL MATCH (pRBD:Concept)-[:has_active_status]->(pStatus:Concept)-[:CODE]->(cStatus:Code)-[r:PT]->(tStatus:Term) + WHERE pRBD.CUI=CUIRBD AND r.CUI=pStatus.CUI and cStatus.SAB=context + RETURN DISTINCT tStatus.name AS active_status +} +CALL +{ +WITH context, CodeRBD, NameRBD, assaytype, dir_schema, tbl_schema, vitessce_hints,process_state,pipeline_shorthand,description,dataset_type,pdr_category,fig2_aggregated_assaytype,fig2_modality,fig2_category,is_multiassay,must_contain,MeasCodes,contains_full_genetic_sequences,provider,active_status +RETURN +{ + rule_description: + { code:CodeRBD,application_context:context, name:NameRBD + }, + value: + { + assaytype:assaytype, dir_schema:dir_schema, tbl_schema:tbl_schema, vitessce_hints:vitessce_hints, + process_state:process_state, + pipeline_shorthand:pipeline_shorthand, description:description, + is_multiassay:is_multiassay, must_contain:must_contain, + provider:provider, + active_status:active_status, + dataset_type: + { + dataset_type:dataset_type, PDR_category:pdr_category, + fig2: + { + aggregated_assaytype:fig2_aggregated_assaytype, modality:fig2_modality, category:fig2_category + } + }, + measurement_assay:{ + codes:MeasCodes, + contains_full_genetic_sequences:contains_full_genetic_sequences + } + } +} AS rule_based_dataset +} +WITH rule_based_dataset +RETURN rule_based_dataset AS rule_based_datasets diff --git a/src/hs_ontology_api/cypher/fieldassays.cypher b/src/hs_ontology_api/cypher/fieldassays.cypher index e1d3ca5..e14cc6a 100644 --- a/src/hs_ontology_api/cypher/fieldassays.cypher +++ b/src/hs_ontology_api/cypher/fieldassays.cypher @@ -1,96 +1,39 @@ -// Obtains associations between ingest metadata fields and assay dataset types, both for legacy (HMFIELD) and CEDAR. +// Obtains associations between ingest metadata fields and assaytypes, for legacy metadata only (HMFIELD) // Used by the field-assays endpoint. 
-// NOTE: With the deployment of the assay classifier (Rules Engine, or "soft assay types"), the UBKG is no longer the -// source of truth for assay type. This endpoint is primarily for legacy datasets. - -// Identify all metadata fields, from both: -// - legacy sources (the field_*.yaml files in ingest-validation-tools, and modeled in HMFIELD), child codes of HMFIELD:1000 -// - current sources (CEDAR tempates, modeled in CEDAR), child codes of CEDAR:TemplateField -// Fields that are in the intersection of HMFIELD and CEDAR share CUIs. - -// Collect the HMFIELD and CEDAR codes for each metadata field to flatten to level of field name. +// Identify all metadata fields, from legacy sources (the field_*.yaml files in ingest-validation-tools, and modeled in HMFIELD), child codes of //HMFIELD:1000 // The function that calls this query will replace the variable field_filter. + CALL { MATCH (cFieldParent:Code)<-[:CODE]-(pFieldParent:Concept)-[:inverse_isa]->(pField:Concept)-[:CODE]->(cField:Code)-[rField:PT]->(tField:Term) WHERE rField.CUI=pField.CUI - AND cFieldParent.CodeID IN ['HMFIELD:1000','CEDAR:TemplateField'] + AND cFieldParent.CodeID IN ['HMFIELD:1000'] $field_filter - RETURN tField.name AS field_name, pField.CUI as CUIField, apoc.text.join(COLLECT(DISTINCT cField.CodeID),'|') AS code_ids + RETURN tField.name AS field_name, pField.CUI as CUIField, cField.CodeID as field_code_id ORDER BY tField.name } // For each field, find the associated assay identifier (originally from field_assays.yaml). // These identifiers can be one of three types: // - description -// - data_type -// - alt_name -// These identifiers are cross-referenced to CUIs for codes in the HUBMAP Dataset hierarchy. -// +// - assaytype +// - alt-name +// However, only assaytype is relevant: alt-names were deprecated, and descriptions are no longer current. // The function that calls this query will replace the variable assay_type_filter. 
+// HMFIELD originally mapped fields to the "dataset type" in the older HuBMAP dataset hierarchy, but now maps directly to the HUBMAP +// code for the assaytype. CALL { WITH CUIField - OPTIONAL MATCH (pField:Concept)-[:used_in_dataset]->(pAssay:Concept)-[:CODE]->(cAssay:Code)-[r:PT]->(tAssay:Term) - WHERE pField.CUI=CUIField AND cAssay.SAB='HMFIELD' + MATCH (pField:Concept)-[:used_in_dataset]->(pAssayType:Concept)-[:CODE]->(cAssayType:Code)-[r:PT]->(tAssayType:Term) + WHERE pField.CUI=CUIField AND cAssayType.SAB='HUBMAP' AND r.CUI=pAssayType.CUI $assay_type_filter - RETURN DISTINCT cAssay.CodeID AS assay_code_id, - CASE WHEN tAssay.name IS NULL THEN 'none' ELSE tAssay.name END AS assay_identifier -} - -// In HMFIELD, assay identifiers are cross-referenced to CUIs for codes in the HUBMAP "hard" Dataset hierarchy. -// Duplicate assignments are possible--e.g., a HuBMAP dataset is assigned to both a data_type and an alt-name. -// Because a CUI can be the "preferred" CUI for only one code in an ontology, this results in some assay identifiers -// being associated with multiple CUIs--i.e., one to the CUI shared by the HUBMAP dataset code, -// one to a new CUI for the HMFIELD code. -// We want the HUBMAP CUI for each HMFIELD code, regardless of whether it is the "preferred" CUI. - -// SPECIAL CASE: The identifiers for 10x Multiome have so many variants of case and delimiter for -// 10xmultiome, so hard-code the CUI mapping. - -CALL -{ - WITH assay_code_id, assay_identifier - OPTIONAL MATCH (pAssay:Concept)-[:CODE]->(cAssay:Code) - WHERE cAssay.CodeID = assay_code_id - AND pAssay.CUI STARTS WITH 'HUBMAP' - RETURN distinct CASE WHEN assay_identifier='10x Multiome' THEN 'HUBMAP:C014002 CUI' ELSE pAssay.CUI END AS CUIHMDataset + RETURN COLLECT(DISTINCT tAssayType.name) AS assaytypes } +WITH field_name, assaytypes +WHERE assaytypes <>[] -// For each HuBMAP Dataset, obtain the data_type. 
- -CALL -{ - WITH CUIHMDataset - OPTIONAL MATCH (pAssay:Concept)-[:has_data_type]->(pDataType:Concept)-[:CODE]->(cDataType:Code)-[r:PT]->(tDataType:Term) - WHERE pAssay.CUI=CUIHMDataset - AND cDataType.SAB ='HUBMAP' - AND r.CUI=pDataType.CUI - RETURN CASE WHEN tDataType.name IS NULL THEN 'none' ELSE tDataType.name END AS data_type -} - -// For each HuBMAP Dataset, obtain the "soft assay" dataset type. -// The "soft assay" dataset type is a member of the Soft Assay Dataset Type hierarchy in HUBMAP, with parent code -// HUBMAP:C003041 -CALL -{ - WITH CUIHMDataset - OPTIONAL MATCH (pAssay:Concept)-[:isa]->(pSoftAssayDatasetType:Concept)-[:isa]->(pSoftAssayDatasetTypeRoot:Concept)-[:CODE]->(cSoftAssayDatasetTypeRoot:Code)-[r:PT]->(tSoftAssayDatasetTypeRoot:Term), - (pSoftAssayDatasetType:Concept)-[:CODE]->(cSoftAssayDatasetType:Code)-[r2:PT]->(tSoftAssayDatasetType:Term) - WHERE pAssay.CUI = CUIHMDataset - AND cSoftAssayDatasetTypeRoot.CodeID='HUBMAP:C003041' - AND r.CUI=pSoftAssayDatasetTypeRoot.CUI - AND r2.CUI=pSoftAssayDatasetType.CUI - RETURN CASE WHEN tSoftAssayDatasetType.name IS NULL THEN 'none' ELSE tSoftAssayDatasetType.name END as dataset_type -} - -// Collect assay_identifier, data_type, and dataset_type into a delimited list to flatten to level of field name. -// The function that calls this query will replace the variable data_type_dataset_type_filters. 
- -WITH field_name, code_ids, assay_identifier, data_type, dataset_type -$data_type_dataset_type_filters -RETURN field_name, code_ids, COLLECT(DISTINCT assay_identifier + '|' + data_type + '|' + dataset_type) AS assays -ORDER BY field_name \ No newline at end of file +RETURN {fields:COLLECT(DISTINCT {field_name: field_name, assaytypes:assaytypes})} AS fieldassays \ No newline at end of file diff --git a/src/hs_ontology_api/cypher/fieldassays_old.cypher b/src/hs_ontology_api/cypher/fieldassays_old.cypher new file mode 100644 index 0000000..2737ea8 --- /dev/null +++ b/src/hs_ontology_api/cypher/fieldassays_old.cypher @@ -0,0 +1,96 @@ +// Obtains associations between ingest metadata fields and assaytypes, both for legacy (HMFIELD) and CEDAR. +// Used by the field-assays endpoint. + +// NOTE: With the deployment of the assay classifier (Rules Engine, or "soft assay types"), the UBKG is no longer the +// source of truth for assay type. This endpoint is primarily for legacy datasets. + +// Identify all metadata fields, from both: +// - legacy sources (the field_*.yaml files in ingest-validation-tools, and modeled in HMFIELD), child codes of HMFIELD:1000 +// - current sources (CEDAR templates, modeled in CEDAR), child codes of CEDAR:TemplateField +// Fields that are in the intersection of HMFIELD and CEDAR share CUIs. + +// Collect the HMFIELD and CEDAR codes for each metadata field to flatten to level of field name. + +// The function that calls this query will replace the variable field_filter. 
+CALL +{ + MATCH (cFieldParent:Code)<-[:CODE]-(pFieldParent:Concept)-[:inverse_isa]->(pField:Concept)-[:CODE]->(cField:Code)-[rField:PT]->(tField:Term) + WHERE rField.CUI=pField.CUI + AND cFieldParent.CodeID IN ['HMFIELD:1000','CEDAR:TemplateField'] + $field_filter + RETURN tField.name AS field_name, pField.CUI as CUIField, apoc.text.join(COLLECT(DISTINCT cField.CodeID),'|') AS code_ids + ORDER BY tField.name +} + +// For each field, find the associated assay identifier (originally from field_assays.yaml). +// These identifiers can be one of three types: +// - description +// - data_type +// - alt_name +// These identifiers are cross-referenced to CUIs for codes in the HUBMAP Dataset hierarchy. +// + +// The function that calls this query will replace the variable assay_type_filter. +CALL +{ + WITH CUIField + OPTIONAL MATCH (pField:Concept)-[:used_in_dataset]->(pAssay:Concept)-[:CODE]->(cAssay:Code)-[r:PT]->(tAssay:Term) + WHERE pField.CUI=CUIField AND cAssay.SAB='HMFIELD' + $assay_type_filter + RETURN DISTINCT cAssay.CodeID AS assay_code_id, + CASE WHEN tAssay.name IS NULL THEN 'none' ELSE tAssay.name END AS assay_identifier +} + +// In HMFIELD, assay identifiers are cross-referenced to CUIs for codes in the HUBMAP "hard" Dataset hierarchy. +// Duplicate assignments are possible--e.g., a HuBMAP dataset is assigned to both a data_type and an alt-name. +// Because a CUI can be the "preferred" CUI for only one code in an ontology, this results in some assay identifiers +// being associated with multiple CUIs--i.e., one to the CUI shared by the HUBMAP dataset code, +// one to a new CUI for the HMFIELD code. +// We want the HUBMAP CUI for each HMFIELD code, regardless of whether it is the "preferred" CUI. + +// SPECIAL CASE: The identifiers for 10x Multiome have so many variants of case and delimiter for +// 10xmultiome, so hard-code the CUI mapping. 
+ +CALL +{ + WITH assay_code_id, assay_identifier + OPTIONAL MATCH (pAssay:Concept)-[:CODE]->(cAssay:Code) + WHERE cAssay.CodeID = assay_code_id + AND pAssay.CUI STARTS WITH 'HUBMAP' + RETURN distinct CASE WHEN assay_identifier='10x Multiome' THEN 'HUBMAP:C014002 CUI' ELSE pAssay.CUI END AS CUIHMDataset +} + +// For each HuBMAP Dataset, obtain the data_type. + +CALL +{ + WITH CUIHMDataset + OPTIONAL MATCH (pAssay:Concept)-[:has_data_type]->(pDataType:Concept)-[:CODE]->(cDataType:Code)-[r:PT]->(tDataType:Term) + WHERE pAssay.CUI=CUIHMDataset + AND cDataType.SAB ='HUBMAP' + AND r.CUI=pDataType.CUI + RETURN CASE WHEN tDataType.name IS NULL THEN 'none' ELSE tDataType.name END AS data_type +} + +// For each HuBMAP Dataset, obtain the "soft assay" dataset type. +// The "soft assay" dataset type is a member of the Soft Assay Dataset Type hierarchy in HUBMAP, with parent code +// HUBMAP:C003041 +CALL +{ + WITH CUIHMDataset + OPTIONAL MATCH (pAssay:Concept)-[:isa]->(pSoftAssayDatasetType:Concept)-[:isa]->(pSoftAssayDatasetTypeRoot:Concept)-[:CODE]->(cSoftAssayDatasetTypeRoot:Code)-[r:PT]->(tSoftAssayDatasetTypeRoot:Term), + (pSoftAssayDatasetType:Concept)-[:CODE]->(cSoftAssayDatasetType:Code)-[r2:PT]->(tSoftAssayDatasetType:Term) + WHERE pAssay.CUI = CUIHMDataset + AND cSoftAssayDatasetTypeRoot.CodeID='HUBMAP:C003041' + AND r.CUI=pSoftAssayDatasetTypeRoot.CUI + AND r2.CUI=pSoftAssayDatasetType.CUI + RETURN CASE WHEN tSoftAssayDatasetType.name IS NULL THEN 'none' ELSE tSoftAssayDatasetType.name END as dataset_type +} + +// Collect assay_identifier, data_type, and dataset_type into a delimited list to flatten to level of field name. +// The function that calls this query will replace the variable data_type_dataset_type_filters. 
+ +WITH field_name, code_ids, assay_identifier, data_type, dataset_type +$data_type_dataset_type_filters +RETURN field_name, code_ids, COLLECT(DISTINCT assay_identifier + '|' + data_type + '|' + dataset_type) AS assays +ORDER BY field_name \ No newline at end of file diff --git a/src/hs_ontology_api/cypher/organs.cypher b/src/hs_ontology_api/cypher/organs.cypher index a3f6c15..34b5aa7 100644 --- a/src/hs_ontology_api/cypher/organs.cypher +++ b/src/hs_ontology_api/cypher/organs.cypher @@ -14,9 +14,16 @@ CALL CALL { WITH OrganCUI OPTIONAL MATCH (pOrgan:Concept)-[r1:CODE]->(cOrgan:Code)-[r2:PT]->(tOrgan:Term) - WHERE pOrgan.CUI=OrganCUI AND cOrgan.SAB='UBERON' + WHERE pOrgan.CUI=OrganCUI AND cOrgan.SAB IN ['UBERON'] AND r2.CUI=pOrgan.CUI RETURN cOrgan.CodeID AS OrganUBERON } +// Obtain FMA codes. +CALL +{ + WITH OrganCUI OPTIONAL MATCH (pOrgan:Concept)-[r1:CODE]->(cOrgan:Code)-[r2:PT]->(tOrgan:Term) + WHERE pOrgan.CUI=OrganCUI AND cOrgan.SAB IN ['FMA'] + AND r2.CUI=pOrgan.CUI RETURN cOrgan.CodeID AS OrganFMA +} // RUI codes are property nodes linked to organ nodes. CALL { @@ -24,6 +31,6 @@ CALL WHERE pOrgan.CUI=OrganCUI AND r1.SAB=$sab RETURN t2CC.name as OrganTwoCharacterCode } // Filter out the "Other" organ node. 
-WITH OrganCode,OrganSAB,OrganName,OrganTwoCharacterCode,OrganUBERON,OrganCUI +WITH OrganCode,OrganSAB,OrganName,OrganTwoCharacterCode,OrganUBERON,OrganFMA,OrganCUI WHERE NOT (OrganCode = 'C030071' AND OrganSAB=$sab) -RETURN OrganCode,OrganSAB,OrganName,OrganUBERON,OrganTwoCharacterCode,OrganCUI ORDER BY OrganName +RETURN OrganCode,OrganSAB,OrganName,CASE WHEN OrganUBERON IS NULL THEN OrganFMA ELSE OrganUBERON END AS OrganUBERON,OrganTwoCharacterCode,OrganCUI ORDER BY OrganName diff --git a/src/hs_ontology_api/models/fieldassay.py b/src/hs_ontology_api/models/fieldassay.py index b5660c9..a8525da 100644 --- a/src/hs_ontology_api/models/fieldassay.py +++ b/src/hs_ontology_api/models/fieldassay.py @@ -1,5 +1,7 @@ # coding: utf-8 +# JULY 2024 - Refactored to account for new assay class model. + # JAS December 2023 # FieldAssay model class # Used by the field-assays endpoint. @@ -20,19 +22,18 @@ def __init__(self, code_ids=None, name=None, assays=None): Replaces and enhances the legacy field_assays.yaml with additional information related to HUBMAP datasets and CEDAR. - :param code_ids: delimited list of code_ids for the metadata field. The code_ids can come from both - HMFIELD or CEDAR. - :param name: equivalent of the field key in the yaml (HMFIELD) or field name (CEDAR) + :param code_ids: delimited list of code_ids for the metadata field. The code_ids come from + HMFIELD. + :param name: equivalent of the field key in the yaml (HMFIELD) :param assays: delimited list of values in format ||. Each value in the list has elements: - assay_identifier: the assay identifier for the assay from the yaml. This can be a "not DCWG" (i.e., pre-soft assay) data_type; an alt-name; or a dataset description used in the Data Portal. 
- - data_type: the pre-soft assay data_type, if this is a "not DCWG" assay dataset - - dataset_type: the "soft assay" dataset type + - assay_type: the pre-soft assay data_type, if this is a "not DCWG" assay dataset example: - code_ids - [HMFIELD:1008|CEDAR:9f654d25-4de7-4eda-899b-417f05e5d5c3] + code_ids - [HMFIELD:1008] name - acquisition_instrument_model assays - [scRNAseq-10xGenomics|scRNAseq-10xGenomics-v3|RNASeq,...] @@ -65,8 +66,7 @@ def __init__(self, code_ids=None, name=None, assays=None): # If no associated assays were found, the query returns ['none|none|none']. for assay in assays: if assay.split('|')[0] != 'none': - dictassay = {'assay_identifier': assay.split('|')[0], 'data_type': assay.split('|')[1], - 'dataset_type': assay.split('|')[2]} + dictassay = {'assay_identifier': assay.split('|')[0], 'assay_type': assay.split('|')[1]} listassays.append(dictassay) self._assays = listassays diff --git a/src/hs_ontology_api/routes/datasets/__init__.py b/src/hs_ontology_api/routes/assayclasses/__init__.py similarity index 100% rename from src/hs_ontology_api/routes/datasets/__init__.py rename to src/hs_ontology_api/routes/assayclasses/__init__.py diff --git a/src/hs_ontology_api/routes/assayclasses/assayclasses_controller.py b/src/hs_ontology_api/routes/assayclasses/assayclasses_controller.py new file mode 100644 index 0000000..0f812df --- /dev/null +++ b/src/hs_ontology_api/routes/assayclasses/assayclasses_controller.py @@ -0,0 +1,73 @@ +from flask import Blueprint, jsonify, current_app, make_response,request + +from ubkg_api.utils.http_error_string import (get_404_error_string, validate_query_parameter_names, + validate_parameter_value_in_enum, validate_required_parameters) +from hs_ontology_api.utils.neo4j_logic import assayclasses_get_logic + +assayclasses_blueprint = Blueprint('assayclasses_hs', __name__, url_prefix='/assayclasses') + +@assayclasses_blueprint.route('', methods=['GET']) +def assayclasses_expand_get(): + return assayclasses_get() 
+@assayclasses_blueprint.route('/', methods=['GET']) +def assayclasses_name_expand_get(name): + return assayclasses_get(name=name) + + +def assayclasses_get(name=None): + """Returns information on a set of HuBMAP or SenNet assay classifications, rule-based dataset "kinds" that are + in the testing rules json, with options to filter the list to those with specific property values. + Filters are additive (i.e., boolean AND) + + """ + # Validate parameters. + + # Check for invalid parameter names. + err = validate_query_parameter_names(parameter_name_list=['application_context','process_state','assaytype']) + if err != 'ok': + return make_response(err, 400) + + # Check for required parameters. + err = validate_required_parameters(required_parameter_list=['application_context']) + if err != 'ok': + return make_response(err, 400) + application_context = request.args.get('application_context') + + # Check for valid application context. The parameter is case-insensitive. + val_enum = ['HUBMAP','SENNET'] + err = validate_parameter_value_in_enum(param_name='application_context', param_value=application_context.upper(), enum_list=val_enum) + if err != 'ok': + return make_response(err, 400) + application_context = application_context.upper() + + # Check for valid parameter values. + process_state = request.args.get('process_state') + if process_state is not None: + process_state = process_state.lower() + val_enum=['primary','derived','epic'] + err = validate_parameter_value_in_enum(param_name='process_state',param_value=process_state,enum_list=val_enum) + if err != 'ok': + return make_response(err, 400) + + # Filter by assaytype. + # If this is for the endpoint that filters by assay class, then ignore filtering by assaytype. + # (The endpoint that filters by assay class assumes a single response, and assaytype is not unique for assay + # classes). 
+ assaytype = request.args.get('assaytype') + if assaytype is not None and name is not None: + assaytype = None + + neo4j_instance = current_app.neo4jConnectionHelper.instance() + result = assayclasses_get_logic( + neo4j_instance, assayclass=name, process_state=process_state, assaytype=assaytype, context=application_context) + + if result is None or result == []: + # Empty result + err = get_404_error_string(prompt_string=f"No results for " + f"specified parameters") + return make_response(err, 404) + + if len(result) == 1: + return jsonify(result[0]) + else: + return jsonify(result) diff --git a/src/hs_ontology_api/routes/assayname/assayname_controller.py b/src/hs_ontology_api/routes/assayname/assayname_controller.py index 65b3199..ee34298 100644 --- a/src/hs_ontology_api/routes/assayname/assayname_controller.py +++ b/src/hs_ontology_api/routes/assayname/assayname_controller.py @@ -1,6 +1,11 @@ -from flask import Blueprint, jsonify, current_app, request, make_response +# The assayname route replaces the equivalent legacy routes in the search-api. +# July 2024 - refactored to work with new UBKG assay class model. -from hs_ontology_api.utils.neo4j_logic import assaytype_name_get_logic +from flask import Blueprint, jsonify, current_app, request, make_response +from ubkg_api.utils.http_error_string import (get_404_error_string, validate_query_parameter_names, + validate_parameter_value_in_enum,validate_required_parameters) +from hs_ontology_api.utils.neo4j_logic import assayclasses_get_logic +from hs_ontology_api.utils.listdict import remove_duplicate_dicts_from_list assayname_blueprint = Blueprint('assayname', __name__, url_prefix='/assayname') @@ -15,29 +20,71 @@ def assayname_post(): The 'name' is also specified in the Request Data (again see AssayNameRequest in ubkg-api-spec.yaml). 
- :rtype: Union[AssayTypePropertyInfo, Tuple[AssayTypePropertyInfo, int], Tuple[AssayTypePropertyInfo, int, Dict[str, str]] """ + if not request.is_json: - return make_response("A JSON body with a 'Content-Type: application/json' header are required", 400) + return make_response("A JSON request body with a 'Content-Type: application/json' header are required", 400) + if 'name' not in request.json: - return make_response('Request contains no "name" field', 400) + return make_response('Request body contains no "name" field', 400) + application_context = "HUBMAP" + if 'application_context' in request.json: application_context = request.json['application_context'] - req_name = request.json['name'] - alt_names: list = None - if type(req_name) == list and len(req_name) > 0: - name = req_name[0] - if len(req_name) > 1: - alt_names = req_name[1:] - elif type(req_name) == str: - name = req_name - else: - return make_response("The 'name' field is incorrectly specified " - "(see AssayNameRequest in ubkg-api-spec.yaml)", 400) + + # Check for valid application context. + val_enum = ['HUBMAP', 'SENNET'] + err = validate_parameter_value_in_enum(param_name='application_context', param_value=application_context, + enum_list=val_enum) + if err != 'ok': + return make_response(err, 400) + + # req_name = request.json['name'] + # The following is legacy code that attempted to handle composite and "alt-names", which have been deprecated. + # alt_names: list = None + # if type(req_name) == list and len(req_name) > 0: + # name = req_name[0] + # if len(req_name) > 1: + # alt_names = req_name[1:] + # elif type(req_name) == str: + # name = req_name + # else: + # return make_response("The 'name' field is incorrectly specified " + # "(see AssayNameRequest in ubkg-api-spec.yaml)", 400) + + # Assume that only a single assay name is provided. 
+ name = request.json['name'] if isinstance(request.json['name'], str) else request.json['name'][0] neo4j_instance = current_app.neo4jConnectionHelper.instance() - result = assaytype_name_get_logic(neo4j_instance, name, alt_names, application_context) - if result is None: - # JAS Oct 2023 changed from 400 to 404 - return make_response(f"No such assay_type {req_name}, even as alternate name", 404) - return jsonify(result) + + result = assayclasses_get_logic( + neo4j_instance, assaytype=name, context=application_context) + + if (result is None or result == []): + # Empty result + err = get_404_error_string(prompt_string=f"No results for " + f"specified parameters") + return make_response(err, 404) + + # Build the legacy response from the new response. + listresponse = [] + for r in result: + assaytype = {} + val = r.get('value') + assaytype['name'] = val.get('assaytype') + # July 2024 - Process states have changed from a simple binary for primary/derived to a categorical + # value of 'primary', 'derived', or 'epic'. The legacy model will observe the binary model. + assaytype['primary'] = val.get('process_state')=='primary' + assaytype['description'] = val.get('description') + assaytype['vitessce-hints'] = val.get('vitessce_hints') + # The vis-only and contains-pii properties have been deprecated. + assaytype['vis-only'] = 'deprecated' + assaytype['contains-pii'] = 'deprecated' + listresponse.append(assaytype) + + # Remove duplicates. There will likely be both "non-DCWG" and "DCWG" rules for the same assaytype, for which the + # subset used for assaytype response will contain duplicates. 
+ listunique = remove_duplicate_dicts_from_list(listinput=listresponse, indexval='name') + + return jsonify(listunique[0]) + diff --git a/src/hs_ontology_api/routes/assaytype/assaytype_controller.py b/src/hs_ontology_api/routes/assaytype/assaytype_controller.py index 4ca052f..33d472a 100644 --- a/src/hs_ontology_api/routes/assaytype/assaytype_controller.py +++ b/src/hs_ontology_api/routes/assaytype/assaytype_controller.py @@ -1,45 +1,102 @@ +# The assaytype routes replace the equivalent legacy routes in the search-api. +# July 2024 - refactored to work with new UBKG assay class model. + from flask import Blueprint, jsonify, current_app, request, make_response -from ubkg_api.common_routes.validate import validate_application_context +from ubkg_api.utils.http_error_string import (get_404_error_string, validate_query_parameter_names, + validate_parameter_value_in_enum,validate_required_parameters) -from hs_ontology_api.utils.neo4j_logic import assaytype_get_logic, assaytype_name_get_logic +#from hs_ontology_api.utils.neo4j_logic import assaytype_get_logic, assaytype_name_get_logic +from hs_ontology_api.utils.neo4j_logic import assayclasses_get_logic +from hs_ontology_api.utils.listdict import remove_duplicate_dicts_from_list assaytype_blueprint = Blueprint('assaytype', __name__, url_prefix='/assaytype') @assaytype_blueprint.route('', methods=['GET']) -def assaytype_get(): - """ - Get all of the assaytypes without query parameter. - ?primary=true Only get those where record['primary'] == True - ?primary=false Only get those where record['primary'] == False +def route_assaytype_get(): + return assaytype_get() + +@assaytype_blueprint.route('/', methods=['GET']) +def route_assaytype_name_get(name): + return assaytype_get(name) - :return: +def assaytype_get(name=None): + """ + Returns information for the legacy assaytype and assaytype/{name} endpoints. 
+ :param name: corresponds to the assaytype """ - primary: bool = request.args.get('primary', default=None) - if primary is not None: - primary = primary.lower() == 'true' - application_context = validate_application_context() - neo4j_instance = current_app.neo4jConnectionHelper.instance() - return jsonify(assaytype_get_logic(neo4j_instance, primary, application_context)) + # Validate parameters. -@assaytype_blueprint.route('/', methods=['GET']) -def assaytype_name_get(name): - """Get all of the assaytypes with name. - This is a replacement for search-src endpoint of the same name. + # Check for invalid parameter names. + err = validate_query_parameter_names(parameter_name_list=['application_context', 'primary']) + if err != 'ok': + return make_response(err, 400) - :param name: AssayType name - :type name: str - :param application_context: Filter to indicate application context - :type application_context: str + # Check for required parameters. + application_context = request.args.get('application_context') + # For assaytype, the default is HUBMAP. + if application_context is None: + application_context = 'HUBMAP' + + # Check for valid application context. + val_enum = ['HUBMAP', 'SENNET'] + err = validate_parameter_value_in_enum(param_name='application_context', param_value=application_context, + enum_list=val_enum) + if err != 'ok': + return make_response(err, 400) + + # Check for valid parameter values. + # Map the legacy is_primary parameter to new process_state parameter. 
+ is_primary = request.args.get('primary') + if is_primary is not None: + is_primary = is_primary.lower() + val_enum = ['true', 'false'] + err = validate_parameter_value_in_enum(param_name='primary', param_value=is_primary, enum_list=val_enum) + if err != 'ok': + return make_response(err, 400) + if is_primary == 'true': + process_state = 'primary' + else: + process_state = 'derived' + else: + process_state = None - :rtype: Union[AssayTypePropertyInfo, Tuple[AssayTypePropertyInfo, int], Tuple[AssayTypePropertyInfo, int, Dict[str, str]] - """ - application_context = validate_application_context() neo4j_instance = current_app.neo4jConnectionHelper.instance() - result = assaytype_name_get_logic(neo4j_instance, name, None, application_context) - if result is None: - # JAS Oct 2023 changed from 400 to 404 - return make_response(f"No such assay_type {name}", 404) - return jsonify(result) + result = assayclasses_get_logic( + neo4j_instance, assaytype=name, process_state=process_state, context=application_context) + + if (result is None or result == []): + # Empty result + err = get_404_error_string(prompt_string=f"No results for " + f"specified parameters") + return make_response(err, 404) + + # Build the legacy response from the new response. + listresponse=[] + for r in result: + assaytype = {} + val = r.get('value') + assaytype['name'] = val.get('assaytype') + # July 2024 - Process states have changed from a simple binary for primary/derived to a categorical + # value of 'primary', 'derived', or 'epic'. The legacy model will observe the binary model. + assaytype['primary'] = val.get('process_state') == 'primary' + #assaytype['primary'] = val.get('primary') + assaytype['description'] = val.get('description') + assaytype['vitessce-hints'] = val.get('vitessce_hints') + # The vis-only and contains-pii properties have been deprecated. + assaytype['vis-only'] = 'deprecated' + assaytype['contains-pii'] = 'deprecated' + listresponse.append(assaytype) + + # Remove duplicates. 
There will likely be both "non-DCWG" and "DCWG" rules for the same assaytype, for which the + # subset used for assaytype response will contain duplicates. + listunique = remove_duplicate_dicts_from_list(listinput=listresponse, indexval='name') + + # The assaytype endpoint returns a list of objects that is the value of a key named 'result'. + # The assaytype/{name} endpoint returns a single object. + if len(listunique) ==1: + return jsonify(listunique[0]) + else: + return jsonify({'result':listunique}) diff --git a/src/hs_ontology_api/routes/deprecated/__init__.py b/src/hs_ontology_api/routes/deprecated/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/hs_ontology_api/routes/deprecated/datasets/__init__.py b/src/hs_ontology_api/routes/deprecated/datasets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/hs_ontology_api/routes/datasets/datasets_controller.py b/src/hs_ontology_api/routes/deprecated/datasets/datasets_controller.py similarity index 100% rename from src/hs_ontology_api/routes/datasets/datasets_controller.py rename to src/hs_ontology_api/routes/deprecated/datasets/datasets_controller.py diff --git a/src/hs_ontology_api/routes/fieldassays/fieldassays_controller.py b/src/hs_ontology_api/routes/fieldassays/fieldassays_controller.py index 75f4fc4..3d85d29 100644 --- a/src/hs_ontology_api/routes/fieldassays/fieldassays_controller.py +++ b/src/hs_ontology_api/routes/fieldassays/fieldassays_controller.py @@ -15,36 +15,24 @@ def field_assays_get(name=None): """ # Get optional filtering parameters: - # assay_identifier - legacy identifier for assay dataset from field_assays.yaml. - # Can be a data_type, alt-name, or description. - # data_type - legacy data_type, used in ingestion workflows - # dataset_type - "soft assay" dataset type, used by the Rules Engine + # assaytype - legacy identifier for assay dataset from field_assays.yaml. 
# Validate parameters - err = validate_query_parameter_names(['assay_identifier', 'data_type', 'dataset_type']) + err = validate_query_parameter_names(['assaytype']) if err != 'ok': return make_response(err, 400) - assay_identifier = request.args.get('assay_identifier') - data_type = request.args.get('data_type') - dataset_type = request.args.get('dataset_type') + assaytype = request.args.get('assaytype') neo4j_instance = current_app.neo4jConnectionHelper.instance() - result = field_assays_get_logic(neo4j_instance, field_name=name, assay_identifier=assay_identifier, - data_type=data_type, dataset_type=dataset_type) + result = field_assays_get_logic(neo4j_instance, field_name=name, assaytype=assaytype) iserr = False if result is None or result == []: iserr = True else: - # Check for no assay associations. - # The result object is an list of dicts if the route is unfiltered (field-associations) and a dict - # if filtered (field-associations/{name}). - if type(result) == list: - assays = result[0].get('assays') - else: - assays = result.get('assays') - iserr = len(assays) == 0 + fields = result.get('fields') + iserr = len(fields)==0 if iserr: # Empty result diff --git a/src/hs_ontology_api/utils/http_error_string.py b/src/hs_ontology_api/utils/http_error_string.py index 886d9b0..2279856 100644 --- a/src/hs_ontology_api/utils/http_error_string.py +++ b/src/hs_ontology_api/utils/http_error_string.py @@ -121,3 +121,11 @@ def validate_parameter_value_in_enum(param_name=None, param_value=None, enum_lis f"Refer to the SmartAPI documentation for this endpoint for more information." return "ok" + +def validate_application_context(): + + application_context = request.args.get('application_context') + if application_context is None: + return abort(jsonify( + f'Invalid application_context ({application_context}) specified. 
Please pass one of SENNET or HUBMAP')), 400 + return application_context diff --git a/src/hs_ontology_api/utils/listdict.py b/src/hs_ontology_api/utils/listdict.py new file mode 100644 index 0000000..f937cea --- /dev/null +++ b/src/hs_ontology_api/utils/listdict.py @@ -0,0 +1,24 @@ +# coding: utf-8 + +def remove_duplicate_dicts_from_list(listinput: list[dict], indexval: str) -> list[dict]: + """ + Removes duplicate dictionaries from a list of dictionaries, keeping the first occurrence of each. + Assumes that each dictionary has the key named by indexval, with hashable values that can be treated as an index. + + :param listinput: list of dictionaries with duplicates--e.g., [{"a":"1"},{"a":"2"},{"a":"1"}]. Not modified. + :param indexval: name of the key whose value identifies a dictionary as unique--e.g., "a" + :return: a new list of dictionaries without duplicates--e.g., [{"a":"1"},{"a":"2"}] + + """ + + listunique = {} + listcheck = list(listinput) + index = 0 + while index < len(listcheck): + if listcheck[index][indexval] in listunique: + del listcheck[index] + else: + listunique[listcheck[index][indexval]] = 1 + index += 1 + + return listcheck \ No newline at end of file diff --git a/src/hs_ontology_api/utils/neo4j_logic.py b/src/hs_ontology_api/utils/neo4j_logic.py index 49a8d5b..d053476 100644 --- a/src/hs_ontology_api/utils/neo4j_logic.py +++ b/src/hs_ontology_api/utils/neo4j_logic.py @@ -1234,21 +1234,18 @@ def field_types_info_get_logic(neo4j_instance, type_source=None): return fieldtypes -def field_assays_get_logic(neo4j_instance, field_name=None, assay_identifier=None, - data_type=None, dataset_type=None) -> List[FieldAssay]: +def field_assays_get_logic(neo4j_instance, field_name=None, assaytype=None) -> dict: """ Returns detailed information on the associations between a metadata field and assay datasets. :param neo4j_instance: connection to UBKG instance :param field_name: optional filter: name of field - :param assay_identifier: optional filter: name of assay_identifier used in legacy field_assays.yaml. - This corresponds to a data_type; an alt-name, or a description. 
- :param data_type: legacy data_type - :param dataset_type: soft assay dataset type + :param assaytype: optional filter: name of assay_identifier used in legacy field_assays.yaml. Although + the legacy yaml allows for alt-names and descriptions, these are no longer valid. :return: """ # response list - fieldassays: [FieldAssay] = [] + fieldassays: [dict] = [] # Used in WHERE clauses when no filter is needed. identity_filter = '1=1' @@ -1265,54 +1262,27 @@ def field_assays_get_logic(neo4j_instance, field_name=None, assay_identifier=Non field_filter = f" AND tField.name = '{field_name}'" query = query.replace('$field_filter', field_filter) - # Allow for filtering on assay_identifier. - if assay_identifier is None: + # Allow for filtering on assaytype. + if assaytype is None: assay_type_filter = f'AND {identity_filter}' else: - assay_type_filter = f" AND tAssay.name='{assay_identifier}'" + assay_type_filter = f" AND tAssayType.name='{assaytype}'" query = query.replace('$assay_type_filter', assay_type_filter) - # Allow for filtering on data_type and dataset_type - list_data_filters = [] - if data_type is None: - list_data_filters.append(identity_filter) - else: - list_data_filters.append(f"data_type='{data_type}'") - - if dataset_type is None: - list_data_filters.append(identity_filter) - else: - list_data_filters.append(f"dataset_type='{dataset_type}'") - - if len(list_data_filters) == 0: - filter = f' WHERE {identity_filter}' - else: - filter = ' WHERE ' + ' AND '.join(list_data_filters) - - query = query.replace('$data_type_dataset_type_filters', filter) - with neo4j_instance.driver.session() as session: # Execute Cypher query. recds: neo4j.Result = session.run(query) - record_count = 0 - - # Build response object. Valid responses contain something in the assays element other than - # ['none|none|none']. 
def _escape_cypher_literal(value) -> str:
    """
    Escapes a value so that it can be embedded inside a single-quoted Cypher string literal.

    Backslashes are doubled first so that the backslash placed in front of each single quote
    is not itself escaped a second time.

    :param value: value to embed; converted with str(), as f-string interpolation would do
    :return: the escaped text, without surrounding quotes
    """
    return str(value).replace('\\', '\\\\').replace("'", "\\'")


def assayclasses_get_logic(neo4j_instance,assayclass=None, assaytype=None, process_state=None, context=None) -> list:
    """
    July 2024
    Obtains information on the assay classes (rule-based dataset "kinds") that are specified in
    the testing rules json file.

    The return from the query is a complete JSON, so there is no need for a model class.

    :param neo4j_instance: neo4j connection; its driver and timeout attributes are used
    :param assayclass: either the code for the assay class's rule or the value of rule_description
    :param assaytype: the assaytype. Ignored when assayclass is supplied, because the endpoint that
                      filters by assayclass assumes a single response and assaytype is not unique.
    :param process_state: in the enum ['primary','derived','epic']
    :param context: application context--i.e., HUBMAP or SENNET. Must be a string: it is substituted
                    into the query text, and str.replace raises TypeError if it is left as None.
    :return: list of the 'rule_based_datasets' values of the records returned by the query

    example: if a assay class's rule has rule_description="non-DCWG primary AF" and rule code "HUBMAP:C200001", either
    "non-DCWG primary AF" or "C200001" will result in selection of the assay class. The application context is used
    to identify the complete rule code.

    """
    assayclasses = []

    # Load and parameterize query.
    querytxt = loadquerystring('assayclass.cypher')

    # Filter by application context.
    querytxt = querytxt.replace('$context', context)

    # Filter by assay class. The value arrives from the request, so it is escaped before being
    # placed inside the quoted Cypher literals.
    if assayclass is None:
        assayclass_filter = ''
    else:
        safe_class = _escape_cypher_literal(assayclass)
        # NOTE(review): `context` below is a bare identifier in the Cypher text, presumably a
        # variable defined inside assayclass.cypher -- confirm against the query file.
        assayclass_filter = f"AND (cRBD.CodeID = context+':{safe_class}' OR tRBD.name='{safe_class}')"
    querytxt = querytxt.replace('$assayclass_filter', assayclass_filter)

    # Filter by process_state
    if process_state is None:
        process_state_filter = ''
    else:
        process_state_filter = f"AND tdsProcess.name='{_escape_cypher_literal(process_state)}'"
    querytxt = querytxt.replace('$process_state_filter', process_state_filter)

    # Filter by assaytype, but only if this is the general endpoint.
    # (The endpoint that filters by assayclass assumes a single response; assaytype is not unique.)
    if assaytype is not None and assayclass is None:
        assaytype_filter = f"AND REPLACE(tassaytype.name,'_assaytype','') = '{_escape_cypher_literal(assaytype)}'"
    else:
        assaytype_filter = ''
    querytxt = querytxt.replace('$assaytype_filter', assaytype_filter)

    # Set timeout for query based on value in app.cfg.
    query = neo4j.Query(text=querytxt, timeout=neo4j_instance.timeout)

    with neo4j_instance.driver.session() as session:
        recds = session.run(query)
        for record in recds:
            # record.get does not raise for a missing key, so the original KeyError handler
            # never fired; records without the key are skipped explicitly instead.
            rule_based_datasets = record.get('rule_based_datasets')
            if rule_based_datasets is not None:
                assayclasses.append(rule_based_datasets)

    return assayclasses
app.register_blueprint(organs_blueprint) app.register_blueprint(relationships_blueprint) app.register_blueprint(valueset_blueprint) @@ -63,6 +69,8 @@ def make_flask_config(): app.register_blueprint(field_schemas_blueprint) app.register_blueprint(field_types_info_blueprint) app.register_blueprint(field_entities_blueprint) +# July 2024 +app.register_blueprint(assayclasses_blueprint) # Instantiate a Cells API client. diff --git a/test/README.md b/test/README.md new file mode 100644 index 0000000..974c9ee --- /dev/null +++ b/test/README.md @@ -0,0 +1,44 @@ +# Ontology API for HuBMAP and SenNet applications + +## Test scripts + +### Background + +#### UBKG API deployment +The hs-ontology-api is a [child UBKG API](https://ubkg.docs.xconsortia.org/api/#child-ubkg-api-instances). +The hs-ontology-api contains code for endpoints that work with a UBKG instance in the HuBMAP/SenNet [context](https://ubkg.docs.xconsortia.org/contexts/)-- +i.e., with data specific to HuBMAP and SenNet; however, it also accepts calls to endpoints of the +generic UBKG base context. The code for generic endpoints is stored in the ubkg-api; +the hs-ontology-api integrates with the ubkg-api that is compiled as a library +(PyPI package). + +A **UBKG API deployment** corresponds to the combination of the ubkg-api and one or more child APIs. +The HuBMAP/SenNet UBKG API deployment combines ubkg-api and hs-ontology-api. + +#### API Gateway +The endpoint URLs for the HuBMAP/SenNet UBKG API deployment are managed by an AWS API Gateway. +The gateway manages the union of all endpoint URLs. + +### Types of Testing + +#### Endpoints: unit and regression testing +To unit test or regression test endpoints of a UBKG API deployment, the developer should +work within the API instance that houses the endpoint code. In other words, +test hs-ontology-api endpoints by instantiating hs-ontology-api; test ubkg-api +endpoints by instantiating ubkg-api.
+ +In particular, testing new functionality in ubkg-api by executing against an instance +of hs-ontology-api is not a valid test methodology. Because the hs-ontology-api +works with a compiled package version of ubkg-api, it is better to test ubkg-api +endpoints in a development ubkg-api instance; once the ubkg-api endpoint is validated, the +ubkg-api package can be recompiled for use by the hs-ontology-api. + +To test hs-ontology-api endpoints, use the **test_api.sh** script in this folder. +The script writes output to the test.out file, which is ignored by git. + +#### Gateway: integration testing +Because the API gateway manages the endpoint URLs of all components of a UBKG API instance, it +is often necessary to test the union of endpoint URLs. This is not a functional test of a particular endpoint; +instead, it is an integration test of the gateway configuration. + +To test the gateway for the HuBMAP/SenNet UBKG API deployment, use the **test_gateway.sh** script in this folder. diff --git a/test/test_api.sh b/test/test_api.sh index 42f18c8..009beef 100755 --- a/test/test_api.sh +++ b/test/test_api.sh @@ -1,6 +1,6 @@ #!/bin/bash ########## -# Test script for UBKG API +# Test script for hs-ontology API ########## @@ -14,7 +14,7 @@ Help() # Display Help echo "" echo "****************************************" - echo "HELP: UBKG API test script" + echo "HELP: hs-ontology API test script" echo echo "Syntax: ./test_api.sh [-option]..." echo "option" @@ -36,6 +36,9 @@ while getopts ":hv:" option; do esac done +# Check for environment parameter. +: ${env:?Missing environment parameter (-v). Run this script with -h for options.} + # Environment URLs. UBKG_URL_PROD=https://ontology.api.hubmapconsortium.org UBKG_URL_DEV=https://ontology-api.dev.hubmapconsortium.org @@ -54,6 +57,7 @@ case "$env" in esac + echo "Using UBKG at: ${UBKG_URL}" | tee test.out echo "For these tests, only first 60 characters of output from HTTP 200 returns displayed."
| tee -a test.out echo "To review response bodies in detail, call endpoints individually." | tee -a test.out @@ -75,7 +79,7 @@ echo "/assayname_POST with bulk-RNA => should return 200" | tee -a test.out curl --request POST \ --url "${UBKG_URL}/assayname" \ --header "Content-Type: application/json" \ - --data '{"name": "bulk-RNA"}' | cut -c1-60 | tee -a test.out + --data '{"name": ["bulk-RNA"]}' |tee -a test.out echo echo | tee -a test.out echo | tee -a test.out @@ -93,31 +97,72 @@ echo | tee -a test.out echo | tee -a test.out -echo "TESTS FOR: assaytypes GET" | tee -a test.out -echo "SIGNATURE: /assaytypes/?application_context=" | tee -a test.out +echo "TESTS FOR: assayclasses GET" | tee -a test.out +echo "SIGNATURE: /assayclasses?application_context=&process_state=" | tee -a test.out echo | tee -a test.out echo | tee -a test.out -echo "/assaytypes/bulk-RNA?application_context=HUBMAP => should return 200" | tee -a test.out +echo "1. /assayclasses?application_context=x => invalid application context; should return 400" | tee -a test.out curl --request GET \ - --url "${UBKG_URL}/assaytype/bulk-RNA?application_context=HUBMAP" \ - --header "Accept: application/json" | cut -c1-60 | tee -a test.out + --url "${UBKG_URL}/assayclasses?application_context=HUBMAPx" \ + --header "Accept: application/json" | tee -a test.out +echo +echo | tee -a test.out +echo | tee -a test.out +echo "2. /assayclasses => missing application context; should return 400" | tee -a test.out +curl --request GET \ + --url "${UBKG_URL}/assayclasses?" \ + --header "Accept: application/json" | tee -a test.out echo echo | tee -a test.out echo | tee -a test.out -echo "TESTS FOR: datasets GET" | tee -a test.out -echo "SIGNATURE: /datasets?application_context=&data_type=&description=&alt_name=&primary=&contains_pii=&vis_only=&vitessce_hint=&dataset_provider=" | tee -a test.out +echo "3. 
/assayclasses => invalid parameter; should return 400" | tee -a test.out +curl --request GET \ + --url "${UBKG_URL}/assayclasses?application_context=HUBMAP&process_state=x" \ + --header "Accept: application/json" | tee -a test.out +echo echo | tee -a test.out echo | tee -a test.out -echo "/datasets?application_context=HUBMAP => should return 200" | tee -a test.out +echo "4. /assayclasses => valid, all; should return 200" | tee -a test.out curl --request GET \ - --url "${UBKG_URL}/datasets?application_context=HUBMAP" \ - --header "Accept: application/json" | cut -c1-60 | tee -a test.out + --url "${UBKG_URL}/assayclasses?application_context=HUBMAP" \ +--header "Accept: application/json" | cut -c1-60 | tee -a test.out +echo +echo | tee -a test.out +echo | tee -a test.out + +echo "5. /assayclasses => valid, all, primary; should return 200" | tee -a test.out +curl --request GET \ + --url "${UBKG_URL}/assayclasses?application_context=HUBMAP&process_state=primary" \ +--header "Accept: application/json" | cut -c1-60 | tee -a test.out +echo +echo | tee -a test.out +echo | tee -a test.out + +echo "6. /assayclasses/AFX => invalid assayclass; should return 404" | tee -a test.out +curl --request GET \ + --url "${UBKG_URL}/assayclasses/AFX?application_context=HUBMAP" \ +--header "Accept: application/json" | tee -a test.out +echo +echo | tee -a test.out +echo | tee -a test.out + +echo "7. /assayclasses/non-DCWG primary AF => valid assayclass; should return 200" | tee -a test.out +curl --request GET \ + --url "${UBKG_URL}/assayclasses/non-DCWG%20primary%20AF?application_context=HUBMAP" \ +--header "Accept: application/json" | cut -c1-60 | tee -a test.out echo echo | tee -a test.out echo | tee -a test.out +echo "8. 
/assayclasses/C200001 => valid assayclass; should return 200" | tee -a test.out +curl --request GET \ + --url "${UBKG_URL}/assayclasses/C200001?application_context=HUBMAP" \ +--header "Accept: application/json" | cut -c1-60 | tee -a test.out +echo +echo | tee -a test.out +echo | tee -a test.out echo "TESTS FOR: organs GET" | tee -a test.out echo "SIGNATURE: /organs?application_context=" | tee -a test.out @@ -179,9 +224,9 @@ echo "SIGNATURE: /valueset?child_sabs=&parent_cod echo | tee -a test.out echo | tee -a test.out -echo "/valueset?child_sabs=OBI&parent_sab=HUBMAP&parent_code=C001000 => should return 200" | tee -a test.out +echo "/valueset?child_sabs=OBI&parent_sab=HUBMAP&parent_code=C000002 => should return 200" | tee -a test.out curl --request GET \ - --url "${UBKG_URL}/valueset?child_sabs=OBI&parent_sab=HUBMAP&parent_code=C001000" \ + --url "${UBKG_URL}/valueset?child_sabs=OBI&parent_sab=HUBMAP&parent_code=C000002" \ --header "Content-Type: application/json" | cut -c1-60 | tee -a test.out echo echo | tee -a test.out @@ -504,7 +549,7 @@ echo | tee -a test.out echo | tee -a test.out echo "TESTS FOR: field-assays GET" | tee -a test.out -echo "SIGNATURE: /field-assays/?data_type=&dataset_type=?assaytype=" | tee -a test.out echo | tee -a test.out echo | tee -a test.out @@ -532,53 +577,23 @@ echo echo | tee -a test.out echo | tee -a test.out -echo "4. /field-assays?assay_identifier=X => no results; should return 404" | tee -a test.out +echo "4. /field-assays?assaytype=X => no results; should return 404" | tee -a test.out echo "SHOULD RETURN 404; no results" curl --request GET \ - --url "${UBKG_URL}/field-assays?assay_identifier=X" \ - --header "Content-Type: application/json" | cut -c1-60 | tee -a test.out -echo -echo | tee -a test.out -echo | tee -a test.out - -echo "5. 
/field-assays?assay_identifier=snRNAseq => should return 200" | tee -a test.out -curl --request GET \ - --url "${UBKG_URL}/field-assays?assay_identifier=snRNAseq" \ - --header "Content-Type: application/json" | cut -c1-60 | tee -a test.out -echo -echo | tee -a test.out -echo | tee -a test.out - -echo "5. /field-assays?data_type=X => no results; should return 404" | tee -a test.out -curl --request GET \ - --url "${UBKG_URL}/field-assays?data_type=X" \ + --url "${UBKG_URL}/field-assays?assaytype=X" \ --header "Content-Type: application/json" | cut -c1-60 | tee -a test.out echo echo | tee -a test.out echo | tee -a test.out -echo "6. /field-assays?data_type=seqFISH => should return 200" | tee -a test.out +echo "5. /field-assays?assaytype=snRNAseq => should return 200" | tee -a test.out curl --request GET \ - --url "${UBKG_URL}/field-assays?data_type=seqFISH" \ + --url "${UBKG_URL}/field-assays?assaytype=snRNAseq" \ --header "Content-Type: application/json" | cut -c1-60 | tee -a test.out echo echo | tee -a test.out echo | tee -a test.out -echo "7. field-assays?dataset_type=X => no results; should return 404" | tee -a test.out -curl --request GET \ - --url "${UBKG_URL}/field-assays?dataset_type=x" \ - --header "Content-Type: application/json" -echo - -echo "8. 
########################################################################
# Calls one GET endpoint of the UBKG API at ${UBKG_URL} and reports
# whether the response matches expectations.
#
# Arguments:
#   $1  endpoint path plus query string, appended to ${UBKG_URL}
#   $2  expected HTTP response code
#   $3  expected JSON body. When not blank, the returned body must
#       match it exactly.
#   $4  optional minimum length of the returned body, as counted by
#       wc -c. Defaults to 0 when the argument is not supplied.
#
# Prints one FAILED or SUCCEEDED line followed by a blank line.
########################################################################
evaluate_JSON_body()
{
    ENDPOINT=$1
    EXPECTED_HTTP_RESPONSE_CODE=$2
    EXPECTED_JSON=$3

    # ${4-0} substitutes 0 only when a fourth argument was not passed at all.
    EXPECTED_JSON_LENGTH=${4-0}

    # Execute the endpoint; curl appends a marker and the HTTP code after
    # the body so that both can be split out of a single string.
    CURL_OUTPUT=$(curl --request GET \
         --url "${UBKG_URL}${ENDPOINT}" \
         --header "Content-Type: application/json" \
         --silent \
         --write-out "-_-_-_->http_code=%{http_code}")

    # Everything after the marker is the HTTP code; everything before it
    # (less the one character matched by the leading '.') is the body.
    HTTP_RESPONSE_CODE=$(echo ${CURL_OUTPUT} | sed 's/.*-_-_-_->http_code=//')
    RESPONSE_JSON=$(echo ${CURL_OUTPUT} | sed 's/.-_-_-_->http_code=.*//')
    JSON_LENGTH=$(echo $RESPONSE_JSON | wc -c)

    # Check the HTTP code first, then the exact body, then the minimum length.
    if [[ "$HTTP_RESPONSE_CODE" != "$EXPECTED_HTTP_RESPONSE_CODE" ]]; then
        echo "FAILED. Got $HTTP_RESPONSE_CODE response when expecting $EXPECTED_HTTP_RESPONSE_CODE"
    elif [[ -n "$EXPECTED_JSON" && "$RESPONSE_JSON" != "$EXPECTED_JSON" ]]; then
        echo "FAILED. Response JSON does not match expected JSON."
    elif [[ -n $JSON_LENGTH && $JSON_LENGTH -lt $EXPECTED_JSON_LENGTH ]]; then
        echo "FAILED. Response JSON $JSON_LENGTH chars is shorter than expected length of $EXPECTED_JSON_LENGTH."
    else
        echo "SUCCEEDED. Response HTTP code and JSON match expectations."
    fi
    echo
}
curl --request GET \ - --url "${UBKG_URL}/valueset?child_sabs=OBI&parent_sab=HUBMAP&parent_code=C001000" \ + --url "${UBKG_URL}/valueset?child_sabs=OBI&parent_sab=HUBMAP&parent_code=C000002" \ --header "Content-Type: application/json" |cut -c1-60 echo @@ -510,4 +564,135 @@ echo "SHOULD RETURN 200" curl --request GET \ --url "${UBKG_URL}/field-schemas?schema=imc3d" \ --header "Content-Type: application/json" |cut -c1-60 -echo \ No newline at end of file +echo + +# Develop tests for the following endpoints. +# Snatch reasonable arguments from +# https://smart-api.info/ui/96e5b5c0b0efeef5b93ea98ac2794837/#/ +# + +echo "/concepts/paths/subgraph expecting HTTP 200 response" +evaluate_JSON_body \ + '/concepts/paths/subgraph?sab=SNOMEDCT_US&rel=isa&skip=0&limit=10' \ + '200' \ + '' \ + 8021 + +# /concepts/{concept_id}/paths/expand - ubkg-api + +echo "/concepts/C0006142/paths/expand?sab=SNOMEDCT_US&rel=isa&maxdepth=1 expecting HTTP 200 response" +evaluate_JSON_body \ + '/concepts/C0006142/paths/expand?sab=SNOMEDCT_US&rel=isa&maxdepth=1' \ + '200' \ + '' \ + 526167 + +echo "/concepts/C0006142/paths/trees?sab=SNOMEDCT_US&rel=isa&maxdepth=0 expecting HTTP 200 response" +evaluate_JSON_body \ + '/concepts/C0006142/paths/trees?sab=SNOMEDCT_US&rel=isa&maxdepth=0' \ + '200' \ + '' \ + 41113 + +echo "/concepts/C2720507/nodeobjects expecting HTTP 200 response" +evaluate_JSON_body \ + '/concepts/C2720507/nodeobjects' \ + '200' \ + '{"nodeobjects":[{"node":{"codes":[{"codeid":"MTH:NOCODE","sab":"MTH","terms":[{"name":"SNOMED CT Concept (SNOMED RT+CTV3)","tty":"PN"}]},{"codeid":"SNOMEDCT_US:138875005","sab":"SNOMEDCT_US","terms":[{"name":"SNOMED CT has been created by combining SNOMED RT and a computer-based nomenclature and classification known as Read Codes Version 3, which was created on behalf of the U.K. Department of Health.","tty":"SY"},{"name":"\u00a9 2002-2024 International Health Terminology Standards Development Organisation (IHTSDO). All rights reserved. 
SNOMED CT\u00ae, was originally created by The College of American Pathologists. \"SNOMED\" and \"SNOMED CT\" are registered trademarks of the IHTSDO.","tty":"SY"},{"name":"SNOMED CT Concept (SNOMED RT+CTV3)","tty":"FN"},{"name":"SNOMED CT Concept","tty":"PT"}]},{"codeid":"SRC:V-SNOMEDCT_US","sab":"SRC","terms":[{"name":"US Edition of SNOMED CT","tty":"RPT"},{"name":"SNOMED CT Concept","tty":"RHT"},{"name":"SNOMED CT, US Edition","tty":"SSN"},{"name":"SNOMEDCT_US","tty":"RAB"}]}],"cui":"C2720507","definitions":[],"pref_term":"SNOMED CT Concept (SNOMED RT+CTV3)","semantic_types":[{"def":"A conceptual entity resulting from human endeavor. Concepts assigned to this type generally refer to information created by humans for some purpose.","stn":"A2.4","sty":"Intellectual Product","tui":"T170"}]}}]}' + +echo "/concepts/C2720507/paths/shortestpath/C1272753?sab=SNOMEDCT_US&rel=isa expecting HTTP 200 response" +evaluate_JSON_body \ + '/concepts/C2720507/paths/shortestpath/C1272753?sab=SNOMEDCT_US&rel=isa' \ + '200' \ + '' \ + 1385 + +echo "/database/server expecting HTTP 200 response" +evaluate_JSON_body \ + '/database/server'\ + '200' \ + '{"edition":"community","version":"5.11.0"}' + +echo "/node-types expecting HTTP 200 response" +evaluate_JSON_body \ + '/node-types' \ + '200' \ + '{"node_types":["Code","Concept","Definition","Semantic","Term"]}' + +echo "/node-types/Code/counts?sab=MSH expecting HTTP 200 response" +evaluate_JSON_body \ + '/node-types/Code/counts?sab=MSH' \ + '200' \ + '{"node_types":[{"node_type":{"count":5124728,"label":"Code"}}],"total_count":5124728}' + +echo "/node-types/Code/counts-by-sab?sab=MSH expecting HTTP 200 response" +evaluate_JSON_body \ + '/node-types/Code/counts-by-sab?sab=MSH' \ + '200' \ + '{"node_types":[{"node_type":{"count":354269,"label":"Code","sabs":[{"count":354269,"sab":"MSH"}]}}],"total_count":354269}' + +echo "/property-types expecting HTTP 200 response" +evaluate_JSON_body \ + '/property-types' \ + '200' \ + 
'{"property_types":["ATUI","CODE","CUI","CodeID","DEF","SAB","STN","TUI","evidence_class","lowerbound","name","upperbound","value"]}' + +echo "/relationship-types expecting HTTP 200 response" +evaluate_JSON_body \ + '/relationship-types' \ + '200' \ + '' \ + 43529 + +echo "/sabs expecting HTTP 200 response" +evaluate_JSON_body \ + '/sabs' \ + '200' \ + '' \ + 2238010 + +echo "/sabs/codes/counts expecting HTTP 200 response" +evaluate_JSON_body \ + '/sabs/codes/counts' \ + '200' \ + '' \ + 44855 + +echo "/sabs/MSH/codes/counts expecting HTTP 200 response" +evaluate_JSON_body \ + '/sabs/MSH/codes/counts' \ + '200' \ + '{"sabs":[{"count":354269,"position":1,"sab":"MSH"}]}' + +echo "/sabs/MSH/codes/details expecting HTTP 200 response" +evaluate_JSON_body \ + '/sabs/MSH/codes/details' \ + '200' \ + '' \ + 177514 + +echo "/sabs/MSH/term-types expecting HTTP 200 response" +evaluate_JSON_body \ + '/sabs/MSH/term-types' \ + '200' \ + '{"sab":"MSH","term_types":["PM","N1","ET","DEV","DSV","MH","PEP","XQ","PXQ","QEV","TQ","QAB","PCE","NM","CE","HT","QSV","HS"]}' + +echo "/semantics/semantic-types expecting HTTP 200 response" +evaluate_JSON_body \ + '/semantics/semantic-types' \ + '200' \ + '' \ + 31214 + +echo "/semantics/semantic-types/T071 expecting HTTP 200 response" +evaluate_JSON_body \ + '/semantics/semantic-types/T071' \ + '200' \ + '{"semantic_types":[{"position":1,"semantic_type":{"def":"A broad type for grouping physical and conceptual entities.","stn":"A","sty":"Entity","tui":"T071"}}]}' + +echo "/semantics/semantic-types/T071 expecting HTTP 200 response" +evaluate_JSON_body \ + '/semantics/semantic-types/T071/subtypes' \ + '200' \ + '' \ + 20574 \ No newline at end of file