diff --git a/README.rst b/README.rst index 0d80bda3..f61df143 100644 --- a/README.rst +++ b/README.rst @@ -28,7 +28,7 @@ Creating a conda environment and installing the library (tested with OSX) ========================================================================= macOS ------ +----- .. code-block:: bash $ ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" @@ -75,10 +75,10 @@ This program will convert a BIDS MRI dataset to a NIDM-Experiment RDF document. This program will represent a BIDS MRI dataset as a NIDM RDF document and provide user with opportunity to annotate the dataset (i.e. create sidecar files) and associate selected variables with broader concepts to make datasets more - FAIR. + FAIR. Note, you must obtain an API key to Interlex by signing up for an account at scicrunch.org then going to My Account - and API Keys. Then set the environment variable INTERLEX_API_KEY with your key. + and API Keys. Then set the environment variable INTERLEX_API_KEY with your key. optional arguments: -h, --help show this help message and exit @@ -183,7 +183,7 @@ If you want to merge NIDM files on subject ID see pynidm merge -o, --out_file TEXT File to write concatenated NIDM files [required] --help Show this message and exit. - + visualize --------- This command will produce a visualization(pdf) of the supplied NIDM files @@ -197,7 +197,7 @@ named the same as the input files and stored in the same directories. -nl, --nidm_file_list TEXT A comma separated list of NIDM files with full path [required] --help Show this message and exit. - + merge ----- This function will merge NIDM files. See command line parameters for @@ -268,25 +268,25 @@ Options: -nl, --nidm_file_list TEXT A comma-separated list of NIDM files with full path [required] -r, --regularization TEXT Parameter, if set, will return the results of - the linear regression with L1 or L2 regularization - depending on the type specified, and the weight + the linear regression with L1 or L2 regularization + depending on the type specified, and the weight with the maximum likelihood solution. This will prevent overfitting. (Ex: -r L1) -model, --ml TEXT An equation representing the linear regression. The dependent variable comes first, followed by "=" or "~", followed by the independent variables separated by "+" - (Ex: -model "fs_003343 = age*sex + sex + + (Ex: -model "fs_003343 = age*sex + sex + age + group + age*group + bmi") [required] -contstant, --ctr TEXT Parameter, if set, will return differences in variable relationships by group. One or - multiple parameters can be used (multiple + multiple parameters can be used (multiple parameters should be separated by a comma- separated list) (Ex: -contrast group,age) -o, --output_file TEXT Optional output file (TXT) to store results of query --help Show this message and exit. - + To use the linear regression algorithm successfully, structure, syntax, and querying is important. Here is how to maximize the usefulness of the tool: @@ -304,7 +304,7 @@ Now that we have selected the variables, we can perform a linear regression. 
In The command to use for this particular data is: pynidm linear-regression -nl /simple2_NIDM_examples/datasets.datalad.org/abide/RawDataBIDS/CMU_a/nidm.ttl,/simple2_NIDM_examples/datasets.datalad.org/abide/RawDataBIDS/CMU_b/nidm.ttl -model "fs_000008 = DX_GROUP + PIQ_tca9ck + http://uri.interlex.org/ilx_0100400" -contrast "DX_GROUP" -r L1 --nl specifies the file(s) to pull data from, while -model is the model to perform a linear regression model on. In this case, the variables are fs_000008 (the dependent variable, supratentorial brain volume), DX_GROUP (diagnostic group), PIQ_tca9ck (PIQ), and http://uri.interlex.org/ilx_0100400 (age at scan). The -contrast parameter says to contrast the data using DX_GROUP, and then do a L1 regularization to prevent overfitting. +-nl specifies the file(s) to pull data from, while -model is the model to perform a linear regression model on. In this case, the variables are fs_000008 (the dependent variable, supratentorial brain volume), DX_GROUP (diagnostic group), PIQ_tca9ck (PIQ), and http://uri.interlex.org/ilx_0100400 (age at scan). The -contrast parameter says to contrast the data using DX_GROUP, and then do a L1 regularization to prevent overfitting. Details on the REST API URI format and usage can be found below. @@ -645,4 +645,3 @@ Additional NIDM-related Tools * Freesurfer stats -> NIDM * FSL structural segmentation -> NIDM * ANTS structural segmentation -> NIDM - diff --git a/docker/Dockerfile b/docker/Dockerfile index 1e958786..e2940807 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -35,4 +35,3 @@ RUN git config --global user.name "docker user" && git config --global user.emai ENV TMPDIR=/opt/project/cache COPY . . - diff --git a/docker/Dockerfile-datalad b/docker/Dockerfile-datalad index a82ef086..4f03cd57 100644 --- a/docker/Dockerfile-datalad +++ b/docker/Dockerfile-datalad @@ -11,4 +11,3 @@ RUN wget -O- http://neuro.debian.net/lists/bionic.us-nh.full | tee /etc/apt/sour RUN apt-get install -y git-annex-standalone RUN datalad install ///abide2/RawData/NYU_1 - diff --git a/docker/README.md b/docker/README.md index 51b47311..d587d180 100644 --- a/docker/README.md +++ b/docker/README.md @@ -1,7 +1,7 @@ # Docker and REST API This dockerfile can be used to create a development docker container suitable for both -interactive use and also as a proof of concept REST API server. +interactive use and also as a proof of concept REST API server. #Build the container @@ -11,7 +11,7 @@ To build the container, start in this directory and use the command: ``` ./build.sh ``` - + ## Interactive You can then run the container interactively with: @@ -25,13 +25,13 @@ in the directory /opt/PyNIDM in the container. ## REST Server This section assumes you have the PyNIDM source code in ~/PyNIDM. You should -also put any NIDM ttl files you want the REST s erver to process somewhere under the +also put any NIDM ttl files you want the REST s erver to process somewhere under the ~/PyNIDM/ttl directory. Once you have done those things, use the command: ``` ./runrest.sh ``` -This should start a HTTP server that is listening on port 5000 of your +This should start a HTTP server that is listening on port 5000 of your local system. 
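As a quick smoke test (a minimal sketch, assuming the container is running and the REST server is reachable at localhost:5000 as described above), you can fetch the project listing from Python:

```python
import json
import urllib.request

# Query the /projects route exposed by the REST server started with runrest.sh.
# Host and port follow the defaults described in this README; adjust them if you
# mapped the container port differently.
with urllib.request.urlopen("http://localhost:5000/projects") as resp:
    projects = json.load(resp)

print(json.dumps(projects, indent=2))
```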
You should be able to connect to the following routes: ``` http://localhost:5000/projects @@ -41,5 +41,5 @@ http://localhost:5000/projects/[Project-UUID]/subjects/[Subject-UUID] ``` After the server is started you can continue to modify the files in your -~/PyNIDM/ttl directory and those changes will immediately be reflected in the +~/PyNIDM/ttl directory and those changes will immediately be reflected in the REST API results. diff --git a/docker/build.sh b/docker/build.sh index 9e338ea2..0f1e6863 100755 --- a/docker/build.sh +++ b/docker/build.sh @@ -2,4 +2,4 @@ mkdir -p ../cache sudo docker build -f Dockerfile -t pynidm . -sudo docker build -f Dockerfile-rest -t pynidm-rest . \ No newline at end of file +sudo docker build -f Dockerfile-rest -t pynidm-rest . diff --git a/docker/rest-blaze.py b/docker/rest-blaze.py index 44e501c2..c57dd9e0 100755 --- a/docker/rest-blaze.py +++ b/docker/rest-blaze.py @@ -1,22 +1,23 @@ #!/usr/bin/python -from flask import Flask, request -from flask_restful import Resource, Api import glob -from nidm.experiment.tools.rest import RestParser +import os +from flask import Flask, request from flask_cors import CORS +from flask_restful import Api, Resource +from nidm.experiment.tools.rest import RestParser import simplejson -import os + def getTTLFiles(): files = [] - for filename in glob.glob('/opt/project/ttl/**/*.ttl', recursive=True): + for filename in glob.glob("/opt/project/ttl/**/*.ttl", recursive=True): files.append(filename) return files + class NIDMRest(Resource): def get(self, all): - query_bits = [] for a in request.args.keys(): query_bits.append("{}={}".format(a, request.args.get(a))) @@ -24,28 +25,38 @@ def get(self, all): files = getTTLFiles() if len(files) == 0: - return ({'error' : 'No NIDM files found. You may need to add NIDM ttl files to ~/PyNIDM/ttl'}) - restParser = RestParser(output_format=RestParser.OBJECT_FORMAT, verbosity_level=5) - - json_str = simplejson.dumps(restParser.run(files, "{}?{}".format(all, query)), indent=2) - response = app.response_class(response=json_str, status=200, mimetype='application/json') + return { + "error": "No NIDM files found. 
You may need to add NIDM ttl files to ~/PyNIDM/ttl" + } + restParser = RestParser( + output_format=RestParser.OBJECT_FORMAT, verbosity_level=5 + ) + + json_str = simplejson.dumps( + restParser.run(files, "{}?{}".format(all, query)), indent=2 + ) + response = app.response_class( + response=json_str, status=200, mimetype="application/json" + ) return response + class Instructions(Resource): def get(self): - - return ({'message' : 'You probably want to start at {}projects See instructions at PyNIDM/docker/README.md for details on the API and loading data.'.format(request.url_root)}) - + return { + "message": "You probably want to start at {}projects See instructions at PyNIDM/docker/README.md for details on the API and loading data.".format( + request.url_root + ) + } app = Flask(__name__) CORS(app) api = Api(app) -api.add_resource(Instructions, '/') -api.add_resource(NIDMRest, '/') - -if __name__ == '__main__': +api.add_resource(Instructions, "/") +api.add_resource(NIDMRest, "/") - os.system('java -server -Xmx512mb -jar /opt/blazegraph.jar') - app.run(debug=True, host='0.0.0.0') \ No newline at end of file +if __name__ == "__main__": + os.system("java -server -Xmx512mb -jar /opt/blazegraph.jar") + app.run(debug=True, host="0.0.0.0") diff --git a/docs/Makefile b/docs/Makefile index 34b8a748..7049e7b2 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -17,4 +17,4 @@ help: # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/manuscripts/linreg_joss/paper.md b/docs/manuscripts/linreg_joss/paper.md index e97a861e..6c14b84a 100644 --- a/docs/manuscripts/linreg_joss/paper.md +++ b/docs/manuscripts/linreg_joss/paper.md @@ -8,19 +8,19 @@ tags: authors: - name: Ashmita Kumar affiliation: 1 - - name: Albert Crowley + - name: Albert Crowley affiliation: 2 - - name: Nazek Quedar + - name: Nazek Quedar affiliation: 3 - - name: JB Poline + - name: JB Poline affiliation: 4 - - name: Satrajit S. Ghosh + - name: Satrajit S. Ghosh affiliation: 5 - name: David Kennedy affiliation: 6 - name: Jeff Grethe affiliation: 7 - - name: Karl G. Helmer + - name: Karl G. Helmer affiliation: 8 - name: David B. Keator affiliation: 3 @@ -29,17 +29,17 @@ affiliations: index: 1 - name: TCG, Inc., Washington, DC, USA. index: 2 - - name: University of California, Irvine. Psychiatry and Human Behavior, Irvine, CA., USA. + - name: University of California, Irvine. Psychiatry and Human Behavior, Irvine, CA., USA. index: 3 - - name: McGill University, Montreal, Faculty of Medicine and Health Sciences, Department of Neurology and Neurosurgery, McConnell Brain Imaging Centre, Canada. + - name: McGill University, Montreal, Faculty of Medicine and Health Sciences, Department of Neurology and Neurosurgery, McConnell Brain Imaging Centre, Canada. index: 4 - - name: McGovern Institute for Brain Research, Massachusetts Institute of Technology (MIT), Cambridge, MA, USA; Department of Otolaryngology, Harvard Medical School, Boston, MA, USA. + - name: McGovern Institute for Brain Research, Massachusetts Institute of Technology (MIT), Cambridge, MA, USA; Department of Otolaryngology, Harvard Medical School, Boston, MA, USA. 
index: 5 - - name: Eunice Kennedy Shriver Center, Department of Psychiatry, University of Massachusetts Medical School, Worcester, Massachusetts, 01655, USA. + - name: Eunice Kennedy Shriver Center, Department of Psychiatry, University of Massachusetts Medical School, Worcester, Massachusetts, 01655, USA. index: 6 - - name: Center for Research in Biological Systems, University of California, San Diego, USA. + - name: Center for Research in Biological Systems, University of California, San Diego, USA. index: 7 - - name: Massachusetts General Hospital, Department of Radiology; Harvard Medical School, Department of Radiology, Boston, MA, USA. + - name: Massachusetts General Hospital, Department of Radiology; Harvard Medical School, Department of Radiology, Boston, MA, USA. index: 8 date: 2 August 2021 bibliography: paper.bib @@ -49,7 +49,7 @@ bibliography: paper.bib The Neuroimaging Data Model (NIDM)[@Keator2013-fs; @NIDM_Working_Group_undated-eb; @Maumet2016-ab] was started by an international team of cognitive scientists, computer scientists and statisticians to develop a data format capable of describing all aspects of the data lifecycle, from raw data through analyses and provenance. NIDM was built on top of the PROV standard[@Moreau2008-bu; @noauthor_undated-he] and consists of three main interconnected specifications: Experiment, Results, and Workflow. These specifications were envisioned to capture information on all aspects of the neuroimaging data lifecycle, using semantic web techniques[@noauthor_undated-cd] which are essentially annotated graphs. These graphs can be serialized into a variety of text-based document formats (NIDM documents), and using the capabilities of the semantic web, can be used to link datasets together based on both the consistency of annotations across NIDM documents and using ontologies that relate terms used in the annotations. They provide a critical capability to aid in reproducibility and replication of studies, as well as data discovery in shared resources. The NIDM-Experiment module consists of a simple project - session - acquisition hierarchy which can be used to describe both the content and metadata about experimental studies. It has has been used to describe many large publicly-available human neuroimaging datasets (e.g. ABIDE[@Di_Martino2014-af], ADHD200[@noauthor_undated-os], CoRR[@Zuo2014-tw], OpenNeuro[@noauthor_undated-ex] datasets) along with providing unambiguous descriptions of the clinical, neuropsychological, and imaging data collected as part of those studies. This has resulted in approximately 4.5 million statements about aspects of these datasets. -PyNIDM[@noauthor_undated-on], a toolbox written in Python, supports the creation, manipulation, and query of NIDM documents. It is an open-source project hosted on GitHub and distributed under the Apache License, Version 2.0[@noauthor_undated-eh]. PyNIDM is under active development and testing. Tools have been created to support RESTful[@Ravan2020-il] SPARQL[@noauthor_undated-gx] queries of the NIDM documents (i.e. pynidm query) in support of users wanting to identify interesting cohorts across datasets in support of evaluating scientific hypotheses and/or replicating results found in the literature. 
This query functionality, together with the NIDM document semantics, provides a path for investigators to interrogate datasets, understand what data was collected in those studies, and provide sufficiently-annotated data dictionaries of the variables collected to facilitate transformation and combining of data across studies. Additional tools have been developed to import information into NIDM graphs, to visualize the graphs, to merge graphs based on study participant IDs, and to convert semantic-web serializations to more common data structures such as JSON-LD (linked-data variant of JSON). +PyNIDM[@noauthor_undated-on], a toolbox written in Python, supports the creation, manipulation, and query of NIDM documents. It is an open-source project hosted on GitHub and distributed under the Apache License, Version 2.0[@noauthor_undated-eh]. PyNIDM is under active development and testing. Tools have been created to support RESTful[@Ravan2020-il] SPARQL[@noauthor_undated-gx] queries of the NIDM documents (i.e. pynidm query) in support of users wanting to identify interesting cohorts across datasets in support of evaluating scientific hypotheses and/or replicating results found in the literature. This query functionality, together with the NIDM document semantics, provides a path for investigators to interrogate datasets, understand what data was collected in those studies, and provide sufficiently-annotated data dictionaries of the variables collected to facilitate transformation and combining of data across studies. Additional tools have been developed to import information into NIDM graphs, to visualize the graphs, to merge graphs based on study participant IDs, and to convert semantic-web serializations to more common data structures such as JSON-LD (linked-data variant of JSON). Beyond the existing tools that have been written to support NIDM graph documents, some high-level statistical analysis tools are needed to provide investigators with an opportunity to gain more insight into data they may be interested in combining for a complete scientific investigation. Combining datasets collected across different studies is not a trivial task. It requires both a complete, unambiguous description of the data and how it was collected (NIDM-Experiment graphs), along with a varying number of transformations to align, where possible, disparate data. The process of transforming data is often quite time consuming and therefore understanding whether the identified datasets, at a high level, might have some interesting relationships prior to committing to a full scientific study is prudent. Here we report on a tool that provides such capabilities; namely, to provide a simple linear modeling tool supporting the comparison of user-selected variables and information stored in NIDM documents and accessible in a consistent form with other PyNIDM tools (i.e. pynidm linear-regression). @@ -67,7 +67,7 @@ Thus, the algorithm provides the benefit of a machine learning algorithm that ca ![pynidm linear-regression parameters\label{fig:Fig-1}](fig-1.png) -The tool provides a simple command-line user interface (\autoref{fig:Fig-1}) based on the Click Python library[@noauthor_undated-az] which integrates the linear regression module with existing pynidm tools (e.g. pynidm query, pynidm convert, pynidm visualize). +The tool provides a simple command-line user interface (\autoref{fig:Fig-1}) based on the Click Python library[@noauthor_undated-az] which integrates the linear regression module with existing pynidm tools (e.g. 
pynidm query, pynidm convert, pynidm visualize). To use the tool, the user runs the command pynidm linear-regression with a variety of required and optional parameters. The first parameter “-nl” is a comma- separated list of NIDM serialized TURTLE files, each representing a single dataset or a collection site within a multi-site research project (\autoref{fig:Fig-2}). A useful set of NIDM documents describing publicly-available neuroimaging data from the ABIDE[@Di_Martino2014-af], ADHD200[@noauthor_undated-os], and CoRR[@Zuo2014-tw] studies along with datasets in the OpenNeuro database can be found on GitHub[@Keator_undated-eb]. NIDM documents contain both data and metadata of participants involved in neuroimaging studies, ranging from assessments to raw and derived (regional brain volume, mass-univariate functional brain analysis) neuroimaging data. @@ -77,17 +77,17 @@ The next parameter, “-model” provides the user with the ability to construct In the example shown in \autoref{fig:Fig-2}, we have first run a pynidm query operation on the NIDM documents and identified four variables of interest: supratentorial brain volume (fs_000008), diagnostic group (DX_GROUP), performance IQ (PIQ_tca9ck), and age. The model specified establishes the relationship between the DV, brain volume, and the IVs, diagnostic group, performance IQ, and age. In this example, fs_000008 is the fixed unique identifier (UUID) of the supratentorial brain volume computed with the FreeSurfer software[@Fischl2012-cq] using the original Magnetic Resonance Imaging (MRI) structural scans of the brain. This particular UUID is fixed because it identifies a specific brain region and measurement computed with the FreeSurfer software and will not change across datasets that derive brain volume measurements with FreeSurfer. DX_GROUP is the name of the study-specific variable describing the diagnostic group assigned to participants. PIQ_tca9ck is the performance IQ measure collected on study participants and is the UUID created for this data element when the NIDM documents were created for this dataset. Note, this particular UUID is not guaranteed to be the same across NIDM documents from different studies. Finally, https://urldefense.com/v3/__http://uri.interlex.org/ilx_0100400__;!!OLgoXmg!DBnZP1sUXq9i6aeL_u17nlbXYqcFpnmGxSssHbXH-p6gBHMjrSfQVqMUwAPF27WQtQ$ is the age of the participants using a URL form to reference a concept describing the high-level measure of age which has been used to annotate the variables measuring age across studies. Here we use a concept URL which has been mapped to each dataset’s separate variables that store the age of participants. By using the concept URL we avoid the complexity of different variable names being used to store consistent information (e.g. age) across datasets. -This example shows that one can select data elements from the NIDM documents for linear regression using three specific forms: (1) using the UUID of the objects in the NIDM graph documents; (2) using the distinct variable name from the original dataset, also stored as metadata in the NIDM graph documents; (3) using a high-level concept that has been associated with specific variables described by the concept across datasets, used to make querying across datasets with different variable names but measuring the same phenomenon easier. We support these three distinct forms of selecting data elements to enable distinct usage patterns. 
Some investigators will use NIDM documents of their internal studies and want to be able to reference data elements using their study-specific variable names. Other investigators may want to use variables from different studies and thus the variable names are unlikely to be the same; thus, we support the use of selecting variables based on high-level concepts. In practice, users will not often mix forms of referring to data elements within the same model but we show it here to make evident the flexibility of the tool. +This example shows that one can select data elements from the NIDM documents for linear regression using three specific forms: (1) using the UUID of the objects in the NIDM graph documents; (2) using the distinct variable name from the original dataset, also stored as metadata in the NIDM graph documents; (3) using a high-level concept that has been associated with specific variables described by the concept across datasets, used to make querying across datasets with different variable names but measuring the same phenomenon easier. We support these three distinct forms of selecting data elements to enable distinct usage patterns. Some investigators will use NIDM documents of their internal studies and want to be able to reference data elements using their study-specific variable names. Other investigators may want to use variables from different studies and thus the variable names are unlikely to be the same; thus, we support the use of selecting variables based on high-level concepts. In practice, users will not often mix forms of referring to data elements within the same model but we show it here to make evident the flexibility of the tool. ![Output of command in \autoref{fig:Fig-2} with treatment coding (contrast using diagnostic group)\label{fig:Fig-3}](fig-3.png) The “-contrast” parameter allows one to select one or more IVs to contrast the parameter estimates for those IVs. The contrast variable in this example is “DX_GROUP” which describes the diagnostic group of each participant in the ABIDE study. Our tool supports multiple methods of coding treatment variables (e.g. treatment coding (\autoref{fig:Fig-3}), simple coding, sum coding, backward difference coding, and Helmert coding) as made available by the Patsy Python library[@Brooke1923-dx]. The user can select multiple independent variables to contrast and/or contrasts on interactions. -The “-r” parameter allows the user to select L1 (Lasso) or L2 (Ridge) regularization implemented in scikit-learn[@Varoquaux2015-mv]. In either case, regularizing prevents the data from being overfit, potentially improving model generalizability and demonstrating which variables have the strongest relationships with the dependent variable. The regularization weight is iteratively determined across a wide range of regularization weightings using 10-fold cross-validation, selecting the regularization weight yielding the maximum likelihood. +The “-r” parameter allows the user to select L1 (Lasso) or L2 (Ridge) regularization implemented in scikit-learn[@Varoquaux2015-mv]. In either case, regularizing prevents the data from being overfit, potentially improving model generalizability and demonstrating which variables have the strongest relationships with the dependent variable. The regularization weight is iteratively determined across a wide range of regularization weightings using 10-fold cross-validation, selecting the regularization weight yielding the maximum likelihood. 
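The following is a minimal, generic scikit-learn sketch of that weight-selection strategy on synthetic data (it is not the PyNIDM implementation, and the variable layout is illustrative only):

```python
import numpy as np
from sklearn.linear_model import LassoCV

# Synthetic stand-in for a design matrix of independent variables (e.g. diagnostic
# group, performance IQ, age) and a dependent variable such as brain volume.
rng = np.random.default_rng(0)
X = rng.normal(size=(200, 3))
y = 2.0 * X[:, 0] - 0.5 * X[:, 2] + rng.normal(scale=0.1, size=200)

# Sweep a wide range of L1 regularization weights with 10-fold cross-validation
# and keep the weight that predicts held-out data best, mirroring the "-r L1"
# behavior described above (RidgeCV would play the same role for the L2 case).
model = LassoCV(alphas=np.logspace(-4, 1, 50), cv=10).fit(X, y)
print("selected regularization weight:", model.alpha_)
print("coefficients:", model.coef_)
```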
# Conclusions -In this work we have designed a linear regression tool that works on linked-data NIDM documents in support of understanding relationships between variables collected across different research studies. This tool helps scientists evaluate relationships between data at a high level prior to fully integrating datasets for hypothesis testing which may require considerable time and resources. In our initial evaluations, this tool has shown utility for these use-cases. In future work we are creating additional machine learning tools allowing users to cluster data in a similar fashion to the linear regression tool presented here. Further, we intend to add additional functionality that uses the data dictionaries for all study variables contained within NIDM documents selected for modeling and applies mapping functions to remap variable values into common ranges and domains, when necessary, prior to running the linear regression models. +In this work we have designed a linear regression tool that works on linked-data NIDM documents in support of understanding relationships between variables collected across different research studies. This tool helps scientists evaluate relationships between data at a high level prior to fully integrating datasets for hypothesis testing which may require considerable time and resources. In our initial evaluations, this tool has shown utility for these use-cases. In future work we are creating additional machine learning tools allowing users to cluster data in a similar fashion to the linear regression tool presented here. Further, we intend to add additional functionality that uses the data dictionaries for all study variables contained within NIDM documents selected for modeling and applies mapping functions to remap variable values into common ranges and domains, when necessary, prior to running the linear regression models. # Acknowledgements diff --git a/docs/source/conf.py b/docs/source/conf.py index d57b56cd..d022a11f 100755 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -31,37 +31,39 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ['sphinx.ext.autodoc', - 'sphinx.ext.intersphinx', - 'sphinx.ext.todo', - 'sphinx.ext.ifconfig', - 'sphinx.ext.viewcode'] +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.intersphinx", + "sphinx.ext.todo", + "sphinx.ext.ifconfig", + "sphinx.ext.viewcode", +] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. 
-project = 'PyNIDM' -copyright = '2018, dbkeator@uci.edu, camille.maumet@inria.fr, satrajit.ghosh@gmail.com, djarecka@gmail.com' -author = 'dbkeator@uci.edu, camille.maumet@inria.fr, satrajit.ghosh@gmail.com, djarecka@gmail.com' +project = "PyNIDM" +copyright = "2018, dbkeator@uci.edu, camille.maumet@inria.fr, satrajit.ghosh@gmail.com, djarecka@gmail.com" +author = "dbkeator@uci.edu, camille.maumet@inria.fr, satrajit.ghosh@gmail.com, djarecka@gmail.com" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = '0.1' +version = "0.1" # The full version, including alpha/beta/rc tags. -release = '0.1.1' +release = "0.1.1" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -76,7 +78,7 @@ exclude_patterns = [] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = True @@ -87,8 +89,8 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -#html_theme = 'alabaster' -html_theme = 'sphinx_rtd_theme' +# html_theme = 'alabaster' +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -99,13 +101,13 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. -htmlhelp_basename = 'PyNIDMdoc' +htmlhelp_basename = "PyNIDMdoc" # -- Options for LaTeX output --------------------------------------------- @@ -114,15 +116,12 @@ # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -132,8 +131,13 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'PyNIDM.tex', 'PyNIDM Documentation', - 'dbkeator@uci.edu, camille.maumet@inria.fr, satrajit.ghosh@gmail.com, djarecka@gmail.com', 'manual'), + ( + master_doc, + "PyNIDM.tex", + "PyNIDM Documentation", + "dbkeator@uci.edu, camille.maumet@inria.fr, satrajit.ghosh@gmail.com, djarecka@gmail.com", + "manual", + ), ] @@ -141,10 +145,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
-man_pages = [ - (master_doc, 'pynidm', 'PyNIDM Documentation', - [author], 1) -] +man_pages = [(master_doc, "pynidm", "PyNIDM Documentation", [author], 1)] # -- Options for Texinfo output ------------------------------------------- @@ -153,13 +154,17 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'PyNIDM', 'PyNIDM Documentation', - author, 'PyNIDM', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "PyNIDM", + "PyNIDM Documentation", + author, + "PyNIDM", + "One line description of project.", + "Miscellaneous", + ), ] - - # Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'https://docs.python.org/': None} +intersphinx_mapping = {"https://docs.python.org/": None} diff --git a/docs/source/index.rst b/docs/source/index.rst index dabfe420..1a0c8c2a 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -279,19 +279,19 @@ Options: -nl, --nidm_file_list TEXT A comma-separated list of NIDM files with full path [required] -r, --regularization TEXT Parameter, if set, will return the results of - the linear regression with L1 or L2 regularization - depending on the type specified, and the weight + the linear regression with L1 or L2 regularization + depending on the type specified, and the weight with the maximum likelihood solution. This will prevent overfitting. (Ex: -r L1) -model, --ml TEXT An equation representing the linear regression. The dependent variable comes first, followed by "=" or "~", followed by the independent variables separated by "+" - (Ex: -model "fs_003343 = age*sex + sex + + (Ex: -model "fs_003343 = age*sex + sex + age + group + age*group + bmi") [required] -contstant, --ctr TEXT Parameter, if set, will return differences in variable relationships by group. One or - multiple parameters can be used (multiple + multiple parameters can be used (multiple parameters should be separated by a comma- separated list) (Ex: -contrast group,age) -o, --output_file TEXT Optional output file (TXT) to store results @@ -316,7 +316,7 @@ Now that we have selected the variables, we can perform a linear regression. In The command to use for this particular data is: pynidm linear-regression -nl /simple2_NIDM_examples/datasets.datalad.org/abide/RawDataBIDS/CMU_a/nidm.ttl,/simple2_NIDM_examples/datasets.datalad.org/abide/RawDataBIDS/CMU_b/nidm.ttl -model "fs_000008 = DX_GROUP + PIQ_tca9ck + http://uri.interlex.org/ilx_0100400" -contrast "DX_GROUP" -r L1 --nl specifies the file(s) to pull data from, while -model is the model to perform a linear regression model on. In this case, the variables are fs_000008 (the dependent variable, supratentorial brain volume), DX_GROUP (diagnostic group), PIQ_tca9ck (PIQ), and http://uri.interlex.org/ilx_0100400 (age at scan). The -contrast parameter says to contrast the data using DX_GROUP, and then do a L1 regularization to prevent overfitting. +-nl specifies the file(s) to pull data from, while -model is the model to perform a linear regression model on. In this case, the variables are fs_000008 (the dependent variable, supratentorial brain volume), DX_GROUP (diagnostic group), PIQ_tca9ck (PIQ), and http://uri.interlex.org/ilx_0100400 (age at scan). The -contrast parameter says to contrast the data using DX_GROUP, and then do a L1 regularization to prevent overfitting. Details on the REST API URI format and usage can be found below. 
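If you prefer to drive the documented command from a Python script (for example, to batch several models), here is a minimal sketch that uses only the options documented above; the output filename is illustrative:

.. code-block:: python

    import subprocess

    # The NIDM files and model are the ones used in the example above; results
    # are written to a text file via the optional -o flag.
    nidm_files = ",".join([
        "/simple2_NIDM_examples/datasets.datalad.org/abide/RawDataBIDS/CMU_a/nidm.ttl",
        "/simple2_NIDM_examples/datasets.datalad.org/abide/RawDataBIDS/CMU_b/nidm.ttl",
    ])
    subprocess.run(
        [
            "pynidm", "linear-regression",
            "-nl", nidm_files,
            "-model", "fs_000008 = DX_GROUP + PIQ_tca9ck + http://uri.interlex.org/ilx_0100400",
            "-contrast", "DX_GROUP",
            "-r", "L1",
            "-o", "regression_results.txt",
        ],
        check=True,
    )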
diff --git a/nidm/__init__.py b/nidm/__init__.py index b27124af..5974db69 100644 --- a/nidm/__init__.py +++ b/nidm/__init__.py @@ -5,6 +5,7 @@ try: import etelemetry + etelemetry.check_available_version("incf-nidash/pynidm", __version__) except ImportError: pass diff --git a/nidm/core/BIDS_Constants.py b/nidm/core/BIDS_Constants.py index b26b51ca..93c1d713 100644 --- a/nidm/core/BIDS_Constants.py +++ b/nidm/core/BIDS_Constants.py @@ -1,87 +1,89 @@ #!/usr/bin/env python -''' BIDS Terms -> NIDM-Exp Mappings +""" BIDS Terms -> NIDM-Exp Mappings @author: David Keator -''' +""" from . import Constants -#BIDS dataset_description -> NIDM constants mappings -dataset_description = { - "BIDSVersion" : Constants.NIDM_PROJECT_IDENTIFIER, - "Name" : Constants.NIDM_PROJECT_NAME, - "Procedure" : Constants.NIDM_PROJECT_DESCRIPTION, - "License" : Constants.NIDM_PROJECT_LICENSE, - "ReferencesAndLinks" : Constants.NIDM_PROJECT_REFERENCES, - "Authors" : Constants.NIDM_AUTHOR, - "DatasetDOI" : Constants.NIDM_DOI, - "Funding" : Constants.NIDM_FUNDING, - "HowToAcknowledge" : Constants.NIDM_ACKNOWLEDGEMENTS +# BIDS dataset_description -> NIDM constants mappings +dataset_description = { + "BIDSVersion": Constants.NIDM_PROJECT_IDENTIFIER, + "Name": Constants.NIDM_PROJECT_NAME, + "Procedure": Constants.NIDM_PROJECT_DESCRIPTION, + "License": Constants.NIDM_PROJECT_LICENSE, + "ReferencesAndLinks": Constants.NIDM_PROJECT_REFERENCES, + "Authors": Constants.NIDM_AUTHOR, + "DatasetDOI": Constants.NIDM_DOI, + "Funding": Constants.NIDM_FUNDING, + "HowToAcknowledge": Constants.NIDM_ACKNOWLEDGEMENTS, } -#BIDS Participants file -> NIDM constants mappings +# BIDS Participants file -> NIDM constants mappings participants = { - "participant_id" : Constants.NIDM_SUBJECTID - #"sex" : Constants.NIDM_GENDER, - #"age" : Constants.NIDM_AGE, - #"gender" : Constants.NIDM_GENDER, - #"diagnosis" : Constants.NIDM_DIAGNOSIS, - #"handedness" : Constants.NIDM_HANDEDNESS + "participant_id": Constants.NIDM_SUBJECTID + # "sex" : Constants.NIDM_GENDER, + # "age" : Constants.NIDM_AGE, + # "gender" : Constants.NIDM_GENDER, + # "diagnosis" : Constants.NIDM_DIAGNOSIS, + # "handedness" : Constants.NIDM_HANDEDNESS } -#scan metadata -> NIDM constants mappings +# scan metadata -> NIDM constants mappings scans = { - "anat" : Constants.NIDM_MRI_ANATOMIC_SCAN, - "func" : Constants.NIDM_MRI_FUNCTION_SCAN, - "dwi" : Constants.NIDM_MRI_DWI_SCAN, - "bval" : Constants.NIDM_MRI_DWI_BVAL, - "bvec" : Constants.NIDM_MRI_DWI_BVEC, - "T1w" : Constants.NIDM_MRI_T1, - "T2w" : Constants.NIDM_MRI_T2, - "inplaneT2" : Constants.NIDM_MRI_T2, - "bold" : Constants.NIDM_MRI_FLOW, - "dti" : Constants.NIDM_MRI_DIFFUSION_TENSOR, - "asl" : Constants.NIDM_MRI_ASL + "anat": Constants.NIDM_MRI_ANATOMIC_SCAN, + "func": Constants.NIDM_MRI_FUNCTION_SCAN, + "dwi": Constants.NIDM_MRI_DWI_SCAN, + "bval": Constants.NIDM_MRI_DWI_BVAL, + "bvec": Constants.NIDM_MRI_DWI_BVEC, + "T1w": Constants.NIDM_MRI_T1, + "T2w": Constants.NIDM_MRI_T2, + "inplaneT2": Constants.NIDM_MRI_T2, + "bold": Constants.NIDM_MRI_FLOW, + "dti": Constants.NIDM_MRI_DIFFUSION_TENSOR, + "asl": Constants.NIDM_MRI_ASL, } -#JSON file keys +# JSON file keys json_keys = { ##Image terms - "run" : Constants.NIDM_ACQUISITION_ENTITY, - "ImageType" : Constants.DICOM["ImageType"], - "ManufacturerModelName" : Constants.DICOM["ManufacturerModelName"], - "Manufacturer" : Constants.DICOM["Manufacturer"], - "ScanningSequence" : Constants.DICOM["ScanningSequence"], - "SequenceVariant" : Constants.DICOM["SequenceVariant"], - "ScanOptions" : 
Constants.DICOM["ScanOptions"], - "MRAcquisitionType" : Constants.DICOM["MRAcquisitionType"], - "SequenceName" : Constants.DICOM["SequenceName"], - "RepetitionTime" : Constants.DICOM["RepetitionTime"], - "RepetitionTimePreparation" : Constants.BIDS["RepetitionTimePreparation"], - "ArterialSpinLabelingType" : Constants.BIDS["ArterialSpinLabelingType"], - "PostLabelingDelay" : Constants.BIDS["PostLabelingDelay"], - "BackgroundSuppression" : Constants.BIDS["BackgroundSuppression"], - "BackgroundSuppressionPulseTime" : Constants.BIDS["BackgroundSuppressionPulseTime"], - "BackgroundSuppressionNumberPulses" : Constants.BIDS["BackgroundSuppressionNumberPulses"], - "LabelingLocationDescription" : Constants.BIDS["LabelingLocationDescription"], - "LookLocker" : Constants.BIDS["LookLocker"], - "LabelingEfficiency" : Constants.BIDS["LabelingEfficiency"], - "LabelingDuration" : Constants.BIDS["LabelingDuration"], - "LabelingPulseAverageGradient" : Constants.BIDS["LabelingPulseAverageGradient"], - "LabelingPulseMaximumGradient" : Constants.BIDS["LabelingPulseMaximumGradient"], - "LabelingPulseDuration" : Constants.BIDS["LabelingPulseDuration"], - "LabelingPulseFlipAngle" : Constants.BIDS["LabelingPulseFlipAngle"], - "LabelingPulseInterval" : Constants.BIDS["LabelingPulseInterval"], - "PCASLType" : Constants.BIDS["PCASLType"], - "M0Type" : Constants.BIDS["M0Type"], - "TotalAcquiredPairs" : Constants.BIDS["TotalAcquiredPairs"], - "VascularCrushing" : Constants.BIDS["VascularCrushing"], - "EchoTime" : Constants.BIDS["EchoTime"], - "InversionTime" : Constants.DICOM["InversionTime"], - "NumberOfAverages" : Constants.DICOM["NumberOfAverages"], - "ImagingFrequency" : Constants.DICOM["ImagingFrequency"], - "MagneticFieldStrength" : Constants.DICOM["MagneticFieldStrength"], - "NumberOfPhaseEncodingSteps" : Constants.DICOM["NumberOfPhaseEncodingSteps"], + "run": Constants.NIDM_ACQUISITION_ENTITY, + "ImageType": Constants.DICOM["ImageType"], + "ManufacturerModelName": Constants.DICOM["ManufacturerModelName"], + "Manufacturer": Constants.DICOM["Manufacturer"], + "ScanningSequence": Constants.DICOM["ScanningSequence"], + "SequenceVariant": Constants.DICOM["SequenceVariant"], + "ScanOptions": Constants.DICOM["ScanOptions"], + "MRAcquisitionType": Constants.DICOM["MRAcquisitionType"], + "SequenceName": Constants.DICOM["SequenceName"], + "RepetitionTime": Constants.DICOM["RepetitionTime"], + "RepetitionTimePreparation": Constants.BIDS["RepetitionTimePreparation"], + "ArterialSpinLabelingType": Constants.BIDS["ArterialSpinLabelingType"], + "PostLabelingDelay": Constants.BIDS["PostLabelingDelay"], + "BackgroundSuppression": Constants.BIDS["BackgroundSuppression"], + "BackgroundSuppressionPulseTime": Constants.BIDS["BackgroundSuppressionPulseTime"], + "BackgroundSuppressionNumberPulses": Constants.BIDS[ + "BackgroundSuppressionNumberPulses" + ], + "LabelingLocationDescription": Constants.BIDS["LabelingLocationDescription"], + "LookLocker": Constants.BIDS["LookLocker"], + "LabelingEfficiency": Constants.BIDS["LabelingEfficiency"], + "LabelingDuration": Constants.BIDS["LabelingDuration"], + "LabelingPulseAverageGradient": Constants.BIDS["LabelingPulseAverageGradient"], + "LabelingPulseMaximumGradient": Constants.BIDS["LabelingPulseMaximumGradient"], + "LabelingPulseDuration": Constants.BIDS["LabelingPulseDuration"], + "LabelingPulseFlipAngle": Constants.BIDS["LabelingPulseFlipAngle"], + "LabelingPulseInterval": Constants.BIDS["LabelingPulseInterval"], + "PCASLType": Constants.BIDS["PCASLType"], + "M0Type": 
Constants.BIDS["M0Type"], + "TotalAcquiredPairs": Constants.BIDS["TotalAcquiredPairs"], + "VascularCrushing": Constants.BIDS["VascularCrushing"], + "EchoTime": Constants.BIDS["EchoTime"], + "InversionTime": Constants.DICOM["InversionTime"], + "NumberOfAverages": Constants.DICOM["NumberOfAverages"], + "ImagingFrequency": Constants.DICOM["ImagingFrequency"], + "MagneticFieldStrength": Constants.DICOM["MagneticFieldStrength"], + "NumberOfPhaseEncodingSteps": Constants.DICOM["NumberOfPhaseEncodingSteps"], "EchoTrainLength": Constants.DICOM["EchoTrainLength"], "PercentSampling": Constants.DICOM["PercentSampling"], "PercentPhaseFieldOfView": Constants.DICOM["PercentPhaseFieldOfView"], - "PixelBandwidth":Constants.DICOM["PixelBandwidth"], + "PixelBandwidth": Constants.DICOM["PixelBandwidth"], "AccelerationFactorPE": Constants.DICOM["AccelerationFactorPE"], "AccelNumReferenceLines": Constants.DICOM["AccelNumReferenceLines"], "TotalScanTimeSec": Constants.DICOM["TotalScanTimeSec"], @@ -91,21 +93,23 @@ "ProtocolName": Constants.DICOM["ProtocolName"], "TransmitCoilName": Constants.DICOM["TransmitCoilName"], "AcquisitionMatrix": Constants.DICOM["AcquisitionMatrix"], - "AcquisitionVoxelSize" : Constants.BIDS["AcquisitionVoxelSize"], + "AcquisitionVoxelSize": Constants.BIDS["AcquisitionVoxelSize"], "InPlanePhaseEncodingDirection": Constants.DICOM["InPlanePhaseEncodingDirection"], "FlipAngle": Constants.BIDS["FlipAngle"], "VariableFlipAngleFlag": Constants.DICOM["VariableFlipAngleFlag"], "PatientPosition": Constants.DICOM["PatientPosition"], "PhaseEncodingDirection": Constants.BIDS["PhaseEncodingDirection"], "SliceTiming": Constants.BIDS["SliceTiming"], - "TotalReadoutTime" : Constants.BIDS["TotalReadoutTime"], - "EffectiveEchoSpacing" : Constants.NIDM["EffectiveEchoSpacing"], - "NumberDiscardedVolumesByScanner" : Constants.NIDM["NumberDiscardedVolumesByScanner"], - "NumberDiscardedVolumesByUser" : Constants.NIDM["NumberDiscardedVolumesByUser"], - "DelayTime" : Constants.NIDM["DelayTime"], - "PulseSequenceType" : Constants.DICOM["PulseSequenceName"], + "TotalReadoutTime": Constants.BIDS["TotalReadoutTime"], + "EffectiveEchoSpacing": Constants.NIDM["EffectiveEchoSpacing"], + "NumberDiscardedVolumesByScanner": Constants.NIDM[ + "NumberDiscardedVolumesByScanner" + ], + "NumberDiscardedVolumesByUser": Constants.NIDM["NumberDiscardedVolumesByUser"], + "DelayTime": Constants.NIDM["DelayTime"], + "PulseSequenceType": Constants.DICOM["PulseSequenceName"], ###Task Stuff - "TaskName" : Constants.NIDM_MRI_FUNCTION_TASK + "TaskName": Constants.NIDM_MRI_FUNCTION_TASK # "CogAtlasID" : # "CogPOID" : # "TaskDescription" : diff --git a/nidm/core/Constants.py b/nidm/core/Constants.py index 0211cd05..0cd21e27 100644 --- a/nidm/core/Constants.py +++ b/nidm/core/Constants.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -''' Definition of constants +""" Definition of constants @author: Camille Maumet @copyright: University of Warwick 2014 @@ -8,41 +8,44 @@ 10/3/17 Modified Namespace to be QualifiedName for provtoolbox support...left most of the NIDM-Results Namespaces the same @author: Sanu Ann Abraham 05/04/2018 Added python ProvONE support -''' -import six -from rdflib import Namespace, Graph -from prov.model import ProvDocument, QualifiedName +""" +from collections import namedtuple +from prov.constants import PROV_ATTRIBUTE_LITERALS, PROV_ATTRIBUTE_QNAMES, PROV_N_MAP from prov.model import Namespace as provNamespace -from prov.constants import PROV_ATTRIBUTE_QNAMES, PROV_ATTRIBUTE_LITERALS, \ - PROV_N_MAP +from prov.model import 
ProvDocument, QualifiedName +from rdflib import Graph, Namespace +import six -from collections import namedtuple DD = namedtuple("DD", ["source", "variable"]) -PROV = Namespace('http://www.w3.org/ns/prov#') -PROVONE = provNamespace('provone', 'http://purl.dataone.org/provone/2015/01/15/ontology#') +PROV = Namespace("http://www.w3.org/ns/prov#") +PROVONE = provNamespace( + "provone", "http://purl.dataone.org/provone/2015/01/15/ontology#" +) -NIDM_URL = 'http://purl.org/nidash/nidm#' +NIDM_URL = "http://purl.org/nidash/nidm#" NIDM = Namespace(NIDM_URL) -NIIRI = Namespace('http://iri.nidash.org/') -AFNI = Namespace('http://purl.org/nidash/afni#') -SPM = Namespace('http://purl.org/nidash/spm#') -FSL = Namespace('http://purl.org/nidash/fsl#') -FREESURFER = Namespace('https://surfer.nmr.mgh.harvard.edu/') -ANTS = Namespace('http://stnava.github.io/ANTs/') -RDFS = Namespace('http://www.w3.org/2000/01/rdf-schema#') -CRYPTO = Namespace('http://id.loc.gov/vocabulary/preservation/\ -cryptographicHashFunctions#') -DC = Namespace('http://purl.org/dc/elements/1.1/') -DCT = Namespace('http://purl.org/dc/terms/') -OWL = Namespace('http://www.w3.org/2002/07/owl#') -XSD = Namespace('http://www.w3.org/2001/XMLSchema#') +NIIRI = Namespace("http://iri.nidash.org/") +AFNI = Namespace("http://purl.org/nidash/afni#") +SPM = Namespace("http://purl.org/nidash/spm#") +FSL = Namespace("http://purl.org/nidash/fsl#") +FREESURFER = Namespace("https://surfer.nmr.mgh.harvard.edu/") +ANTS = Namespace("http://stnava.github.io/ANTs/") +RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#") +CRYPTO = Namespace( + "http://id.loc.gov/vocabulary/preservation/\ +cryptographicHashFunctions#" +) +DC = Namespace("http://purl.org/dc/elements/1.1/") +DCT = Namespace("http://purl.org/dc/terms/") +OWL = Namespace("http://www.w3.org/2002/07/owl#") +XSD = Namespace("http://www.w3.org/2001/XMLSchema#") OBO_URL = "http://purl.obolibrary.org/obo/" OBO = Namespace(OBO_URL) -#Added by DBK for NIDM-Experiment 1/13/17 -NFO = Namespace('http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#') +# Added by DBK for NIDM-Experiment 1/13/17 +NFO = Namespace("http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#") SCR = Namespace("http://scicrunch.org/resolver/") NLX = Namespace("http://uri.neuinfo.org/nif/nifstd/") SKOS = Namespace("http://www.w3.org/2004/02/skos/core#") @@ -65,14 +68,14 @@ EDAM = Namespace("https://bioportal.bioontology.org/ontologies/EDAM") namespaces = { - # "prov": PROV, + # "prov": PROV, "nidm": NIDM, "niiri": NIIRI, "afni": AFNI, "spm": SPM, "fsl": FSL, - "freesurfer": FREESURFER, - "ants": ANTS, + "freesurfer": FREESURFER, + "ants": ANTS, "rdfs": RDFS, "crypto": CRYPTO, "dct": DCT, @@ -87,474 +90,508 @@ "dctypes": DCTYPES, "ncit": NCIT, "dcat": DCAT, - "birnlex" : BIRNLEX, - "ndar" : NDAR, - "ncicb" : NCICB, - "sio" : SIO, - "bids" : BIDS, - "owl" : OWL, - "onli" : ONLI, - "pato" : PATO, - "datalad" : DATALAD, - "ilx" : INTERLEX, - "edam" : EDAM - } + "birnlex": BIRNLEX, + "ndar": NDAR, + "ncicb": NCICB, + "sio": SIO, + "bids": BIDS, + "owl": OWL, + "onli": ONLI, + "pato": PATO, + "datalad": DATALAD, + "ilx": INTERLEX, + "edam": EDAM, +} # Empty graph used to compute qnames q_graph = Graph() for name, namespace in namespaces.items(): - q_graph.bind(name, namespace) + q_graph.bind(name, namespace) + # DBK Added - Empty graph using provtoolbox used to compute qnames # dj: changed to a new class class NIDMDocument(ProvDocument): - def __init__(self, namespaces=None): - if namespaces is not None: - super(NIDMDocument, 
self).__init__(namespaces=namespaces) - else: - super(NIDMDocument, self).__init__() + def __init__(self, namespaces=None): + if namespaces is not None: + super(NIDMDocument, self).__init__(namespaces=namespaces) + else: + super(NIDMDocument, self).__init__() # NIDM constants -FSL_GAMMAHRF = FSL['FSL_0000007'] -FSL_FSLS_GAMMA_HRF = FSL['FSL_0000006'] -NIDM_HAS_MRI_PROTOCOL = NIDM['NIDM_0000172'] -NIDM_NUMBER_OF_SUBJECTS = NIDM['NIDM_0000171'] -NIDM_GROUP_NAME = NIDM['NIDM_0000170'] -NIDM_DATA = NIDM['NIDM_0000169'] -NIDM_SPM_RESULTS_NIDM = NIDM['NIDM_0000168'] -NIDM_NIDMFSL = NIDM['NIDM_0000167'] -NIDM_NIDM_RESULTS_EXPORT = NIDM['NIDM_0000166'] -NIDM_NIDM_RESULTS_EXPORTER = NIDM['NIDM_0000165'] -NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE = NIDM['NIDM_0000164'] -NIDM_CONTRAST_EXPLAINED_MEAN_SQUARE_MAP = NIDM['NIDM_0000163'] -NIDM_THRESHOLD = NIDM['NIDM_0000162'] -NIDM_EQUIVALENT_THRESHOLD = NIDM['NIDM_0000161'] -NIDM_P_VALUE_UNCORRECTED = NIDM['NIDM_0000160'] +FSL_GAMMAHRF = FSL["FSL_0000007"] +FSL_FSLS_GAMMA_HRF = FSL["FSL_0000006"] +NIDM_HAS_MRI_PROTOCOL = NIDM["NIDM_0000172"] +NIDM_NUMBER_OF_SUBJECTS = NIDM["NIDM_0000171"] +NIDM_GROUP_NAME = NIDM["NIDM_0000170"] +NIDM_DATA = NIDM["NIDM_0000169"] +NIDM_SPM_RESULTS_NIDM = NIDM["NIDM_0000168"] +NIDM_NIDMFSL = NIDM["NIDM_0000167"] +NIDM_NIDM_RESULTS_EXPORT = NIDM["NIDM_0000166"] +NIDM_NIDM_RESULTS_EXPORTER = NIDM["NIDM_0000165"] +NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE = NIDM["NIDM_0000164"] +NIDM_CONTRAST_EXPLAINED_MEAN_SQUARE_MAP = NIDM["NIDM_0000163"] +NIDM_THRESHOLD = NIDM["NIDM_0000162"] +NIDM_EQUIVALENT_THRESHOLD = NIDM["NIDM_0000161"] +NIDM_P_VALUE_UNCORRECTED = NIDM["NIDM_0000160"] NIDM_P_VALUE_UNCORRECTED_QNAME = q_graph.qname(NIDM_P_VALUE_UNCORRECTED) -NIDM_NOISE_FWHM_IN_VOXELS = NIDM['NIDM_0000159'] -NIDM_NOISE_FWHM_IN_VERTICES = NIDM['NIDM_0000158'] -NIDM_NOISE_FWHM_IN_UNITS = NIDM['NIDM_0000157'] -FSL_FEAT_VERSION = FSL['FSL_0000005'] -FSL_DRIFT_CUTOFF_PERIOD = FSL['FSL_0000004'] -FSL_TEMPORAL_DERIVATIVE = FSL['FSL_0000003'] -FSL_GAUSSIAN_RUNNING_LINE_DRIFT_MODEL = FSL['FSL_0000002'] -FSL_FSLS_GAMMA_DIFFERENCE_HRF = FSL['FSL_0000001'] -SPM_PARTIAL_CONJUNCTION_DEGREE = SPM['SPM_0000015'] -SPM_SMALLEST_SUPRA_THRESHOLD_CLUSTER_SIZE_IN_VOXELS_FWE05 = SPM['SPM_0000014'] -SPM_SMALLEST_SUPRA_THRESHOLD_CLUSTER_SIZE_IN_VOXELS_FDR05 = SPM['SPM_0000013'] -SPM_SMALLEST_SUPRA_THRESHOLD_CLUSTER_SIZE_IN_VERTICES_FWE05 = SPM['SPM_0000012'] -SPM_SMALLEST_SUPRA_THRESHOLD_CLUSTER_SIZE_IN_VERTICES_FDR05 = SPM['SPM_0000011'] -SPM_SEARCH_VOLUME_RESELS_GEOMETRY = SPM['SPM_0000010'] -SPM_TEMPORAL_DERIVATIVE = SPM['SPM_0000006'] -SPM_KCONJUNCTION_INFERENCE = SPM['SPM_0000005'] -SPM_CANONICAL_HRF = SPM['SPM_0000004'] -SPM_DISPERSION_DERIVATIVE = SPM['SPM_0000003'] -SPM_DCT_DRIFT_MODEL = SPM['SPM_0000002'] -SPM_SPMS_DRIFT_CUT_OFF_PERIOD = SPM['SPM_0000001'] -NIDM_CLUSTERSIZEINRESELS = NIDM['NIDM_0000156'] -NIDM_F_MRI_DESIGN = NIDM['NIDM_0000155'] -NIDM_MIXED_DESIGN = NIDM['NIDM_0000154'] -NIDM_EVENT_RELATED_DESIGN = NIDM['NIDM_0000153'] -NIDM_BLOCK_BASED_DESIGN = NIDM['NIDM_0000152'] -NIDM_SINE_BASIS_SET = NIDM['NIDM_0000151'] -NIDM_LINEAR_SPLINE_BASIS_SET = NIDM['NIDM_0000150'] -NIDM_SEARCH_VOLUME_IN_RESELS = NIDM['NIDM_0000149'] -NIDM_RESEL_SIZE_IN_VOXELS = NIDM['NIDM_0000148'] -NIDM_HEIGHT_CRITICAL_THRESHOLD_FWE_05 = NIDM['NIDM_0000147'] -NIDM_HEIGHT_CRITICAL_THRESHOLD_FDR_05 = NIDM['NIDM_0000146'] -NIDM_NOISE_ROUGHNESS_IN_VOXELS = NIDM['NIDM_0000145'] -NIDM_RESELS_PER_VOXEL_MAP = NIDM['NIDM_0000144'] -NIDM_EXPECTED_NUMBER_OF_VOXELS_PER_CLUSTER = 
NIDM['NIDM_0000143'] -NIDM_EXPECTED_NUMBER_OF_VERTICES_PER_CLUSTER = NIDM['NIDM_0000142'] -NIDM_EXPECTED_NUMBER_OF_CLUSTERS = NIDM['NIDM_0000141'] -NIDM_CLUSTER_CENTER_OF_GRAVITY = NIDM['NIDM_0000140'] -NIDM_COORDINATE_VECTOR_IN_VOXELS = NIDM['NIDM_0000139'] -NIDM_HAS_MAXIMUM_INTENSITY_PROJECTION = NIDM['NIDM_0000138'] -NIDM_SEARCH_VOLUME_IN_VERTICES = NIDM['NIDM_0000137'] -NIDM_SEARCH_VOLUME_IN_UNITS = NIDM['NIDM_0000136'] -NIDM_CONTRAST_VARIANCE_MAP = NIDM['NIDM_0000135'] -NIDM_WITH_ESTIMATION_METHOD = NIDM['NIDM_0000134'] -NIDM_VOXEL_UNITS = NIDM['NIDM_0000133'] -NIDM_VOXEL_TO_WORLD_MAPPING = NIDM['NIDM_0000132'] -NIDM_VOXEL_SIZE = NIDM['NIDM_0000131'] -NIDM_VOXEL6CONNECTED = NIDM['NIDM_0000130'] -NIDM_VOXEL26CONNECTED = NIDM['NIDM_0000129'] -NIDM_VOXEL18CONNECTED = NIDM['NIDM_0000128'] -NIDM_VERSION = NIDM['NIDM_0000127'] -NIDM_VARIANCE_SPATIAL_MODEL = NIDM['NIDM_0000126'] -NIDM_USER_SPECIFIED_THRESHOLD_TYPE = NIDM['NIDM_0000125'] -NIDM_TARGET_INTENSITY = NIDM['NIDM_0000124'] -NIDM_STATISTIC_TYPE = NIDM['NIDM_0000123'] -NIDM_SOFTWARE_VERSION = NIDM['NIDM_0000122'] -NIDM_SEARCH_VOLUME_IN_VOXELS = NIDM['NIDM_0000121'] -NIDM_RANDOM_FIELD_STATIONARITY = NIDM['NIDM_0000120'] -NIDM_Q_VALUE_FDR = NIDM['NIDM_0000119'] -NIDM_PIXEL8CONNECTED = NIDM['NIDM_0000118'] -NIDM_PIXEL4CONNECTED = NIDM['NIDM_0000117'] -NIDM_P_VALUE_UNCORRECTED = NIDM['NIDM_0000116'] -NIDM_P_VALUE_FWER = NIDM['NIDM_0000115'] -NIDM_P_VALUE = NIDM['NIDM_0000114'] -NIDM_OBJECT_MODEL = NIDM['NIDM_0000113'] -NIDM_NUMBER_OF_DIMENSIONS = NIDM['NIDM_0000112'] -NIDM_NUMBER_OF_CLUSTERS = NIDM['NIDM_0000111'] -NIDM_GAUSSIAN_HRF = NIDM['NIDM_0000110'] -NIDM_MIN_DISTANCE_BETWEEN_PEAKS = NIDM['NIDM_0000109'] -NIDM_MAX_NUMBER_OF_PEAKS_PER_CLUSTER = NIDM['NIDM_0000108'] -NIDM_MASKED_MEDIAN = NIDM['NIDM_0000107'] -NIDM_IS_USER_DEFINED = NIDM['NIDM_0000106'] -NIDM_IN_WORLD_COORDINATE_SYSTEM = NIDM['NIDM_0000105'] -NIDM_IN_COORDINATE_SPACE = NIDM['NIDM_0000104'] -NIDM_HAS_MAP_HEADER = NIDM['NIDM_0000103'] -NIDM_HAS_HRF_BASIS = NIDM['NIDM_0000102'] -NIDM_HAS_ERROR_DISTRIBUTION = NIDM['NIDM_0000101'] -NIDM_HAS_ERROR_DEPENDENCE = NIDM['NIDM_0000100'] -NIDM_HAS_CONNECTIVITY_CRITERION = NIDM['NIDM_0000099'] -NIDM_HAS_CLUSTER_LABELS_MAP = NIDM['NIDM_0000098'] -NIDM_HAS_ALTERNATIVE_HYPOTHESIS = NIDM['NIDM_0000097'] -NIDM_GRAND_MEAN_SCALING = NIDM['NIDM_0000096'] -NIDM_ERROR_VARIANCE_HOMOGENEOUS = NIDM['NIDM_0000094'] -NIDM_ERROR_DEGREES_OF_FREEDOM = NIDM['NIDM_0000093'] -NIDM_EQUIVALENT_ZSTATISTIC = NIDM['NIDM_0000092'] -NIDM_EFFECT_DEGREES_OF_FREEDOM = NIDM['NIDM_0000091'] -NIDM_DIMENSIONS_IN_VOXELS = NIDM['NIDM_0000090'] -NIDM_DEPENDENCE_SPATIAL_MODEL = NIDM['NIDM_0000089'] -NIDM_HAS_DRIFT_MODEL = NIDM['NIDM_0000088'] -NIDM_DRIFT_MODEL = NIDM['NIDM_0000087'] -NIDM_COORDINATE_VECTOR = NIDM['NIDM_0000086'] -NIDM_CONTRAST_NAME = NIDM['NIDM_0000085'] -NIDM_CLUSTER_SIZE_IN_VOXELS = NIDM['NIDM_0000084'] -NIDM_CLUSTER_SIZE_IN_VERTICES = NIDM['NIDM_0000083'] -NIDM_CLUSTER_LABEL_ID = NIDM['NIDM_0000082'] -NIDM_WORLD_COORDINATE_SYSTEM = NIDM['NIDM_0000081'] -NIDM_VOXEL_CONNECTIVITY_CRITERION = NIDM['NIDM_0000080'] -NIDM_TWO_TAILED_TEST = NIDM['NIDM_0000079'] -NIDM_TALAIRACH_COORDINATE_SYSTEM = NIDM['NIDM_0000078'] -NIDM_SUBJECT_COORDINATE_SYSTEM = NIDM['NIDM_0000077'] -NIDM_STATISTIC_MAP = NIDM['NIDM_0000076'] -NIDM_STANDARDIZED_COORDINATE_SYSTEM = NIDM['NIDM_0000075'] -NIDM_SPATIALLY_REGULARIZED_MODEL = NIDM['NIDM_0000074'] -NIDM_SPATIALLY_LOCAL_MODEL = NIDM['NIDM_0000073'] -NIDM_SPATIALLY_GLOBAL_MODEL = NIDM['NIDM_0000072'] -NIDM_SPATIAL_MODEL = 
NIDM['NIDM_0000071'] -NIDM_SUPRA_THRESHOLD_CLUSTER = NIDM['NIDM_0000070'] -NIDM_FOURIER_BASIS_SET = NIDM['NIDM_0000069'] -NIDM_SEARCH_SPACE_MASK_MAP = NIDM['NIDM_0000068'] -NIDM_CUSTOM_BASIS_SET = NIDM['NIDM_0000067'] -NIDM_RESIDUAL_MEAN_SQUARES_MAP = NIDM['NIDM_0000066'] -NIDM_POISSON_DISTRIBUTION = NIDM['NIDM_0000065'] -NIDM_PIXEL_CONNECTIVITY_CRITERION = NIDM['NIDM_0000064'] -NIDM_PEAK_DEFINITION_CRITERIA = NIDM['NIDM_0000063'] -NIDM_PEAK = NIDM['NIDM_0000062'] -NIDM_PARAMETER_ESTIMATE_MAP = NIDM['NIDM_0000061'] -NIDM_ONE_TAILED_TEST = NIDM['NIDM_0000060'] -NIDM_NON_PARAMETRIC_SYMMETRIC_DISTRIBUTION = NIDM['NIDM_0000059'] -NIDM_NON_PARAMETRIC_DISTRIBUTION = NIDM['NIDM_0000058'] -NIDM_NIDM_OBJECT_MODEL = NIDM['NIDM_0000057'] -NIDM_MODEL_PARAMETERS_ESTIMATION = NIDM['NIDM_0000056'] -NIDM_MNI305_COORDINATE_SYSTEM = NIDM['NIDM_0000055'] -NIDM_MASK_MAP = NIDM['NIDM_0000054'] -NIDM_MAP_HEADER = NIDM['NIDM_0000053'] -NIDM_MAP = NIDM['NIDM_0000052'] -NIDM_MNI_COORDINATE_SYSTEM = NIDM['NIDM_0000051'] -NIDM_IXI549_COORDINATE_SYSTEM = NIDM['NIDM_0000050'] -NIDM_INFERENCE = NIDM['NIDM_0000049'] -NIDM_INDEPENDENT_ERROR = NIDM['NIDM_0000048'] -NIDM_ICBM_MNI152_NON_LINEAR6TH_GENERATION_COORDINATE_SYSTEM = NIDM['NIDM_0000047'] -NIDM_ICBM_MNI152_NON_LINEAR2009C_SYMMETRIC_COORDINATE_SYSTEM = NIDM['NIDM_0000046'] -NIDM_ICBM_MNI152_NON_LINEAR2009C_ASYMMETRIC_COORDINATE_SYSTEM = NIDM['NIDM_0000045'] -NIDM_ICBM_MNI152_NON_LINEAR2009B_SYMMETRIC_COORDINATE_SYSTEM = NIDM['NIDM_0000044'] -NIDM_ICBM_MNI152_NON_LINEAR2009B_ASYMMETRIC_COORDINATE_SYSTEM = NIDM['NIDM_0000043'] -NIDM_ICBM_MNI152_NON_LINEAR2009A_SYMMETRIC_COORDINATE_SYSTEM = NIDM['NIDM_0000042'] -NIDM_ICBM_MNI152_NON_LINEAR2009A_ASYMMETRIC_COORDINATE_SYSTEM = NIDM['NIDM_0000041'] -NIDM_ICBM_MNI152_LINEAR_COORDINATE_SYSTEM = NIDM['NIDM_0000040'] -NIDM_ICBM452_WARP5_COORDINATE_SYSTEM = NIDM['NIDM_0000039'] -NIDM_ICBM452_AIR_COORDINATE_SYSTEM = NIDM['NIDM_0000038'] -NIDM_HEMODYNAMIC_RESPONSE_FUNCTION_DERIVATIVE = NIDM['NIDM_0000037'] -NIDM_HEMODYNAMIC_RESPONSE_FUNCTION_BASIS = NIDM['NIDM_0000036'] -NIDM_HEMODYNAMIC_RESPONSE_FUNCTION = NIDM['NIDM_0000035'] -NIDM_HEIGHT_THRESHOLD = NIDM['NIDM_0000034'] -NIDM_GRAND_MEAN_MAP = NIDM['NIDM_0000033'] -NIDM_GAMMA_HRF = NIDM['NIDM_0000031'] -NIDM_GAMMA_HRB = NIDM['NIDM_0000030'] -NIDM_GAMMA_DIFFERENCE_HRF = NIDM['NIDM_0000029'] -NIDM_FINITE_IMPULSE_RESPONSE_HRB = NIDM['NIDM_0000028'] -NIDM_RESULTS = NIDM['NIDM_0000027'] -NIDM_EXTENT_THRESHOLD = NIDM['NIDM_0000026'] -NIDM_EXCURSION_SET_MAP = NIDM['NIDM_0000025'] -NIDM_EXCHANGEABLE_ERROR = NIDM['NIDM_0000024'] -NIDM_ERROR_MODEL = NIDM['NIDM_0000023'] -NIDM_ERROR_DISTRIBUTION = NIDM['NIDM_0000022'] -NIDM_REGRESSOR_NAMES = NIDM['NIDM_0000021'] -NIDM_DISPLAY_MASK_MAP = NIDM['NIDM_0000020'] -NIDM_DESIGN_MATRIX = NIDM['NIDM_0000019'] -NIDM_CUSTOM_COORDINATE_SYSTEM = NIDM['NIDM_0000017'] -NIDM_COORDINATE_SPACE = NIDM['NIDM_0000016'] -NIDM_COORDINATE = NIDM['NIDM_0000015'] -NIDM_LEGENDRE_POLYNOMIAL_ORDER = NIDM['NIDM_0000014'] -NIDM_CONTRAST_STANDARD_ERROR_MAP = NIDM['NIDM_0000013'] -NIDM_CONNECTIVITY_CRITERION = NIDM['NIDM_0000012'] -NIDM_CONJUNCTION_INFERENCE = NIDM['NIDM_0000011'] -NIDM_HAS_FMRI_DESIGN = NIDM['NIDM_0000010'] -NIDM_COLIN27_COORDINATE_SYSTEM = NIDM['NIDM_0000009'] -NIDM_CLUSTER_LABELS_MAP = NIDM['NIDM_0000008'] -NIDM_CLUSTER_DEFINITION_CRITERIA = NIDM['NIDM_0000007'] -NIDM_CLUSTER = NIDM['NIDM_0000006'] -NIDM_BINOMIAL_DISTRIBUTION = NIDM['NIDM_0000005'] -NIDM_BINARY_MAP = NIDM['NIDM_0000004'] -NIDM_CONTRAST_ESTIMATION = NIDM['NIDM_0000001'] 
-NIDM_CONTRAST_MAP = NIDM['NIDM_0000002'] +NIDM_NOISE_FWHM_IN_VOXELS = NIDM["NIDM_0000159"] +NIDM_NOISE_FWHM_IN_VERTICES = NIDM["NIDM_0000158"] +NIDM_NOISE_FWHM_IN_UNITS = NIDM["NIDM_0000157"] +FSL_FEAT_VERSION = FSL["FSL_0000005"] +FSL_DRIFT_CUTOFF_PERIOD = FSL["FSL_0000004"] +FSL_TEMPORAL_DERIVATIVE = FSL["FSL_0000003"] +FSL_GAUSSIAN_RUNNING_LINE_DRIFT_MODEL = FSL["FSL_0000002"] +FSL_FSLS_GAMMA_DIFFERENCE_HRF = FSL["FSL_0000001"] +SPM_PARTIAL_CONJUNCTION_DEGREE = SPM["SPM_0000015"] +SPM_SMALLEST_SUPRA_THRESHOLD_CLUSTER_SIZE_IN_VOXELS_FWE05 = SPM["SPM_0000014"] +SPM_SMALLEST_SUPRA_THRESHOLD_CLUSTER_SIZE_IN_VOXELS_FDR05 = SPM["SPM_0000013"] +SPM_SMALLEST_SUPRA_THRESHOLD_CLUSTER_SIZE_IN_VERTICES_FWE05 = SPM["SPM_0000012"] +SPM_SMALLEST_SUPRA_THRESHOLD_CLUSTER_SIZE_IN_VERTICES_FDR05 = SPM["SPM_0000011"] +SPM_SEARCH_VOLUME_RESELS_GEOMETRY = SPM["SPM_0000010"] +SPM_TEMPORAL_DERIVATIVE = SPM["SPM_0000006"] +SPM_KCONJUNCTION_INFERENCE = SPM["SPM_0000005"] +SPM_CANONICAL_HRF = SPM["SPM_0000004"] +SPM_DISPERSION_DERIVATIVE = SPM["SPM_0000003"] +SPM_DCT_DRIFT_MODEL = SPM["SPM_0000002"] +SPM_SPMS_DRIFT_CUT_OFF_PERIOD = SPM["SPM_0000001"] +NIDM_CLUSTERSIZEINRESELS = NIDM["NIDM_0000156"] +NIDM_F_MRI_DESIGN = NIDM["NIDM_0000155"] +NIDM_MIXED_DESIGN = NIDM["NIDM_0000154"] +NIDM_EVENT_RELATED_DESIGN = NIDM["NIDM_0000153"] +NIDM_BLOCK_BASED_DESIGN = NIDM["NIDM_0000152"] +NIDM_SINE_BASIS_SET = NIDM["NIDM_0000151"] +NIDM_LINEAR_SPLINE_BASIS_SET = NIDM["NIDM_0000150"] +NIDM_SEARCH_VOLUME_IN_RESELS = NIDM["NIDM_0000149"] +NIDM_RESEL_SIZE_IN_VOXELS = NIDM["NIDM_0000148"] +NIDM_HEIGHT_CRITICAL_THRESHOLD_FWE_05 = NIDM["NIDM_0000147"] +NIDM_HEIGHT_CRITICAL_THRESHOLD_FDR_05 = NIDM["NIDM_0000146"] +NIDM_NOISE_ROUGHNESS_IN_VOXELS = NIDM["NIDM_0000145"] +NIDM_RESELS_PER_VOXEL_MAP = NIDM["NIDM_0000144"] +NIDM_EXPECTED_NUMBER_OF_VOXELS_PER_CLUSTER = NIDM["NIDM_0000143"] +NIDM_EXPECTED_NUMBER_OF_VERTICES_PER_CLUSTER = NIDM["NIDM_0000142"] +NIDM_EXPECTED_NUMBER_OF_CLUSTERS = NIDM["NIDM_0000141"] +NIDM_CLUSTER_CENTER_OF_GRAVITY = NIDM["NIDM_0000140"] +NIDM_COORDINATE_VECTOR_IN_VOXELS = NIDM["NIDM_0000139"] +NIDM_HAS_MAXIMUM_INTENSITY_PROJECTION = NIDM["NIDM_0000138"] +NIDM_SEARCH_VOLUME_IN_VERTICES = NIDM["NIDM_0000137"] +NIDM_SEARCH_VOLUME_IN_UNITS = NIDM["NIDM_0000136"] +NIDM_CONTRAST_VARIANCE_MAP = NIDM["NIDM_0000135"] +NIDM_WITH_ESTIMATION_METHOD = NIDM["NIDM_0000134"] +NIDM_VOXEL_UNITS = NIDM["NIDM_0000133"] +NIDM_VOXEL_TO_WORLD_MAPPING = NIDM["NIDM_0000132"] +NIDM_VOXEL_SIZE = NIDM["NIDM_0000131"] +NIDM_VOXEL6CONNECTED = NIDM["NIDM_0000130"] +NIDM_VOXEL26CONNECTED = NIDM["NIDM_0000129"] +NIDM_VOXEL18CONNECTED = NIDM["NIDM_0000128"] +NIDM_VERSION = NIDM["NIDM_0000127"] +NIDM_VARIANCE_SPATIAL_MODEL = NIDM["NIDM_0000126"] +NIDM_USER_SPECIFIED_THRESHOLD_TYPE = NIDM["NIDM_0000125"] +NIDM_TARGET_INTENSITY = NIDM["NIDM_0000124"] +NIDM_STATISTIC_TYPE = NIDM["NIDM_0000123"] +NIDM_SOFTWARE_VERSION = NIDM["NIDM_0000122"] +NIDM_SEARCH_VOLUME_IN_VOXELS = NIDM["NIDM_0000121"] +NIDM_RANDOM_FIELD_STATIONARITY = NIDM["NIDM_0000120"] +NIDM_Q_VALUE_FDR = NIDM["NIDM_0000119"] +NIDM_PIXEL8CONNECTED = NIDM["NIDM_0000118"] +NIDM_PIXEL4CONNECTED = NIDM["NIDM_0000117"] +NIDM_P_VALUE_UNCORRECTED = NIDM["NIDM_0000116"] +NIDM_P_VALUE_FWER = NIDM["NIDM_0000115"] +NIDM_P_VALUE = NIDM["NIDM_0000114"] +NIDM_OBJECT_MODEL = NIDM["NIDM_0000113"] +NIDM_NUMBER_OF_DIMENSIONS = NIDM["NIDM_0000112"] +NIDM_NUMBER_OF_CLUSTERS = NIDM["NIDM_0000111"] +NIDM_GAUSSIAN_HRF = NIDM["NIDM_0000110"] +NIDM_MIN_DISTANCE_BETWEEN_PEAKS = NIDM["NIDM_0000109"] 
+NIDM_MAX_NUMBER_OF_PEAKS_PER_CLUSTER = NIDM["NIDM_0000108"] +NIDM_MASKED_MEDIAN = NIDM["NIDM_0000107"] +NIDM_IS_USER_DEFINED = NIDM["NIDM_0000106"] +NIDM_IN_WORLD_COORDINATE_SYSTEM = NIDM["NIDM_0000105"] +NIDM_IN_COORDINATE_SPACE = NIDM["NIDM_0000104"] +NIDM_HAS_MAP_HEADER = NIDM["NIDM_0000103"] +NIDM_HAS_HRF_BASIS = NIDM["NIDM_0000102"] +NIDM_HAS_ERROR_DISTRIBUTION = NIDM["NIDM_0000101"] +NIDM_HAS_ERROR_DEPENDENCE = NIDM["NIDM_0000100"] +NIDM_HAS_CONNECTIVITY_CRITERION = NIDM["NIDM_0000099"] +NIDM_HAS_CLUSTER_LABELS_MAP = NIDM["NIDM_0000098"] +NIDM_HAS_ALTERNATIVE_HYPOTHESIS = NIDM["NIDM_0000097"] +NIDM_GRAND_MEAN_SCALING = NIDM["NIDM_0000096"] +NIDM_ERROR_VARIANCE_HOMOGENEOUS = NIDM["NIDM_0000094"] +NIDM_ERROR_DEGREES_OF_FREEDOM = NIDM["NIDM_0000093"] +NIDM_EQUIVALENT_ZSTATISTIC = NIDM["NIDM_0000092"] +NIDM_EFFECT_DEGREES_OF_FREEDOM = NIDM["NIDM_0000091"] +NIDM_DIMENSIONS_IN_VOXELS = NIDM["NIDM_0000090"] +NIDM_DEPENDENCE_SPATIAL_MODEL = NIDM["NIDM_0000089"] +NIDM_HAS_DRIFT_MODEL = NIDM["NIDM_0000088"] +NIDM_DRIFT_MODEL = NIDM["NIDM_0000087"] +NIDM_COORDINATE_VECTOR = NIDM["NIDM_0000086"] +NIDM_CONTRAST_NAME = NIDM["NIDM_0000085"] +NIDM_CLUSTER_SIZE_IN_VOXELS = NIDM["NIDM_0000084"] +NIDM_CLUSTER_SIZE_IN_VERTICES = NIDM["NIDM_0000083"] +NIDM_CLUSTER_LABEL_ID = NIDM["NIDM_0000082"] +NIDM_WORLD_COORDINATE_SYSTEM = NIDM["NIDM_0000081"] +NIDM_VOXEL_CONNECTIVITY_CRITERION = NIDM["NIDM_0000080"] +NIDM_TWO_TAILED_TEST = NIDM["NIDM_0000079"] +NIDM_TALAIRACH_COORDINATE_SYSTEM = NIDM["NIDM_0000078"] +NIDM_SUBJECT_COORDINATE_SYSTEM = NIDM["NIDM_0000077"] +NIDM_STATISTIC_MAP = NIDM["NIDM_0000076"] +NIDM_STANDARDIZED_COORDINATE_SYSTEM = NIDM["NIDM_0000075"] +NIDM_SPATIALLY_REGULARIZED_MODEL = NIDM["NIDM_0000074"] +NIDM_SPATIALLY_LOCAL_MODEL = NIDM["NIDM_0000073"] +NIDM_SPATIALLY_GLOBAL_MODEL = NIDM["NIDM_0000072"] +NIDM_SPATIAL_MODEL = NIDM["NIDM_0000071"] +NIDM_SUPRA_THRESHOLD_CLUSTER = NIDM["NIDM_0000070"] +NIDM_FOURIER_BASIS_SET = NIDM["NIDM_0000069"] +NIDM_SEARCH_SPACE_MASK_MAP = NIDM["NIDM_0000068"] +NIDM_CUSTOM_BASIS_SET = NIDM["NIDM_0000067"] +NIDM_RESIDUAL_MEAN_SQUARES_MAP = NIDM["NIDM_0000066"] +NIDM_POISSON_DISTRIBUTION = NIDM["NIDM_0000065"] +NIDM_PIXEL_CONNECTIVITY_CRITERION = NIDM["NIDM_0000064"] +NIDM_PEAK_DEFINITION_CRITERIA = NIDM["NIDM_0000063"] +NIDM_PEAK = NIDM["NIDM_0000062"] +NIDM_PARAMETER_ESTIMATE_MAP = NIDM["NIDM_0000061"] +NIDM_ONE_TAILED_TEST = NIDM["NIDM_0000060"] +NIDM_NON_PARAMETRIC_SYMMETRIC_DISTRIBUTION = NIDM["NIDM_0000059"] +NIDM_NON_PARAMETRIC_DISTRIBUTION = NIDM["NIDM_0000058"] +NIDM_NIDM_OBJECT_MODEL = NIDM["NIDM_0000057"] +NIDM_MODEL_PARAMETERS_ESTIMATION = NIDM["NIDM_0000056"] +NIDM_MNI305_COORDINATE_SYSTEM = NIDM["NIDM_0000055"] +NIDM_MASK_MAP = NIDM["NIDM_0000054"] +NIDM_MAP_HEADER = NIDM["NIDM_0000053"] +NIDM_MAP = NIDM["NIDM_0000052"] +NIDM_MNI_COORDINATE_SYSTEM = NIDM["NIDM_0000051"] +NIDM_IXI549_COORDINATE_SYSTEM = NIDM["NIDM_0000050"] +NIDM_INFERENCE = NIDM["NIDM_0000049"] +NIDM_INDEPENDENT_ERROR = NIDM["NIDM_0000048"] +NIDM_ICBM_MNI152_NON_LINEAR6TH_GENERATION_COORDINATE_SYSTEM = NIDM["NIDM_0000047"] +NIDM_ICBM_MNI152_NON_LINEAR2009C_SYMMETRIC_COORDINATE_SYSTEM = NIDM["NIDM_0000046"] +NIDM_ICBM_MNI152_NON_LINEAR2009C_ASYMMETRIC_COORDINATE_SYSTEM = NIDM["NIDM_0000045"] +NIDM_ICBM_MNI152_NON_LINEAR2009B_SYMMETRIC_COORDINATE_SYSTEM = NIDM["NIDM_0000044"] +NIDM_ICBM_MNI152_NON_LINEAR2009B_ASYMMETRIC_COORDINATE_SYSTEM = NIDM["NIDM_0000043"] +NIDM_ICBM_MNI152_NON_LINEAR2009A_SYMMETRIC_COORDINATE_SYSTEM = NIDM["NIDM_0000042"] 
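For orientation while reading this mostly mechanical quote-style reflow: every constant in the hunk is a term IRI obtained by indexing a namespace object, and the NIDM-Experiment block that follows wraps terms in prov `QualifiedName` objects instead. A minimal sketch of both patterns, assuming rdflib- and prov-style namespaces (the base IRI below is an assumption, not taken from this diff):

```python
from prov.model import Namespace as ProvNamespace, QualifiedName
from rdflib import Namespace

# Assumed base IRI for the nidm prefix; PyNIDM defines the real one elsewhere.
NIDM = Namespace("http://purl.org/nidash/nidm#")

# NIDM-Results style constants: plain IRI lookup by numeric term id.
NIDM_P_VALUE = NIDM["NIDM_0000114"]              # an rdflib URIRef

# NIDM-Experiment style constants: prefixed prov QualifiedName.
nidm_prov = ProvNamespace("nidm", str(NIDM))
NIDM_PROJECT = QualifiedName(nidm_prov, "Project")

print(NIDM_P_VALUE)        # http://purl.org/nidash/nidm#NIDM_0000114
print(NIDM_PROJECT.uri)    # http://purl.org/nidash/nidm#Project
```

Either form resolves to the same IRI; the `QualifiedName` variant additionally carries the prefix used when the constant is written into a PROV document.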
+NIDM_ICBM_MNI152_NON_LINEAR2009A_ASYMMETRIC_COORDINATE_SYSTEM = NIDM["NIDM_0000041"] +NIDM_ICBM_MNI152_LINEAR_COORDINATE_SYSTEM = NIDM["NIDM_0000040"] +NIDM_ICBM452_WARP5_COORDINATE_SYSTEM = NIDM["NIDM_0000039"] +NIDM_ICBM452_AIR_COORDINATE_SYSTEM = NIDM["NIDM_0000038"] +NIDM_HEMODYNAMIC_RESPONSE_FUNCTION_DERIVATIVE = NIDM["NIDM_0000037"] +NIDM_HEMODYNAMIC_RESPONSE_FUNCTION_BASIS = NIDM["NIDM_0000036"] +NIDM_HEMODYNAMIC_RESPONSE_FUNCTION = NIDM["NIDM_0000035"] +NIDM_HEIGHT_THRESHOLD = NIDM["NIDM_0000034"] +NIDM_GRAND_MEAN_MAP = NIDM["NIDM_0000033"] +NIDM_GAMMA_HRF = NIDM["NIDM_0000031"] +NIDM_GAMMA_HRB = NIDM["NIDM_0000030"] +NIDM_GAMMA_DIFFERENCE_HRF = NIDM["NIDM_0000029"] +NIDM_FINITE_IMPULSE_RESPONSE_HRB = NIDM["NIDM_0000028"] +NIDM_RESULTS = NIDM["NIDM_0000027"] +NIDM_EXTENT_THRESHOLD = NIDM["NIDM_0000026"] +NIDM_EXCURSION_SET_MAP = NIDM["NIDM_0000025"] +NIDM_EXCHANGEABLE_ERROR = NIDM["NIDM_0000024"] +NIDM_ERROR_MODEL = NIDM["NIDM_0000023"] +NIDM_ERROR_DISTRIBUTION = NIDM["NIDM_0000022"] +NIDM_REGRESSOR_NAMES = NIDM["NIDM_0000021"] +NIDM_DISPLAY_MASK_MAP = NIDM["NIDM_0000020"] +NIDM_DESIGN_MATRIX = NIDM["NIDM_0000019"] +NIDM_CUSTOM_COORDINATE_SYSTEM = NIDM["NIDM_0000017"] +NIDM_COORDINATE_SPACE = NIDM["NIDM_0000016"] +NIDM_COORDINATE = NIDM["NIDM_0000015"] +NIDM_LEGENDRE_POLYNOMIAL_ORDER = NIDM["NIDM_0000014"] +NIDM_CONTRAST_STANDARD_ERROR_MAP = NIDM["NIDM_0000013"] +NIDM_CONNECTIVITY_CRITERION = NIDM["NIDM_0000012"] +NIDM_CONJUNCTION_INFERENCE = NIDM["NIDM_0000011"] +NIDM_HAS_FMRI_DESIGN = NIDM["NIDM_0000010"] +NIDM_COLIN27_COORDINATE_SYSTEM = NIDM["NIDM_0000009"] +NIDM_CLUSTER_LABELS_MAP = NIDM["NIDM_0000008"] +NIDM_CLUSTER_DEFINITION_CRITERIA = NIDM["NIDM_0000007"] +NIDM_CLUSTER = NIDM["NIDM_0000006"] +NIDM_BINOMIAL_DISTRIBUTION = NIDM["NIDM_0000005"] +NIDM_BINARY_MAP = NIDM["NIDM_0000004"] +NIDM_CONTRAST_ESTIMATION = NIDM["NIDM_0000001"] +NIDM_CONTRAST_MAP = NIDM["NIDM_0000002"] # NIDM-Experiment############################################################## -NIDM_DATAELEMENT = QualifiedName(provNamespace("nidm", NIDM), 'DataElement') -NIDM_PROJECT = QualifiedName(provNamespace("nidm", NIDM), 'Project') -#NIDM_PROJECT_TYPE = QualifiedName(provNamespace("dctypes", DCTYPES),"Dataset") -NIDM_PROJECT_IDENTIFIER = QualifiedName(provNamespace("sio", SIO),"Identifier") -NIDM_PROJECT_NAME = QualifiedName(provNamespace("dctypes", DCTYPES),"title") -NIDM_PROJECT_DESCRIPTION = QualifiedName(provNamespace("dct", DCT),"description") -NIDM_DESCRIPTION = QualifiedName(provNamespace("dct", DCT),"description") -NIDM_DEFINITION = QualifiedName(provNamespace("dct", DCT),"description") -NIDM_PROJECT_LICENSE = QualifiedName(provNamespace("dct", DCT),"license") -NIDM_PROJECT_URL = QualifiedName(provNamespace("sio", SIO),"URL") -NIDM_PROJECT_REFERENCES = QualifiedName(provNamespace("dcat", DCAT),"creator") -NIDM_AUTHOR = QualifiedName(provNamespace("ncit", DCAT),"author") -NIDM_SESSION = QualifiedName(provNamespace("nidm", NIDM), 'Session') +NIDM_DATAELEMENT = QualifiedName(provNamespace("nidm", NIDM), "DataElement") +NIDM_PROJECT = QualifiedName(provNamespace("nidm", NIDM), "Project") +# NIDM_PROJECT_TYPE = QualifiedName(provNamespace("dctypes", DCTYPES),"Dataset") +NIDM_PROJECT_IDENTIFIER = QualifiedName(provNamespace("sio", SIO), "Identifier") +NIDM_PROJECT_NAME = QualifiedName(provNamespace("dctypes", DCTYPES), "title") +NIDM_PROJECT_DESCRIPTION = QualifiedName(provNamespace("dct", DCT), "description") +NIDM_DESCRIPTION = QualifiedName(provNamespace("dct", DCT), "description") +NIDM_DEFINITION = 
QualifiedName(provNamespace("dct", DCT), "description") +NIDM_PROJECT_LICENSE = QualifiedName(provNamespace("dct", DCT), "license") +NIDM_PROJECT_URL = QualifiedName(provNamespace("sio", SIO), "URL") +NIDM_PROJECT_REFERENCES = QualifiedName(provNamespace("dcat", DCAT), "creator") +NIDM_AUTHOR = QualifiedName(provNamespace("ncit", DCAT), "author") +NIDM_SESSION = QualifiedName(provNamespace("nidm", NIDM), "Session") NIDM_ACQUISITION_ACTIVITY = QualifiedName(provNamespace("nidm", NIDM), "Acquisition") -NIDM_ACQUISITION_MODALITY = QualifiedName(provNamespace("nidm",NIDM),"hadAcquisitionModality") -NIDM_ASSESSMENT_ACQUISITION = QualifiedName(provNamespace("onli", ONLI), "instrument-based-assessment") -NIDM_ACQUISITION_ENTITY = QualifiedName(provNamespace("nidm", NIDM), "AcquisitionObject") +NIDM_ACQUISITION_MODALITY = QualifiedName( + provNamespace("nidm", NIDM), "hadAcquisitionModality" +) +NIDM_ASSESSMENT_ACQUISITION = QualifiedName( + provNamespace("onli", ONLI), "instrument-based-assessment" +) +NIDM_ACQUISITION_ENTITY = QualifiedName( + provNamespace("nidm", NIDM), "AcquisitionObject" +) NIDM_PROJECT_SOURCE = QualifiedName(provNamespace("dctypes", DCTYPES), "source") -NIDM_HAD_NUMERICAL_VALUE = QualifiedName(provNamespace("nidm", NIDM), "hadNumericalValue") +NIDM_HAD_NUMERICAL_VALUE = QualifiedName( + provNamespace("nidm", NIDM), "hadNumericalValue" +) NIDM_BATH_SOLUTION = QualifiedName(provNamespace("nidm", NIDM), "BathSolution") NIDM_CELL_TYPE = QualifiedName(provNamespace("nidm", NIDM), "CellType") NIDM_CHANNEL_NUMBER = QualifiedName(provNamespace("nidm", NIDM), "ChannelNumber") -NIDM_ELECTRODE_IMPEDANCE = QualifiedName(provNamespace("nidm", NIDM), "ElectrodeImpedance") +NIDM_ELECTRODE_IMPEDANCE = QualifiedName( + provNamespace("nidm", NIDM), "ElectrodeImpedance" +) NIDM_GROUP_LABEL = QualifiedName(provNamespace("nidm", NIDM), "GroupLabel") -NIDM_HOLLOW_ELECTRODE_SOLUTION = QualifiedName(provNamespace("nidm", NIDM), "HollowElectrodeSolution") -NIDM_HAD_IMAGE_CONTRACT_TYPE = QualifiedName(provNamespace("nidm", NIDM), "hadImageContractType") -NIDM_HAD_IMAGE_USAGE_TYPE = QualifiedName(provNamespace("nidm", NIDM), "hadImageUsageType") +NIDM_HOLLOW_ELECTRODE_SOLUTION = QualifiedName( + provNamespace("nidm", NIDM), "HollowElectrodeSolution" +) +NIDM_HAD_IMAGE_CONTRACT_TYPE = QualifiedName( + provNamespace("nidm", NIDM), "hadImageContractType" +) +NIDM_HAD_IMAGE_USAGE_TYPE = QualifiedName( + provNamespace("nidm", NIDM), "hadImageUsageType" +) NIDM_NUBMER_OF_CHANNELS = QualifiedName(provNamespace("nidm", NIDM), "NubmerOfChannels") NIDM_APPLIED_FILTER = QualifiedName(provNamespace("nidm", NIDM), "AppliedFilter") -NIDM_SOLUTION_FLOW_SPEED = QualifiedName(provNamespace("nidm", NIDM), "SolutionFlowSpeed") -NIDM_RECORDING_LOCATION = QualifiedName(provNamespace("nidm", NIDM), "RecordingLocation") +NIDM_SOLUTION_FLOW_SPEED = QualifiedName( + provNamespace("nidm", NIDM), "SolutionFlowSpeed" +) +NIDM_RECORDING_LOCATION = QualifiedName( + provNamespace("nidm", NIDM), "RecordingLocation" +) -NIDM_DEMOGRAPHICS_ENTITY = QualifiedName(provNamespace("nidm", NIDM), "DemographicsInstrument") -NIDM_ASSESSMENT_USAGE_TYPE = QualifiedName(provNamespace("nidm", NIDM),"AssessmentUsageType") +NIDM_DEMOGRAPHICS_ENTITY = QualifiedName( + provNamespace("nidm", NIDM), "DemographicsInstrument" +) +NIDM_ASSESSMENT_USAGE_TYPE = QualifiedName( + provNamespace("nidm", NIDM), "AssessmentUsageType" +) -NIDM_ASSESSMENT_ENTITY = QualifiedName(provNamespace("onli", ONLI), "assessment-instrument") -#files +NIDM_ASSESSMENT_ENTITY = 
QualifiedName( + provNamespace("onli", ONLI), "assessment-instrument" +) +# files NIDM_FILENAME = QualifiedName(provNamespace("nfo", NFO), "filename") NIDM_FILE = QualifiedName(provNamespace("sio", SIO), "file") -#Roles +# Roles NIDM_PI = QualifiedName(provNamespace("birnlex", BIRNLEX), "birnlex_2152") -NIDM_COI = QualifiedName(provNamespace("birnlex", BIRNLEX),"birnlex_2199") -NIDM_PARTICIPANT = QualifiedName(provNamespace("sio", SIO),"Subject") -#Demographics -NIDM_AGE = QualifiedName(provNamespace("ncidb",NCICB),"Age") -NIDM_GENDER = QualifiedName(provNamespace("ndar",NDAR),"gender") -NIDM_SEX = QualifiedName(provNamespace("pato",PATO),"PhenotypicSex") -NIDM_HANDEDNESS = QualifiedName(provNamespace("obo",OBO),"handedness") -#NIDM_HANDEDNESS = OBO["PATO_0002201"] is correct term ID for handedness above -NIDM_ETHNICITY = QualifiedName(provNamespace("sio",SIO),"ethnicity") -NIDM_RACE = QualifiedName(provNamespace("sio",SIO),"race") - -#NCICB_ETHNICITY = NCICB["C16564"] is correct term ID for ethnic group -NIDM_DIAGNOSIS = QualifiedName(provNamespace("ncit",NCIT),"Diagnosis") -NIDM_FAMILY_NAME = QualifiedName(provNamespace("foaf",FOAF),"familyName") -NIDM_GIVEN_NAME = QualifiedName(provNamespace("foaf",FOAF),"givenName") -NIDM_SUBJECTID = QualifiedName(provNamespace("ndar",NDAR),"src_subject_id") -#MRI scan types -NIDM_IMAGE_CONTRAST_TYPE = QualifiedName(provNamespace("nidm", NIDM),"hadImageContrastType") -NIDM_IMAGE_USAGE_TYPE = QualifiedName(provNamespace("nidm", NIDM),"hadImageUsageType") -NIDM_PET = QualifiedName(provNamespace("nidm", NIDM),"PositronEmissionTomography") -NIDM_MRI = QualifiedName(provNamespace("nidm", NIDM),"MagneticResonanceImaging") -NIDM_MRI_ANATOMIC_SCAN = QualifiedName(provNamespace("nidm", NIDM),"Anatomical") -NIDM_MRI_STRUCTURE_SCAN = QualifiedName(provNamespace("nidm", NIDM),"Structural") -NIDM_MRI_FUNCTION_SCAN = QualifiedName(provNamespace("nidm", NIDM),"Functional") -NIDM_MRI_DWI_SCAN = QualifiedName(provNamespace("nidm", NIDM),"DiffusionWeighted") -NIDM_MRI_DWI_BVAL = QualifiedName(provNamespace("nidm", NIDM),"b-value") -NIDM_MRI_DWI_BVEC = QualifiedName(provNamespace("nidm", NIDM),"b-vector") -NIDM_MRI_FUNCTION_TASK = QualifiedName(provNamespace("nidm", NIDM),"Task") -NIDM_MRI_T1 = QualifiedName(provNamespace("nidm", NIDM),"T1Weighted") -NIDM_MRI_T2 = QualifiedName(provNamespace("nidm", NIDM),"T2Weighted") -NIDM_MRI_T2_STAR = QualifiedName(provNamespace("nidm", NIDM),"T2StarWeighted") -NIDM_MRI_DIFFUSION_TENSOR = QualifiedName(provNamespace("nidm", NIDM),"DiffusionTensor") -NIDM_MRI_FLOW = QualifiedName(provNamespace("nidm", NIDM),"FlowWeighted") -NIDM_MRI_BOLD_EVENTS = QualifiedName(provNamespace("nidm", NIDM),"StimulusResponseFile") -NIDM_MRI_ASL = QualifiedName(provNamespace("nidm",NIDM),"ArterialSpinLabeling") -CRYPTO_SHA512 =QualifiedName(provNamespace("crypto", CRYPTO),"sha512") -DATALAD_LOCATION = QualifiedName(provNamespace("datalad", DATALAD),"Location") -NIDM_DOI = QualifiedName(provNamespace("edam",EDAM),"data_1188") -NIDM_FUNDING = QualifiedName(provNamespace("obo",OBO),"IAO_0000623") -NIDM_ACKNOWLEDGEMENTS = QualifiedName(provNamespace("obo",OBO),"IAO_0000324") +NIDM_COI = QualifiedName(provNamespace("birnlex", BIRNLEX), "birnlex_2199") +NIDM_PARTICIPANT = QualifiedName(provNamespace("sio", SIO), "Subject") +# Demographics +NIDM_AGE = QualifiedName(provNamespace("ncidb", NCICB), "Age") +NIDM_GENDER = QualifiedName(provNamespace("ndar", NDAR), "gender") +NIDM_SEX = QualifiedName(provNamespace("pato", PATO), "PhenotypicSex") +NIDM_HANDEDNESS = 
QualifiedName(provNamespace("obo", OBO), "handedness") +# NIDM_HANDEDNESS = OBO["PATO_0002201"] is correct term ID for handedness above +NIDM_ETHNICITY = QualifiedName(provNamespace("sio", SIO), "ethnicity") +NIDM_RACE = QualifiedName(provNamespace("sio", SIO), "race") + +# NCICB_ETHNICITY = NCICB["C16564"] is correct term ID for ethnic group +NIDM_DIAGNOSIS = QualifiedName(provNamespace("ncit", NCIT), "Diagnosis") +NIDM_FAMILY_NAME = QualifiedName(provNamespace("foaf", FOAF), "familyName") +NIDM_GIVEN_NAME = QualifiedName(provNamespace("foaf", FOAF), "givenName") +NIDM_SUBJECTID = QualifiedName(provNamespace("ndar", NDAR), "src_subject_id") +# MRI scan types +NIDM_IMAGE_CONTRAST_TYPE = QualifiedName( + provNamespace("nidm", NIDM), "hadImageContrastType" +) +NIDM_IMAGE_USAGE_TYPE = QualifiedName(provNamespace("nidm", NIDM), "hadImageUsageType") +NIDM_PET = QualifiedName(provNamespace("nidm", NIDM), "PositronEmissionTomography") +NIDM_MRI = QualifiedName(provNamespace("nidm", NIDM), "MagneticResonanceImaging") +NIDM_MRI_ANATOMIC_SCAN = QualifiedName(provNamespace("nidm", NIDM), "Anatomical") +NIDM_MRI_STRUCTURE_SCAN = QualifiedName(provNamespace("nidm", NIDM), "Structural") +NIDM_MRI_FUNCTION_SCAN = QualifiedName(provNamespace("nidm", NIDM), "Functional") +NIDM_MRI_DWI_SCAN = QualifiedName(provNamespace("nidm", NIDM), "DiffusionWeighted") +NIDM_MRI_DWI_BVAL = QualifiedName(provNamespace("nidm", NIDM), "b-value") +NIDM_MRI_DWI_BVEC = QualifiedName(provNamespace("nidm", NIDM), "b-vector") +NIDM_MRI_FUNCTION_TASK = QualifiedName(provNamespace("nidm", NIDM), "Task") +NIDM_MRI_T1 = QualifiedName(provNamespace("nidm", NIDM), "T1Weighted") +NIDM_MRI_T2 = QualifiedName(provNamespace("nidm", NIDM), "T2Weighted") +NIDM_MRI_T2_STAR = QualifiedName(provNamespace("nidm", NIDM), "T2StarWeighted") +NIDM_MRI_DIFFUSION_TENSOR = QualifiedName( + provNamespace("nidm", NIDM), "DiffusionTensor" +) +NIDM_MRI_FLOW = QualifiedName(provNamespace("nidm", NIDM), "FlowWeighted") +NIDM_MRI_BOLD_EVENTS = QualifiedName( + provNamespace("nidm", NIDM), "StimulusResponseFile" +) +NIDM_MRI_ASL = QualifiedName(provNamespace("nidm", NIDM), "ArterialSpinLabeling") +CRYPTO_SHA512 = QualifiedName(provNamespace("crypto", CRYPTO), "sha512") +DATALAD_LOCATION = QualifiedName(provNamespace("datalad", DATALAD), "Location") +NIDM_DOI = QualifiedName(provNamespace("edam", EDAM), "data_1188") +NIDM_FUNDING = QualifiedName(provNamespace("obo", OBO), "IAO_0000623") +NIDM_ACKNOWLEDGEMENTS = QualifiedName(provNamespace("obo", OBO), "IAO_0000324") ############################################################################## # OBO constants -OBO_EXAMPLE = OBO['IAO_0000112'] -OBO_TERM_EDITOR = OBO['IAO_0000117'] -OBO_EDITOR_NOTE = OBO['IAO_0000116'] - -OBO_PENDING_FINAL = OBO['IAO_0000125'] -OBO_METADATA_COMPLETE = OBO['IAO_0000120'] -OBO_METADATA_INCOMPLETE = OBO['IAO_0000123'] -OBO_REQUIRES_DISCUSSION = OBO['IAO_0000428'] -OBO_UNCURATED = OBO['IAO_0000124'] -OBO_TO_BE_REPLACED = OBO['IAO_0000423'] -OBO_READY = OBO['IAO_0000122'] -OBO_DEFINITION = OBO['IAO_0000115'] - -OBO_STATISTIC = OBO['STATO_0000039'] +OBO_EXAMPLE = OBO["IAO_0000112"] +OBO_TERM_EDITOR = OBO["IAO_0000117"] +OBO_EDITOR_NOTE = OBO["IAO_0000116"] + +OBO_PENDING_FINAL = OBO["IAO_0000125"] +OBO_METADATA_COMPLETE = OBO["IAO_0000120"] +OBO_METADATA_INCOMPLETE = OBO["IAO_0000123"] +OBO_REQUIRES_DISCUSSION = OBO["IAO_0000428"] +OBO_UNCURATED = OBO["IAO_0000124"] +OBO_TO_BE_REPLACED = OBO["IAO_0000423"] +OBO_READY = OBO["IAO_0000122"] +OBO_DEFINITION = OBO["IAO_0000115"] + 
+OBO_STATISTIC = OBO["STATO_0000039"] OBO_STATISTIC_QNAME = q_graph.qname(OBO_STATISTIC) -OBO_P_VALUE_FWER = OBO['OBI_0001265'] +OBO_P_VALUE_FWER = OBO["OBI_0001265"] OBO_P_VALUE_FWER_QNAME = q_graph.qname(OBO_P_VALUE_FWER) -OBO_Q_VALUE_FDR = OBO['OBI_0001442'] +OBO_Q_VALUE_FDR = OBO["OBI_0001442"] OBO_Q_VALUE_FDR_QNAME = q_graph.qname(OBO_Q_VALUE_FDR) -HAS_CURATION_STATUS = OBO['IAO_0000114'] +HAS_CURATION_STATUS = OBO["IAO_0000114"] -STATO_OLS = OBO['STATO_0000370'] +STATO_OLS = OBO["STATO_0000370"] STATO_OLS_STR = q_graph.qname(STATO_OLS) # TODO: labels should be grabbed automatically from the corresponding owl file STATO_OLS_LABEL = "obo:'ordinary least squares estimation'" -STATO_GLS = OBO['STATO_0000372'] +STATO_GLS = OBO["STATO_0000372"] STATO_GLS_STR = q_graph.qname(STATO_GLS) STATO_GLS_LABEL = "obo:'generalized least squares estimation'" -STATO_TSTATISTIC = OBO['STATO_0000176'] +STATO_TSTATISTIC = OBO["STATO_0000176"] STATO_TSTATISTIC_STR = q_graph.qname(STATO_TSTATISTIC) STATO_TSTATISTIC_LABEL = "obo:'t-statistic'" -STATO_ZSTATISTIC = OBO['STATO_0000376'] +STATO_ZSTATISTIC = OBO["STATO_0000376"] STATO_ZSTATISTIC_STR = q_graph.qname(STATO_ZSTATISTIC) STATO_ZSTATISTIC_LABEL = "obo:'Z-statistic'" -STATO_CONTRAST_WEIGHT_MATRIX = OBO['STATO_0000323'] -STATO_GAUSSIAN_DISTRIBUTION = OBO['STATO_0000227'] -STATO_UNSTRUCTURED_COVARIANCE = OBO['STATO_0000405'] -STATO_GROUP = OBO['STATO_0000193'] +STATO_CONTRAST_WEIGHT_MATRIX = OBO["STATO_0000323"] +STATO_GAUSSIAN_DISTRIBUTION = OBO["STATO_0000227"] +STATO_UNSTRUCTURED_COVARIANCE = OBO["STATO_0000405"] +STATO_GROUP = OBO["STATO_0000193"] -SPM_SOFTWARE = SCR['SCR_007037'] -FSL_SOFTWARE = SCR['SCR_002823'] +SPM_SOFTWARE = SCR["SCR_007037"] +FSL_SOFTWARE = SCR["SCR_002823"] -NLX_MRI_SCANNER = NLX['birnlex_2100'] -NLX_FMRI_PROTOCOL = NLX['birnlex_2250'] -NLX_IMAGING_INSTRUMENT = NLX['birnlex_2094'] +NLX_MRI_SCANNER = NLX["birnlex_2100"] +NLX_FMRI_PROTOCOL = NLX["birnlex_2250"] +NLX_IMAGING_INSTRUMENT = NLX["birnlex_2094"] -SKOS_DEFINITION = SKOS['definition'] +SKOS_DEFINITION = SKOS["definition"] # ProvONE Constants for classes -PROVONE_PROCESS = PROVONE['Process'] -PROVONE_USER = PROVONE['User'] -PROVONE_PROCESSEXEC = PROVONE['ProcessExec'] -PROVONE_DATA = PROVONE['Data'] -PROVONE_INPUTPORT = PROVONE['InputPort'] -PROVONE_OUTPUTPORT = PROVONE['OutputPort'] -PROVONE_DATALINK = PROVONE['DataLink'] -PROVONE_SEQCTRLLINK = PROVONE['seqCtrlLink'] +PROVONE_PROCESS = PROVONE["Process"] +PROVONE_USER = PROVONE["User"] +PROVONE_PROCESSEXEC = PROVONE["ProcessExec"] +PROVONE_DATA = PROVONE["Data"] +PROVONE_INPUTPORT = PROVONE["InputPort"] +PROVONE_OUTPUTPORT = PROVONE["OutputPort"] +PROVONE_DATALINK = PROVONE["DataLink"] +PROVONE_SEQCTRLLINK = PROVONE["seqCtrlLink"] # ProvONE Constants for Associations -PROVONE_HASOUTPORT = PROVONE['hasOutPort'] -PROVONE_HASINPORT = PROVONE['hasInPort'] -PROVONE_HASSUBPROCESS = PROVONE['hasSubProcess'] -PROVONE_INPORTTODL = PROVONE['inPortToDL'] -PROVONE_DLTOINPORT = PROVONE['DLToInPort'] -PROVONE_OUTPORTTODL = PROVONE['outPortToDL'] -PROVONE_DLTOOUTPORT = PROVONE['DLToOutPort'] -PROVONE_CLTODESTP = PROVONE['CLtoDestP'] -PROVONE_SOURCEPTOCL = PROVONE['sourcePToCL'] -PROVONE_DATAONLINK = PROVONE['dataOnLink'] -PROVONE_HASDEFAULTPARAM = PROVONE['hasDefaultParameter'] -PROVONE_ISPARTOF = PROVONE['isPartOf'] -PROVONE_MEMBERSHIP = PROVONE['hadMember'] +PROVONE_HASOUTPORT = PROVONE["hasOutPort"] +PROVONE_HASINPORT = PROVONE["hasInPort"] +PROVONE_HASSUBPROCESS = PROVONE["hasSubProcess"] +PROVONE_INPORTTODL = PROVONE["inPortToDL"] 
+PROVONE_DLTOINPORT = PROVONE["DLToInPort"] +PROVONE_OUTPORTTODL = PROVONE["outPortToDL"] +PROVONE_DLTOOUTPORT = PROVONE["DLToOutPort"] +PROVONE_CLTODESTP = PROVONE["CLtoDestP"] +PROVONE_SOURCEPTOCL = PROVONE["sourcePToCL"] +PROVONE_DATAONLINK = PROVONE["dataOnLink"] +PROVONE_HASDEFAULTPARAM = PROVONE["hasDefaultParameter"] +PROVONE_ISPARTOF = PROVONE["isPartOf"] +PROVONE_MEMBERSHIP = PROVONE["hadMember"] # ProvONE notation mapping PROVONE_N_MAP = { - PROVONE_PROCESS: u'process', - PROVONE_PROCESSEXEC: u'processExec', - PROVONE_USER: u'user', - PROVONE_DATA: u'data', - PROVONE_HASINPORT: u'hasInPort', - PROVONE_INPUTPORT: u'inputPort', - PROVONE_OUTPUTPORT: u'outputPort', - PROVONE_HASOUTPORT: u'hasOutPort', - PROVONE_HASSUBPROCESS: u'hasSubProcess', - PROVONE_INPORTTODL: u'inPortToDL', - PROVONE_DATALINK: u'dataLink', - PROVONE_SEQCTRLLINK: u'seqCtrlLink', - PROVONE_CLTODESTP: u'CLtoDestP', - PROVONE_SOURCEPTOCL: u'sourcePtoCL', - PROVONE_OUTPORTTODL: u'outPortToDL', - PROVONE_DLTOOUTPORT: u'DLToOutPort', - PROVONE_DLTOINPORT: u'DLToInPort', - PROVONE_DATAONLINK: u'dataOnLink', - PROVONE_HASDEFAULTPARAM: u'hasDefaultParamter', - PROVONE_ISPARTOF: u'isPartOf', - PROVONE_MEMBERSHIP: u'hadMember', - + PROVONE_PROCESS: "process", + PROVONE_PROCESSEXEC: "processExec", + PROVONE_USER: "user", + PROVONE_DATA: "data", + PROVONE_HASINPORT: "hasInPort", + PROVONE_INPUTPORT: "inputPort", + PROVONE_OUTPUTPORT: "outputPort", + PROVONE_HASOUTPORT: "hasOutPort", + PROVONE_HASSUBPROCESS: "hasSubProcess", + PROVONE_INPORTTODL: "inPortToDL", + PROVONE_DATALINK: "dataLink", + PROVONE_SEQCTRLLINK: "seqCtrlLink", + PROVONE_CLTODESTP: "CLtoDestP", + PROVONE_SOURCEPTOCL: "sourcePtoCL", + PROVONE_OUTPORTTODL: "outPortToDL", + PROVONE_DLTOOUTPORT: "DLToOutPort", + PROVONE_DLTOINPORT: "DLToInPort", + PROVONE_DATAONLINK: "dataOnLink", + PROVONE_HASDEFAULTPARAM: "hasDefaultParamter", + PROVONE_ISPARTOF: "isPartOf", + PROVONE_MEMBERSHIP: "hadMember", } # Identifiers for PROVONE's attributes -PROVONE_ATTR_PROCESS = PROVONE['process'] -PROVONE_ATTR_USER = PROVONE['user'] -PROVONE_ATTR_PROCESSEXEC = PROVONE['processExec'] -PROVONE_ATTR_PLAN = PROVONE['plan'] -PROVONE_ATTR_GENERATED_DATA = PROVONE['generatedData'] -PROVONE_ATTR_USED_DATA = PROVONE['usedData'] -PROVONE_ATTR_GENERATION = PROVONE['generation'] -#PROVONE_ATTR_USAGE = PROVONE['usage'] -PROVONE_ATTR_DATA = PROVONE['data'] -PROVONE_ATTR_INFORMED = PROVONE['informed'] -PROVONE_ATTR_INFORMANT = PROVONE['informant'] -PROVONE_ATTR_HASINPORT = PROVONE['hasInPort'] -PROVONE_ATTR_HASOUTPORT = PROVONE['HasOutPort'] -PROVONE_ATTR_INPUTPORT = PROVONE['InputPort'] -PROVONE_ATTR_OUTPUTPORT = PROVONE['OutputPort'] -PROVONE_ATTR_GENERATED_PROCESS = PROVONE['generatedProcess'] -PROVONE_ATTR_USED_PROCESS = PROVONE['usedProcess'] -PROVONE_ATTR_HASSUBPROCESS = PROVONE['hasSubProcess'] -PROVONE_ATTR_DATALINK = PROVONE['dataLink'] -PROVONE_ATTR_SEQCTRLLINK = PROVONE['seqCtrlLink'] -PROVONE_ATTR_CLTODESTP = PROVONE['clToDestP'] -PROVONE_ATTR_SOURCEPTOCL = PROVONE['sourcePtoCL'] -PROVONE_ATTR_RELATED_PREXEC = PROVONE['relatedProcessExec'], -PROVONE_ATTR_USED_PREXEC = PROVONE['usedProcessExec'] -PROVONE_ATTR_CHILD_PREXEC = PROVONE['childProcessExec'] +PROVONE_ATTR_PROCESS = PROVONE["process"] +PROVONE_ATTR_USER = PROVONE["user"] +PROVONE_ATTR_PROCESSEXEC = PROVONE["processExec"] +PROVONE_ATTR_PLAN = PROVONE["plan"] +PROVONE_ATTR_GENERATED_DATA = PROVONE["generatedData"] +PROVONE_ATTR_USED_DATA = PROVONE["usedData"] +PROVONE_ATTR_GENERATION = PROVONE["generation"] +# 
PROVONE_ATTR_USAGE = PROVONE['usage'] +PROVONE_ATTR_DATA = PROVONE["data"] +PROVONE_ATTR_INFORMED = PROVONE["informed"] +PROVONE_ATTR_INFORMANT = PROVONE["informant"] +PROVONE_ATTR_HASINPORT = PROVONE["hasInPort"] +PROVONE_ATTR_HASOUTPORT = PROVONE["HasOutPort"] +PROVONE_ATTR_INPUTPORT = PROVONE["InputPort"] +PROVONE_ATTR_OUTPUTPORT = PROVONE["OutputPort"] +PROVONE_ATTR_GENERATED_PROCESS = PROVONE["generatedProcess"] +PROVONE_ATTR_USED_PROCESS = PROVONE["usedProcess"] +PROVONE_ATTR_HASSUBPROCESS = PROVONE["hasSubProcess"] +PROVONE_ATTR_DATALINK = PROVONE["dataLink"] +PROVONE_ATTR_SEQCTRLLINK = PROVONE["seqCtrlLink"] +PROVONE_ATTR_CLTODESTP = PROVONE["clToDestP"] +PROVONE_ATTR_SOURCEPTOCL = PROVONE["sourcePtoCL"] +PROVONE_ATTR_RELATED_PREXEC = (PROVONE["relatedProcessExec"],) +PROVONE_ATTR_USED_PREXEC = PROVONE["usedProcessExec"] +PROVONE_ATTR_CHILD_PREXEC = PROVONE["childProcessExec"] PROVONE_ATTRIBUTE_QNAMES = { - PROVONE_ATTR_PROCESS, - PROVONE_ATTR_USER, - PROVONE_ATTR_PROCESSEXEC, - PROVONE_ATTR_PLAN, - PROVONE_ATTR_GENERATED_DATA, - PROVONE_ATTR_USED_DATA, - PROVONE_ATTR_DATA, - PROVONE_ATTR_INFORMED, - PROVONE_ATTR_INFORMANT, - PROVONE_ATTR_HASINPORT, - PROVONE_ATTR_HASOUTPORT, - PROVONE_ATTR_INPUTPORT, - PROVONE_ATTR_OUTPUTPORT, - PROVONE_ATTR_GENERATED_PROCESS, - PROVONE_ATTR_USED_PROCESS, - PROVONE_ATTR_HASSUBPROCESS, - PROVONE_ATTR_DATALINK, - PROVONE_ATTR_SEQCTRLLINK, - PROVONE_ATTR_CLTODESTP, - PROVONE_ATTR_SOURCEPTOCL, - PROVONE_ATTR_RELATED_PREXEC, - PROVONE_ATTR_USED_PREXEC, - PROVONE_ATTR_CHILD_PREXEC, - #PROV_ATTR_COLLECTION + PROVONE_ATTR_PROCESS, + PROVONE_ATTR_USER, + PROVONE_ATTR_PROCESSEXEC, + PROVONE_ATTR_PLAN, + PROVONE_ATTR_GENERATED_DATA, + PROVONE_ATTR_USED_DATA, + PROVONE_ATTR_DATA, + PROVONE_ATTR_INFORMED, + PROVONE_ATTR_INFORMANT, + PROVONE_ATTR_HASINPORT, + PROVONE_ATTR_HASOUTPORT, + PROVONE_ATTR_INPUTPORT, + PROVONE_ATTR_OUTPUTPORT, + PROVONE_ATTR_GENERATED_PROCESS, + PROVONE_ATTR_USED_PROCESS, + PROVONE_ATTR_HASSUBPROCESS, + PROVONE_ATTR_DATALINK, + PROVONE_ATTR_SEQCTRLLINK, + PROVONE_ATTR_CLTODESTP, + PROVONE_ATTR_SOURCEPTOCL, + PROVONE_ATTR_RELATED_PREXEC, + PROVONE_ATTR_USED_PREXEC, + PROVONE_ATTR_CHILD_PREXEC, + # PROV_ATTR_COLLECTION } # Set of formal attributes of PROV records -PROVONE_ATTRIBUTES = PROVONE_ATTRIBUTE_QNAMES | PROV_ATTRIBUTE_QNAMES | \ - PROV_ATTRIBUTE_LITERALS -PROVONE_RECORD_ATTRIBUTES = list((attr, six.text_type(attr)) for attr in - PROVONE_ATTRIBUTES) +PROVONE_ATTRIBUTES = ( + PROVONE_ATTRIBUTE_QNAMES | PROV_ATTRIBUTE_QNAMES | PROV_ATTRIBUTE_LITERALS +) +PROVONE_RECORD_ATTRIBUTES = list( + (attr, six.text_type(attr)) for attr in PROVONE_ATTRIBUTES +) PROV_RECORD_IDS_MAP = dict( (PROV_N_MAP[rec_type_id], rec_type_id) for rec_type_id in PROV_N_MAP @@ -567,72 +604,73 @@ def __init__(self, namespaces=None): ) - ####ADDED BY DBK to make searching NIDM-Experiment Terms easier...temporary, should be done in the OWL file##### -nidm_experiment_terms = [NIDM_PROJECT, -NIDM_PROJECT_IDENTIFIER, -NIDM_PROJECT_NAME, -NIDM_PROJECT_DESCRIPTION, -NIDM_PROJECT_LICENSE, -NIDM_PROJECT_URL, -NIDM_PROJECT_REFERENCES, -NIDM_AUTHOR, -NIDM_SESSION, -NIDM_ACQUISITION_ACTIVITY, -NIDM_ACQUISITION_MODALITY, -NIDM_ASSESSMENT_ACQUISITION, -NIDM_ACQUISITION_ENTITY, -NIDM_DEMOGRAPHICS_ENTITY, -NIDM_ASSESSMENT_ENTITY, -NIDM_FILENAME, -NIDM_FILE, -NIDM_PI, -NIDM_COI, -NIDM_PARTICIPANT, -NIDM_AGE, -NIDM_GENDER, -NIDM_SEX, -NIDM_HANDEDNESS, -NIDM_RACE, -NIDM_ETHNICITY, -NIDM_DIAGNOSIS, -NIDM_FAMILY_NAME, -NIDM_GIVEN_NAME, -NIDM_SUBJECTID, -NIDM_IMAGE_CONTRAST_TYPE, 
-NIDM_IMAGE_USAGE_TYPE, -NIDM_MRI, -NIDM_MRI_ANATOMIC_SCAN, -NIDM_MRI_STRUCTURE_SCAN, -NIDM_MRI_FUNCTION_SCAN, -NIDM_MRI_DWI_SCAN, -NIDM_MRI_DWI_BVAL, -NIDM_MRI_DWI_BVEC, -NIDM_MRI_FUNCTION_TASK, -NIDM_MRI_T1, -NIDM_MRI_T2, -NIDM_MRI_T2_STAR, -NIDM_MRI_DIFFUSION_TENSOR, -NIDM_MRI_FLOW, -NIDM_MRI_BOLD_EVENTS, -NIDM_DOI] +nidm_experiment_terms = [ + NIDM_PROJECT, + NIDM_PROJECT_IDENTIFIER, + NIDM_PROJECT_NAME, + NIDM_PROJECT_DESCRIPTION, + NIDM_PROJECT_LICENSE, + NIDM_PROJECT_URL, + NIDM_PROJECT_REFERENCES, + NIDM_AUTHOR, + NIDM_SESSION, + NIDM_ACQUISITION_ACTIVITY, + NIDM_ACQUISITION_MODALITY, + NIDM_ASSESSMENT_ACQUISITION, + NIDM_ACQUISITION_ENTITY, + NIDM_DEMOGRAPHICS_ENTITY, + NIDM_ASSESSMENT_ENTITY, + NIDM_FILENAME, + NIDM_FILE, + NIDM_PI, + NIDM_COI, + NIDM_PARTICIPANT, + NIDM_AGE, + NIDM_GENDER, + NIDM_SEX, + NIDM_HANDEDNESS, + NIDM_RACE, + NIDM_ETHNICITY, + NIDM_DIAGNOSIS, + NIDM_FAMILY_NAME, + NIDM_GIVEN_NAME, + NIDM_SUBJECTID, + NIDM_IMAGE_CONTRAST_TYPE, + NIDM_IMAGE_USAGE_TYPE, + NIDM_MRI, + NIDM_MRI_ANATOMIC_SCAN, + NIDM_MRI_STRUCTURE_SCAN, + NIDM_MRI_FUNCTION_SCAN, + NIDM_MRI_DWI_SCAN, + NIDM_MRI_DWI_BVAL, + NIDM_MRI_DWI_BVEC, + NIDM_MRI_FUNCTION_TASK, + NIDM_MRI_T1, + NIDM_MRI_T2, + NIDM_MRI_T2_STAR, + NIDM_MRI_DIFFUSION_TENSOR, + NIDM_MRI_FLOW, + NIDM_MRI_BOLD_EVENTS, + NIDM_DOI, +] # Common isAbout URIs -NIDM_IS_ABOUT_AGE = str(INTERLEX['ilx_0100400']) -NIDM_IS_ABOUT_HANDEDNESS = str(OBO['PATO_0002201']) -NIDM_IS_ABOUT_GENDER = str(INTERLEX['ilx_0101292']) +NIDM_IS_ABOUT_AGE = str(INTERLEX["ilx_0100400"]) +NIDM_IS_ABOUT_HANDEDNESS = str(OBO["PATO_0002201"]) +NIDM_IS_ABOUT_GENDER = str(INTERLEX["ilx_0101292"]) # REST API constants -NIDM_REST_NUM_SUBJECTS = 'number_of_subjects' -NIDM_REST_MAX_AGE = 'age_max' -NIDM_REST_MIN_AGE = 'age_min' -NIDM_REST_GENDER = 'gender' -NIDM_REST_AGE = 'age' +NIDM_REST_NUM_SUBJECTS = "number_of_subjects" +NIDM_REST_MAX_AGE = "age_max" +NIDM_REST_MIN_AGE = "age_min" +NIDM_REST_GENDER = "gender" +NIDM_REST_AGE = "age" # canonical CDE file locations CDE_FILE_LOCATIONS = [ - "https://raw.githubusercontent.com/ReproNim/fsl_seg_to_nidm/master/fsl_seg_to_nidm/mapping_data/fsl_cde.ttl", - "https://raw.githubusercontent.com/ReproNim/ants_seg_to_nidm/master/ants_seg_to_nidm/mapping_data/ants_cde.ttl", - "https://raw.githubusercontent.com/ReproNim/segstats_jsonld/master/segstats_jsonld/mapping_data/fs_cde.ttl" -] \ No newline at end of file + "https://raw.githubusercontent.com/ReproNim/fsl_seg_to_nidm/master/fsl_seg_to_nidm/mapping_data/fsl_cde.ttl", + "https://raw.githubusercontent.com/ReproNim/ants_seg_to_nidm/master/ants_seg_to_nidm/mapping_data/ants_cde.ttl", + "https://raw.githubusercontent.com/ReproNim/segstats_jsonld/master/segstats_jsonld/mapping_data/fs_cde.ttl", +] diff --git a/nidm/core/cde_dir/ants_cde.ttl b/nidm/core/cde_dir/ants_cde.ttl index 82c7891e..60b5d15e 100644 --- a/nidm/core/cde_dir/ants_cde.ttl +++ b/nidm/core/cde_dir/ants_cde.ttl @@ -3342,4 +3342,3 @@ ants:ants_000299 a ants:DataElement ; ants:unit "mm^2" . ants:DataElement rdfs:subClassOf nidm:DataElement . - diff --git a/nidm/core/cde_dir/fs_cde.ttl b/nidm/core/cde_dir/fs_cde.ttl index 6c28f8b6..667a84c5 100644 --- a/nidm/core/cde_dir/fs_cde.ttl +++ b/nidm/core/cde_dir/fs_cde.ttl @@ -47703,4 +47703,3 @@ fs:fs_003579 a fs:DataElement ; fs:unit "MR" . fs:DataElement rdfs:subClassOf nidm:DataElement . 
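The Constants hunk closes with `CDE_FILE_LOCATIONS`, the canonical URLs of the FreeSurfer, FSL and ANTS common-data-element graphs whose Turtle sources are touched in the neighbouring hunks. A minimal sketch of pulling one of those graphs down, using rdflib as an assumed client (PyNIDM's own CDE loader may work differently):

```python
from rdflib import Graph

# URL taken verbatim from CDE_FILE_LOCATIONS above.
FSL_CDE_URL = (
    "https://raw.githubusercontent.com/ReproNim/fsl_seg_to_nidm/master/"
    "fsl_seg_to_nidm/mapping_data/fsl_cde.ttl"
)

g = Graph()
g.parse(FSL_CDE_URL, format="turtle")        # fetch and parse the Turtle file
print(len(g), "triples in the FSL CDE graph")
```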
- diff --git a/nidm/core/cde_dir/fsl_cde.ttl b/nidm/core/cde_dir/fsl_cde.ttl index 470cc1ce..0e4d954b 100644 --- a/nidm/core/cde_dir/fsl_cde.ttl +++ b/nidm/core/cde_dir/fsl_cde.ttl @@ -392,4 +392,3 @@ fsl:fsl_000036 a fsl:DataElement ; nidm:measureOf . fsl:DataElement rdfs:subClassOf nidm:DataElement . - diff --git a/nidm/core/dot.py b/nidm/core/dot.py index 2d63227d..b8fd1ebc 100644 --- a/nidm/core/dot.py +++ b/nidm/core/dot.py @@ -11,180 +11,168 @@ .. moduleauthor:: Sanu Ann Abraham """ -from __future__ import (absolute_import, division, print_function, - unicode_literals) +from __future__ import absolute_import, division, print_function, unicode_literals try: from html import escape except ImportError: from cgi import escape + from datetime import datetime +from prov.dot import DOT_PROV_STYLE +from prov.model import ( + PROV_ACTIVITY, + PROV_AGENT, + PROV_ALTERNATE, + PROV_ASSOCIATION, + PROV_ATTRIBUTE_QNAMES, + PROV_ATTRIBUTION, + PROV_BUNDLE, + PROV_COMMUNICATION, + PROV_DELEGATION, + PROV_DERIVATION, + PROV_END, + PROV_ENTITY, + PROV_GENERATION, + PROV_INFLUENCE, + PROV_INVALIDATION, + PROV_MEMBERSHIP, + PROV_MENTION, + PROV_SPECIALIZATION, + PROV_START, + PROV_USAGE, + Identifier, + ProvException, + sorted_attributes, +) import pydot import six - from .Constants import * -from prov.model import ( - PROV_ACTIVITY, PROV_AGENT, PROV_ALTERNATE, PROV_ASSOCIATION, - PROV_ATTRIBUTION, PROV_BUNDLE, PROV_COMMUNICATION, PROV_DERIVATION, - PROV_DELEGATION, PROV_ENTITY, PROV_GENERATION, PROV_INFLUENCE, - PROV_INVALIDATION, PROV_END, PROV_MEMBERSHIP, PROV_MENTION, - PROV_SPECIALIZATION, PROV_START, PROV_USAGE, Identifier, - PROV_ATTRIBUTE_QNAMES, sorted_attributes, ProvException -) - -from prov.dot import DOT_PROV_STYLE - -__author__ = 'Sanu Ann Abraham' -__email__ = 'sanuann@mit.edu' +__author__ = "Sanu Ann Abraham" +__email__ = "sanuann@mit.edu" # Visual styles for various elements (nodes) and relations (edges) # see http://graphviz.org/content/attrs -#DOT_PROVONE_STYLE = DOT_PROV_STYLE +# DOT_PROVONE_STYLE = DOT_PROV_STYLE DOT_PROVONE_STYLE = { PROVONE_PROCESS: { - 'shape': 'oval', 'style': 'filled', - 'fillcolor': '#FFFC87', 'color': '#808080' - }, - PROVONE_PROCESSEXEC: { - 'shape': 'box', 'style': 'filled', - 'fillcolor': '#9FB1FC', 'color': '#0000FF' + "shape": "oval", + "style": "filled", + "fillcolor": "#FFFC87", + "color": "#808080", }, - PROVONE_INPUTPORT: { - 'shape': 'oval', 'style': 'filled', - 'fillcolor': '#FFFC87', 'color': '#808080' + PROVONE_PROCESSEXEC: { + "shape": "box", + "style": "filled", + "fillcolor": "#9FB1FC", + "color": "#0000FF", }, - PROVONE_OUTPUTPORT: { - 'shape': 'oval', 'style': 'filled', - 'fillcolor': '#FFFC87', 'color': '#808080' + PROVONE_INPUTPORT: { + "shape": "oval", + "style": "filled", + "fillcolor": "#FFFC87", + "color": "#808080", }, - PROVONE_DATALINK: { - 'shape': 'oval', 'style': 'filled', - 'fillcolor': '#FFFC87', 'color': '#808080' + PROVONE_OUTPUTPORT: { + "shape": "oval", + "style": "filled", + "fillcolor": "#FFFC87", + "color": "#808080", }, - PROVONE_SEQCTRLLINK: { - 'shape': 'oval', 'style': 'filled', - 'fillcolor': '#FFFC87', 'color': '#808080' + PROVONE_DATALINK: { + "shape": "oval", + "style": "filled", + "fillcolor": "#FFFC87", + "color": "#808080", }, - PROVONE_USER: { - 'shape': 'house', 'style': 'filled', - 'fillcolor': '#FED37F' + PROVONE_SEQCTRLLINK: { + "shape": "oval", + "style": "filled", + "fillcolor": "#FFFC87", + "color": "#808080", }, - #PROVONE_WORKFLOW: PROV_ENTITY, - PROVONE_DATA: { - 'shape': 'oval', 'style': 'filled', 
- 'fillcolor': '#FFFC87', 'color': '#808080' + PROVONE_USER: {"shape": "house", "style": "filled", "fillcolor": "#FED37F"}, + # PROVONE_WORKFLOW: PROV_ENTITY, + PROVONE_DATA: { + "shape": "oval", + "style": "filled", + "fillcolor": "#FFFC87", + "color": "#808080", }, - PROVONE_HASINPORT: { - 'label': 'hasInPort', 'fontsize': '10.0' - }, - PROVONE_HASOUTPORT: { - 'label': 'hasOutPort', 'fontsize': '10.0' - }, - PROVONE_HASSUBPROCESS: { - 'label': 'hasSubProcess', 'fontsize': '10.0' - }, - PROVONE_INPORTTODL: { - 'label': 'inPortToDL', 'fontsize': '10.0' - }, - PROVONE_OUTPORTTODL: { - 'label': 'outPortToDL', 'fontsize': '10.0' - }, - PROVONE_CLTODESTP: { - 'label': 'CLtoDestP', 'fontsize': '10.0' - }, - PROVONE_SOURCEPTOCL: { - 'label': 'sourcePToCL', 'fontsize': '10.0' - }, - PROVONE_DLTOOUTPORT: { - 'label': 'DLToOutPort', 'fontsize': '10.0' - }, - PROVONE_DLTOINPORT: { - 'label': 'DLToInPort', 'fontsize': '10.0' - }, - PROVONE_DATAONLINK: { - 'label': 'dataOnLink', 'fontsize': '10.0' - }, - PROVONE_ISPARTOF: { - 'label': 'isPartOf', 'fontsize': '10.0' - }, - + PROVONE_HASINPORT: {"label": "hasInPort", "fontsize": "10.0"}, + PROVONE_HASOUTPORT: {"label": "hasOutPort", "fontsize": "10.0"}, + PROVONE_HASSUBPROCESS: {"label": "hasSubProcess", "fontsize": "10.0"}, + PROVONE_INPORTTODL: {"label": "inPortToDL", "fontsize": "10.0"}, + PROVONE_OUTPORTTODL: {"label": "outPortToDL", "fontsize": "10.0"}, + PROVONE_CLTODESTP: {"label": "CLtoDestP", "fontsize": "10.0"}, + PROVONE_SOURCEPTOCL: {"label": "sourcePToCL", "fontsize": "10.0"}, + PROVONE_DLTOOUTPORT: {"label": "DLToOutPort", "fontsize": "10.0"}, + PROVONE_DLTOINPORT: {"label": "DLToInPort", "fontsize": "10.0"}, + PROVONE_DATAONLINK: {"label": "dataOnLink", "fontsize": "10.0"}, + PROVONE_ISPARTOF: {"label": "isPartOf", "fontsize": "10.0"}, # Generic node 0: { - 'shape': 'oval', 'style': 'filled', - 'fillcolor': 'lightgray', 'color': 'dimgray' + "shape": "oval", + "style": "filled", + "fillcolor": "lightgray", + "color": "dimgray", }, # Elements PROV_ENTITY: { - 'shape': 'oval', 'style': 'filled', - 'fillcolor': '#FFFC87', 'color': '#808080' + "shape": "oval", + "style": "filled", + "fillcolor": "#FFFC87", + "color": "#808080", }, PROV_ACTIVITY: { - 'shape': 'box', 'style': 'filled', - 'fillcolor': '#9FB1FC', 'color': '#0000FF' - }, - PROV_AGENT: { - 'shape': 'house', 'style': 'filled', - 'fillcolor': '#FED37F' - }, - PROV_BUNDLE: { - 'shape': 'folder', 'style': 'filled', - 'fillcolor': 'aliceblue' + "shape": "box", + "style": "filled", + "fillcolor": "#9FB1FC", + "color": "#0000FF", }, + PROV_AGENT: {"shape": "house", "style": "filled", "fillcolor": "#FED37F"}, + PROV_BUNDLE: {"shape": "folder", "style": "filled", "fillcolor": "aliceblue"}, # Relations PROV_GENERATION: { - 'label': 'wasGeneratedBy', 'fontsize': '10.0', - 'color': 'darkgreen', 'fontcolor': 'darkgreen' + "label": "wasGeneratedBy", + "fontsize": "10.0", + "color": "darkgreen", + "fontcolor": "darkgreen", }, PROV_USAGE: { - 'label': 'used', 'fontsize': '10.0', - 'color': 'red4', 'fontcolor': 'red' - }, - PROV_COMMUNICATION: { - 'label': 'wasInformedBy', 'fontsize': '10.0' - }, - PROV_START: { - 'label': 'wasStartedBy', 'fontsize': '10.0' - }, - PROV_END: { - 'label': 'wasEndedBy', 'fontsize': '10.0' - }, - PROV_INVALIDATION: { - 'label': 'wasInvalidatedBy', 'fontsize': '10.0' - }, - PROV_DERIVATION: { - 'label': 'wasDerivedFrom', 'fontsize': '10.0' + "label": "used", + "fontsize": "10.0", + "color": "red4", + "fontcolor": "red", }, + PROV_COMMUNICATION: {"label": "wasInformedBy", 
"fontsize": "10.0"}, + PROV_START: {"label": "wasStartedBy", "fontsize": "10.0"}, + PROV_END: {"label": "wasEndedBy", "fontsize": "10.0"}, + PROV_INVALIDATION: {"label": "wasInvalidatedBy", "fontsize": "10.0"}, + PROV_DERIVATION: {"label": "wasDerivedFrom", "fontsize": "10.0"}, PROV_ATTRIBUTION: { - 'label': 'wasAttributedTo', 'fontsize': '10.0', - 'color': '#FED37F' + "label": "wasAttributedTo", + "fontsize": "10.0", + "color": "#FED37F", }, PROV_ASSOCIATION: { - 'label': 'wasAssociatedWith', 'fontsize': '10.0', - 'color': '#FED37F' + "label": "wasAssociatedWith", + "fontsize": "10.0", + "color": "#FED37F", }, PROV_DELEGATION: { - 'label': 'actedOnBehalfOf', 'fontsize': '10.0', - 'color': '#FED37F' - }, - PROV_INFLUENCE: { - 'label': 'wasInfluencedBy', 'fontsize': '10.0', - 'color': 'grey' - }, - PROV_ALTERNATE: { - 'label': 'alternateOf', 'fontsize': '10.0' - }, - PROV_SPECIALIZATION: { - 'label': 'specializationOf', 'fontsize': '10.0' - }, - PROV_MENTION: { - 'label': 'mentionOf', 'fontsize': '10.0' - }, - PROV_MEMBERSHIP: { - 'label': 'hadMember', 'fontsize': '10.0' + "label": "actedOnBehalfOf", + "fontsize": "10.0", + "color": "#FED37F", }, + PROV_INFLUENCE: {"label": "wasInfluencedBy", "fontsize": "10.0", "color": "grey"}, + PROV_ALTERNATE: {"label": "alternateOf", "fontsize": "10.0"}, + PROV_SPECIALIZATION: {"label": "specializationOf", "fontsize": "10.0"}, + PROV_MENTION: {"label": "mentionOf", "fontsize": "10.0"}, + PROV_MEMBERSHIP: {"label": "hadMember", "fontsize": "10.0"}, } # DOT_PROVONE_STYLE = dict.fromkeys([PROVONE_PROCESS, PROVONE_PROCESSEXEC, # PROVONE_INPUTPORT, PROVONE_OUTPUTPORT, @@ -202,19 +190,18 @@ # }) ANNOTATION_STYLE = { - 'shape': 'note', 'color': 'gray', - 'fontcolor': 'black', 'fontsize': '10' -} -ANNOTATION_LINK_STYLE = { - 'arrowhead': 'none', 'style': 'dashed', - 'color': 'gray' + "shape": "note", + "color": "gray", + "fontcolor": "black", + "fontsize": "10", } -ANNOTATION_START_ROW = '<' +ANNOTATION_LINK_STYLE = {"arrowhead": "none", "style": "dashed", "color": "gray"} +ANNOTATION_START_ROW = '<
' ANNOTATION_ROW_TEMPLATE = """ """ -ANNOTATION_END_ROW = '
%s %s
>' +ANNOTATION_END_ROW = " >" def htlm_link_if_uri(value): @@ -225,9 +212,14 @@ def htlm_link_if_uri(value): return six.text_type(value) -def provone_to_dot(bundle, show_nary=True, use_labels=False, - direction='BT', - show_element_attributes=True, show_relation_attributes=True): +def provone_to_dot( + bundle, + show_nary=True, + use_labels=False, + direction="BT", + show_element_attributes=True, + show_relation_attributes=True, +): """ Convert a provenance bundle/document into a DOT graphical representation. @@ -244,10 +236,10 @@ def provone_to_dot(bundle, show_nary=True, use_labels=False, :type show_relation_attributes: bool :returns: :class:`pydot.Dot` -- the Dot object. """ - if direction not in {'BT', 'TB', 'LR', 'RL'}: + if direction not in {"BT", "TB", "LR", "RL"}: # Invalid direction is provided - direction = 'BT' # reset it to the default value - maindot = pydot.Dot(graph_type='digraph', rankdir=direction, charset='utf-8') + direction = "BT" # reset it to the default value + maindot = pydot.Dot(graph_type="digraph", rankdir=direction, charset="utf-8") node_map = {} count = [0, 0, 0, 0] # counters for node ids @@ -256,7 +248,8 @@ def _bundle_to_dot(dot, bundle): def _attach_attribute_annotation(node, record): # Adding a node to show all attributes attributes = list( - (attr_name, value) for attr_name, value in record.attributes + (attr_name, value) + for attr_name, value in record.attributes if attr_name not in PROV_ATTRIBUTE_QNAMES ) @@ -268,20 +261,23 @@ def _attach_attribute_annotation(node, record): ann_rows = [ANNOTATION_START_ROW] ann_rows.extend( - ANNOTATION_ROW_TEMPLATE % ( - attr.uri, escape(six.text_type(attr)), - ' href=\"%s\"' % value.uri if isinstance(value, Identifier) - else '', - escape(six.text_type(value) - if not isinstance(value, datetime) else - six.text_type(value.isoformat()))) + ANNOTATION_ROW_TEMPLATE + % ( + attr.uri, + escape(six.text_type(attr)), + ' href="%s"' % value.uri if isinstance(value, Identifier) else "", + escape( + six.text_type(value) + if not isinstance(value, datetime) + else six.text_type(value.isoformat()) + ), + ) for attr, value in attributes ) ann_rows.append(ANNOTATION_END_ROW) count[3] += 1 annotations = pydot.Node( - 'ann%d' % count[3], label='\n'.join(ann_rows), - **ANNOTATION_STYLE + "ann%d" % count[3], label="\n".join(ann_rows), **ANNOTATION_STYLE ) dot.add_node(annotations) dot.add_edge(pydot.Edge(annotations, node, **ANNOTATION_LINK_STYLE)) @@ -289,7 +285,7 @@ def _attach_attribute_annotation(node, record): def _add_bundle(bundle): count[2] += 1 subdot = pydot.Cluster( - graph_name='c%d' % count[2], URL='"%s"' % bundle.identifier.uri + graph_name="c%d" % count[2], URL='"%s"' % bundle.identifier.uri ) if use_labels: if bundle.label == bundle.identifier: @@ -298,12 +294,14 @@ def _add_bundle(bundle): # Fancier label if both are different. The label will be # the main node text, whereas the identifier will be a # kind of subtitle. - bundle_label = ('<%s
' - '' - '%s>') + bundle_label = ( + "<%s
" + '' + "%s>" + ) bundle_label = bundle_label % ( six.text_type(bundle.label), - six.text_type(bundle.identifier) + six.text_type(bundle.identifier), ) subdot.set_label('"%s"' % six.text_type(bundle_label)) else: @@ -314,7 +312,7 @@ def _add_bundle(bundle): def _add_node(record): count[0] += 1 - node_id = 'n%d' % count[0] + node_id = "n%d" % count[0] if use_labels: if record.label == record.identifier: node_label = '"%s"' % six.text_type(record.label) @@ -322,11 +320,15 @@ def _add_node(record): # Fancier label if both are different. The label will be # the main node text, whereas the identifier will be a # kind of subtitle. - node_label = ('<%s
' - '' - '%s>') - node_label = node_label % (six.text_type(record.label), - six.text_type(record.identifier)) + node_label = ( + "<%s
" + '' + "%s>" + ) + node_label = node_label % ( + six.text_type(record.label), + six.text_type(record.identifier), + ) else: node_label = '"%s"' % six.text_type(record.identifier) @@ -334,9 +336,7 @@ def _add_node(record): print("record type: ", record.get_type()) style = DOT_PROVONE_STYLE[record.get_type()] print("style: ", style) - node = pydot.Node( - node_id, label=node_label, URL='"%s"' % uri, **style - ) + node = pydot.Node(node_id, label=node_label, URL='"%s"' % uri, **style) node_map[uri] = node dot.add_node(node) @@ -346,24 +346,20 @@ def _add_node(record): def _add_generic_node(qname): count[0] += 1 - node_id = 'n%d' % count[0] + node_id = "n%d" % count[0] node_label = '"%s"' % six.text_type(qname) uri = qname.uri style = DOT_PROVONE_STYLE[0] - node = pydot.Node( - node_id, label=node_label, URL='"%s"' % uri, **style - ) + node = pydot.Node(node_id, label=node_label, URL='"%s"' % uri, **style) node_map[uri] = node dot.add_node(node) return node def _get_bnode(): count[1] += 1 - bnode_id = 'b%d' % count[1] - bnode = pydot.Node( - bnode_id, label='""', shape='point', color='gray' - ) + bnode_id = "b%d" % count[1] + bnode = pydot.Node(bnode_id, label='""', shape="point", color="gray") dot.add_node(bnode) return bnode @@ -395,16 +391,16 @@ def _get_node(qname): continue # picking element nodes nodes = [ - value for attr_name, value in rec.formal_attributes + value + for attr_name, value in rec.formal_attributes if attr_name in PROVONE_ATTRIBUTE_QNAMES ] other_attributes = [ - (attr_name, value) for attr_name, value in rec.attributes + (attr_name, value) + for attr_name, value in rec.attributes if attr_name not in PROV_ATTRIBUTE_QNAMES ] - add_attribute_annotation = ( - show_relation_attributes and other_attributes - ) + add_attribute_annotation = show_relation_attributes and other_attributes add_nary_elements = len(nodes) > 2 and show_nary style = DOT_PROVONE_STYLE[rec.get_type()] if len(nodes) < 2: # too few elements for a relation? 
@@ -416,29 +412,23 @@ def _get_node(qname): # the first segment dot.add_edge( - pydot.Edge( - _get_node(nodes[0]), bnode, arrowhead='none', **style - ) + pydot.Edge(_get_node(nodes[0]), bnode, arrowhead="none", **style) ) style = dict(style) # copy the style - del style['label'] # not showing label in the second segment + del style["label"] # not showing label in the second segment # the second segment dot.add_edge(pydot.Edge(bnode, _get_node(nodes[1]), **style)) if add_nary_elements: - style['color'] = 'gray' # all remaining segment to be gray + style["color"] = "gray" # all remaining segment to be gray for node in nodes[2:]: if node is not None: - dot.add_edge( - pydot.Edge(bnode, _get_node(node), **style) - ) + dot.add_edge(pydot.Edge(bnode, _get_node(node), **style)) if add_attribute_annotation: _attach_attribute_annotation(bnode, rec) else: # show a simple binary relations with no annotation dot.add_edge( - pydot.Edge( - _get_node(nodes[0]), _get_node(nodes[1]), **style - ) + pydot.Edge(_get_node(nodes[0]), _get_node(nodes[1]), **style) ) try: diff --git a/nidm/core/provone.py b/nidm/core/provone.py index 16a419b5..b7d5df69 100644 --- a/nidm/core/provone.py +++ b/nidm/core/provone.py @@ -5,136 +5,195 @@ ProvONE: http://vcvcomputing.com/provone/provone.html """ +import io import logging import os -import io import shutil import tempfile -from six.moves.urllib.parse import urlparse - import nidm.core.serializers -from prov.constants import PROV_N_MAP, PROV_ATTR_STARTTIME, PROV_ATTR_ENDTIME, \ - PROV_ATTR_TIME, PROV_DERIVATION, PROV_GENERATION, PROV_USAGE, \ - PROV_COMMUNICATION, PROV_ASSOCIATION, PROV_ATTRIBUTION, PROV_ATTR_COLLECTION, \ - PROV_ATTR_USAGE, PROV_MEMBERSHIP -from prov.model import ProvEntity, ProvAgent, ProvDocument, ProvAttribution, \ - PROV_REC_CLS, ProvActivity, _ensure_datetime, ProvAssociation, \ - ProvCommunication, ProvDerivation, ProvRelation, ProvGeneration, ProvUsage, \ - ProvMembership -from .Constants import PROVONE_N_MAP, PROVONE_PROCESS, PROVONE_INPUTPORT, \ - PROVONE_OUTPUTPORT, PROVONE_DATA, PROVONE_DATALINK, PROVONE_SEQCTRLLINK, \ - PROVONE_USER, PROVONE_PROCESSEXEC, PROVONE_ATTR_PROCESS, PROVONE_ATTR_USER, \ - PROVONE_ATTR_PROCESSEXEC, PROVONE_ATTR_PLAN, \ - PROVONE_ATTR_INFORMED, PROVONE_ATTR_INFORMANT, \ - PROVONE_ATTR_GENERATED_DATA, PROVONE_ATTR_USED_DATA, \ - PROVONE_ATTR_GENERATION, \ - PROVONE_ATTR_DATA, PROVONE_ATTR_INPUTPORT, \ - PROVONE_HASINPORT, PROVONE_ATTR_OUTPUTPORT, PROVONE_HASOUTPORT, \ - PROVONE_HASSUBPROCESS, PROVONE_ATTR_DATALINK, PROVONE_INPORTTODL, \ - PROVONE_OUTPORTTODL, PROVONE_DLTOOUTPORT, PROVONE_DLTOINPORT, \ - PROVONE_ATTR_SEQCTRLLINK, PROVONE_CLTODESTP, PROVONE_SOURCEPTOCL, \ - PROVONE_DATAONLINK, PROVONE_HASDEFAULTPARAM, \ - PROVONE_ATTR_GENERATED_PROCESS, PROVONE_ATTR_USED_PROCESS, PROVONE_ISPARTOF, \ - PROVONE_ATTR_RELATED_PREXEC, PROVONE_ATTR_USED_PREXEC, \ - PROVONE_ATTR_CHILD_PREXEC, PROVONE_MEMBERSHIP - -__author__ = 'Sanu Ann Abraham' -__email__ = 'sanuann@mit.edu' +from prov.constants import ( + PROV_ASSOCIATION, + PROV_ATTR_COLLECTION, + PROV_ATTR_ENDTIME, + PROV_ATTR_STARTTIME, + PROV_ATTR_TIME, + PROV_ATTR_USAGE, + PROV_ATTRIBUTION, + PROV_COMMUNICATION, + PROV_DERIVATION, + PROV_GENERATION, + PROV_MEMBERSHIP, + PROV_N_MAP, + PROV_USAGE, +) +from prov.model import ( + PROV_REC_CLS, + ProvActivity, + ProvAgent, + ProvAssociation, + ProvAttribution, + ProvCommunication, + ProvDerivation, + ProvDocument, + ProvEntity, + ProvGeneration, + ProvMembership, + ProvRelation, + ProvUsage, + _ensure_datetime, +) +from 
six.moves.urllib.parse import urlparse +from .Constants import ( + PROVONE_ATTR_CHILD_PREXEC, + PROVONE_ATTR_DATA, + PROVONE_ATTR_DATALINK, + PROVONE_ATTR_GENERATED_DATA, + PROVONE_ATTR_GENERATED_PROCESS, + PROVONE_ATTR_GENERATION, + PROVONE_ATTR_INFORMANT, + PROVONE_ATTR_INFORMED, + PROVONE_ATTR_INPUTPORT, + PROVONE_ATTR_OUTPUTPORT, + PROVONE_ATTR_PLAN, + PROVONE_ATTR_PROCESS, + PROVONE_ATTR_PROCESSEXEC, + PROVONE_ATTR_RELATED_PREXEC, + PROVONE_ATTR_SEQCTRLLINK, + PROVONE_ATTR_USED_DATA, + PROVONE_ATTR_USED_PREXEC, + PROVONE_ATTR_USED_PROCESS, + PROVONE_ATTR_USER, + PROVONE_CLTODESTP, + PROVONE_DATA, + PROVONE_DATALINK, + PROVONE_DATAONLINK, + PROVONE_DLTOINPORT, + PROVONE_DLTOOUTPORT, + PROVONE_HASDEFAULTPARAM, + PROVONE_HASINPORT, + PROVONE_HASOUTPORT, + PROVONE_HASSUBPROCESS, + PROVONE_INPORTTODL, + PROVONE_INPUTPORT, + PROVONE_ISPARTOF, + PROVONE_MEMBERSHIP, + PROVONE_N_MAP, + PROVONE_OUTPORTTODL, + PROVONE_OUTPUTPORT, + PROVONE_PROCESS, + PROVONE_PROCESSEXEC, + PROVONE_SEQCTRLLINK, + PROVONE_SOURCEPTOCL, + PROVONE_USER, +) + +__author__ = "Sanu Ann Abraham" +__email__ = "sanuann@mit.edu" logger = logging.getLogger(__name__) # add ProvOne Notation mapping to Prov_N_MAP dict -#PROV_N_MAP.update(PROVONE_N_MAP) +# PROV_N_MAP.update(PROVONE_N_MAP) PROVONE_N_MAP.update(PROV_N_MAP) class ProvPlan(ProvEntity): - """ - ProvONE Plan element - """ - pass + """ + ProvONE Plan element + """ + + pass class Process(ProvEntity): - """ - ProvONE Process element """ + """ + ProvONE Process element""" - _prov_type = PROVONE_PROCESS + _prov_type = PROVONE_PROCESS class InputPort(ProvEntity): - """ ProvONE Input Port element """ + """ProvONE Input Port element""" - _prov_type = PROVONE_INPUTPORT + _prov_type = PROVONE_INPUTPORT class OutputPort(ProvEntity): - """ ProvONE Output Port element""" + """ProvONE Output Port element""" - _prov_type = PROVONE_OUTPUTPORT + _prov_type = PROVONE_OUTPUTPORT class Data(ProvEntity): - """ - basic unit of information consumed or produced by a Process. Multiple Data items may be grouped into a Collection. - """ - _prov_type = PROVONE_DATA + """ + basic unit of information consumed or produced by a Process. Multiple Data items may be grouped into a Collection. + """ + + _prov_type = PROVONE_DATA class DataLink(ProvEntity): - """ ProvONE DataLink Element """ + """ProvONE DataLink Element""" - _prov_type = PROVONE_DATALINK + _prov_type = PROVONE_DATALINK class SeqCtrlLink(ProvEntity): - """ ProvONE SeqCtrlLink Element """ + """ProvONE SeqCtrlLink Element""" - _prov_type = PROVONE_SEQCTRLLINK + _prov_type = PROVONE_SEQCTRLLINK class User(ProvAgent): - """ProvONE User element.""" + """ProvONE User element.""" - _prov_type = PROVONE_USER + _prov_type = PROVONE_USER class ProcessExec(ProvActivity): - """ ProvONE Process Execution element. 
""" + """ProvONE Process Execution element.""" - _prov_type = PROVONE_PROCESSEXEC + _prov_type = PROVONE_PROCESSEXEC class Attribution(ProvAttribution): - """ProvONE Attribution relationship.""" + """ProvONE Attribution relationship.""" - FORMAL_ATTRIBUTES = (PROVONE_ATTR_PROCESS, PROVONE_ATTR_USER) + FORMAL_ATTRIBUTES = (PROVONE_ATTR_PROCESS, PROVONE_ATTR_USER) - _prov_type = PROV_ATTRIBUTION + _prov_type = PROV_ATTRIBUTION class Association(ProvAssociation): - """Provenance Association relationship.""" - FORMAL_ATTRIBUTES = (PROVONE_ATTR_PROCESSEXEC, PROVONE_ATTR_PROCESS, - PROVONE_ATTR_PLAN) + """Provenance Association relationship.""" + + FORMAL_ATTRIBUTES = ( + PROVONE_ATTR_PROCESSEXEC, + PROVONE_ATTR_PROCESS, + PROVONE_ATTR_PLAN, + ) - _prov_type = PROV_ASSOCIATION + _prov_type = PROV_ASSOCIATION class Communication(ProvCommunication): - """Provenance Communication relationship.""" + """Provenance Communication relationship.""" - FORMAL_ATTRIBUTES = (PROVONE_ATTR_INFORMED, PROVONE_ATTR_INFORMANT) + FORMAL_ATTRIBUTES = (PROVONE_ATTR_INFORMED, PROVONE_ATTR_INFORMANT) - _prov_type = PROV_COMMUNICATION + _prov_type = PROV_COMMUNICATION class Derivation(ProvDerivation): """Provenance Derivation relationship.""" - FORMAL_ATTRIBUTES = (PROVONE_ATTR_GENERATED_DATA, PROVONE_ATTR_USED_DATA, - PROVONE_ATTR_PROCESSEXEC, PROVONE_ATTR_GENERATION, - PROV_ATTR_USAGE) + FORMAL_ATTRIBUTES = ( + PROVONE_ATTR_GENERATED_DATA, + PROVONE_ATTR_USED_DATA, + PROVONE_ATTR_PROCESSEXEC, + PROVONE_ATTR_GENERATION, + PROV_ATTR_USAGE, + ) _prov_type = PROV_DERIVATION @@ -144,7 +203,7 @@ class Generation(ProvGeneration): FORMAL_ATTRIBUTES = (PROVONE_ATTR_DATA, PROVONE_ATTR_PROCESSEXEC, PROV_ATTR_TIME) - #_prov_type = PROV_GENERATION + # _prov_type = PROV_GENERATION class Usage(ProvUsage): @@ -152,14 +211,13 @@ class Usage(ProvUsage): FORMAL_ATTRIBUTES = (PROVONE_ATTR_PROCESSEXEC, PROVONE_ATTR_DATA, PROV_ATTR_TIME) - #_prov_type = PROV_USAGE + # _prov_type = PROV_USAGE class Partnership(ProvRelation): """Provenance Membership relationship.""" - FORMAL_ATTRIBUTES = (PROVONE_ATTR_USED_PREXEC, - PROVONE_ATTR_CHILD_PREXEC) + FORMAL_ATTRIBUTES = (PROVONE_ATTR_USED_PREXEC, PROVONE_ATTR_CHILD_PREXEC) _prov_type = PROVONE_ISPARTOF @@ -170,697 +228,749 @@ class Membership(ProvMembership): FORMAL_ATTRIBUTES = (PROV_ATTR_COLLECTION, PROVONE_ATTR_DATA) - #_prov_type = PROV_MEMBERSHIP + # _prov_type = PROV_MEMBERSHIP class HasInput(ProvRelation): - """ProvONE HasInput Port relationship.""" + """ProvONE HasInput Port relationship.""" - FORMAL_ATTRIBUTES = (PROVONE_ATTR_PROCESS, - PROVONE_ATTR_INPUTPORT) + FORMAL_ATTRIBUTES = (PROVONE_ATTR_PROCESS, PROVONE_ATTR_INPUTPORT) - _prov_type = PROVONE_HASINPORT + _prov_type = PROVONE_HASINPORT class HasOutput(ProvRelation): - """ProvONE HasOutput Port relationship.""" + """ProvONE HasOutput Port relationship.""" - FORMAL_ATTRIBUTES = (PROVONE_ATTR_PROCESS, PROVONE_ATTR_OUTPUTPORT) + FORMAL_ATTRIBUTES = (PROVONE_ATTR_PROCESS, PROVONE_ATTR_OUTPUTPORT) - _prov_type = PROVONE_HASOUTPORT + _prov_type = PROVONE_HASOUTPORT class HasSubProcess(ProvRelation): - """ProvONE Has SubProcess relationship.""" + """ProvONE Has SubProcess relationship.""" - FORMAL_ATTRIBUTES = (PROVONE_ATTR_USED_PROCESS, PROVONE_ATTR_GENERATED_PROCESS) + FORMAL_ATTRIBUTES = (PROVONE_ATTR_USED_PROCESS, PROVONE_ATTR_GENERATED_PROCESS) - _prov_type = PROVONE_HASSUBPROCESS + _prov_type = PROVONE_HASSUBPROCESS class InToDL(ProvRelation): - """ ProvONE InPort to DL relationship """ + """ProvONE InPort to DL relationship""" - 
FORMAL_ATTRIBUTES = (PROVONE_ATTR_INPUTPORT, PROVONE_ATTR_DATALINK) + FORMAL_ATTRIBUTES = (PROVONE_ATTR_INPUTPORT, PROVONE_ATTR_DATALINK) - _prov_type = PROVONE_INPORTTODL + _prov_type = PROVONE_INPORTTODL class OutToDL(ProvRelation): - """ ProvONE Output port to DL relationship """ + """ProvONE Output port to DL relationship""" - FORMAL_ATTRIBUTES = (PROVONE_ATTR_OUTPUTPORT, PROVONE_ATTR_DATALINK) + FORMAL_ATTRIBUTES = (PROVONE_ATTR_OUTPUTPORT, PROVONE_ATTR_DATALINK) - _prov_type = PROVONE_OUTPORTTODL + _prov_type = PROVONE_OUTPORTTODL class DLtoOutPort(ProvRelation): - """ ProvONE DL to Output port relationship """ + """ProvONE DL to Output port relationship""" - FORMAL_ATTRIBUTES = (PROVONE_ATTR_DATALINK, PROVONE_ATTR_OUTPUTPORT) + FORMAL_ATTRIBUTES = (PROVONE_ATTR_DATALINK, PROVONE_ATTR_OUTPUTPORT) - _prov_type = PROVONE_DLTOOUTPORT + _prov_type = PROVONE_DLTOOUTPORT class DLtoInPort(ProvRelation): - """ ProvONE DL to Input port relationship """ + """ProvONE DL to Input port relationship""" - FORMAL_ATTRIBUTES = (PROVONE_ATTR_DATALINK, PROVONE_ATTR_INPUTPORT) + FORMAL_ATTRIBUTES = (PROVONE_ATTR_DATALINK, PROVONE_ATTR_INPUTPORT) - _prov_type = PROVONE_DLTOINPORT + _prov_type = PROVONE_DLTOINPORT class CLtoDestP(ProvRelation): - """ProvONE CLtoDestP relationship.""" + """ProvONE CLtoDestP relationship.""" - FORMAL_ATTRIBUTES = (PROVONE_ATTR_SEQCTRLLINK, PROVONE_ATTR_PROCESS) + FORMAL_ATTRIBUTES = (PROVONE_ATTR_SEQCTRLLINK, PROVONE_ATTR_PROCESS) - _prov_type = PROVONE_CLTODESTP + _prov_type = PROVONE_CLTODESTP class SourcePtoCL(ProvRelation): - """ProvONE SourcePtoCL relationship.""" + """ProvONE SourcePtoCL relationship.""" - FORMAL_ATTRIBUTES = (PROVONE_ATTR_PROCESS, PROVONE_ATTR_SEQCTRLLINK) + FORMAL_ATTRIBUTES = (PROVONE_ATTR_PROCESS, PROVONE_ATTR_SEQCTRLLINK) - _prov_type = PROVONE_SOURCEPTOCL + _prov_type = PROVONE_SOURCEPTOCL class DataLinkage(ProvRelation): - """ ProvONE dataOnLink relationship """ + """ProvONE dataOnLink relationship""" - FORMAL_ATTRIBUTES = (PROVONE_ATTR_DATA, PROVONE_ATTR_DATALINK, - PROVONE_ATTR_PROCESS) + FORMAL_ATTRIBUTES = (PROVONE_ATTR_DATA, PROVONE_ATTR_DATALINK, PROVONE_ATTR_PROCESS) - _prov_type = PROVONE_DATAONLINK + _prov_type = PROVONE_DATAONLINK class Parameterization(ProvRelation): - """ ProvONE hasDefaultParam relationship. 
""" + """ProvONE hasDefaultParam relationship.""" - FORMAL_ATTRIBUTES = (PROVONE_ATTR_INPUTPORT, PROVONE_ATTR_DATA) + FORMAL_ATTRIBUTES = (PROVONE_ATTR_INPUTPORT, PROVONE_ATTR_DATA) - _prov_type = PROVONE_HASDEFAULTPARAM + _prov_type = PROVONE_HASDEFAULTPARAM -class Workflow(Process, ): - pass +class Workflow( + Process, +): + pass # Class mappings from PROVONE record type -PROV_REC_CLS.update({ - PROVONE_PROCESS: Process, - PROVONE_PROCESSEXEC: ProcessExec, - PROVONE_DATA: Data, - PROV_ATTRIBUTION: Attribution, - PROV_ASSOCIATION: Association, - PROV_COMMUNICATION: Communication, - PROV_DERIVATION: Derivation, - PROV_GENERATION: Generation, - PROV_USAGE: Usage, - PROVONE_INPUTPORT: InputPort, - PROVONE_HASINPORT: HasInput, - PROVONE_OUTPUTPORT: OutputPort, - PROVONE_HASOUTPORT: HasOutput, - PROVONE_HASSUBPROCESS: HasSubProcess, - PROVONE_DATALINK: DataLink, - PROVONE_INPORTTODL: InToDL, - PROVONE_SEQCTRLLINK: SeqCtrlLink, - PROVONE_CLTODESTP: CLtoDestP, - PROVONE_SOURCEPTOCL: SourcePtoCL, - PROVONE_OUTPORTTODL: OutToDL, - PROVONE_DLTOOUTPORT: DLtoOutPort, - PROVONE_DLTOINPORT: DLtoInPort, - PROVONE_DATAONLINK: DataLinkage, - PROVONE_HASDEFAULTPARAM: Parameterization, - PROVONE_USER: User, - PROVONE_ISPARTOF: Partnership, - PROV_MEMBERSHIP: Membership, - -}) +PROV_REC_CLS.update( + { + PROVONE_PROCESS: Process, + PROVONE_PROCESSEXEC: ProcessExec, + PROVONE_DATA: Data, + PROV_ATTRIBUTION: Attribution, + PROV_ASSOCIATION: Association, + PROV_COMMUNICATION: Communication, + PROV_DERIVATION: Derivation, + PROV_GENERATION: Generation, + PROV_USAGE: Usage, + PROVONE_INPUTPORT: InputPort, + PROVONE_HASINPORT: HasInput, + PROVONE_OUTPUTPORT: OutputPort, + PROVONE_HASOUTPORT: HasOutput, + PROVONE_HASSUBPROCESS: HasSubProcess, + PROVONE_DATALINK: DataLink, + PROVONE_INPORTTODL: InToDL, + PROVONE_SEQCTRLLINK: SeqCtrlLink, + PROVONE_CLTODESTP: CLtoDestP, + PROVONE_SOURCEPTOCL: SourcePtoCL, + PROVONE_OUTPORTTODL: OutToDL, + PROVONE_DLTOOUTPORT: DLtoOutPort, + PROVONE_DLTOINPORT: DLtoInPort, + PROVONE_DATAONLINK: DataLinkage, + PROVONE_HASDEFAULTPARAM: Parameterization, + PROVONE_USER: User, + PROVONE_ISPARTOF: Partnership, + PROV_MEMBERSHIP: Membership, + } +) class ProvONEDocument(ProvDocument): - """ ProvONE Document""" - - def __repr__(self): - return '' - - def process(self, identifier, other_attributes=None): - """ - Creates a new process. - - :param identifier: Identifier for new process. - :param other_attributes: Optional other attributes as a dictionary or list - of tuples to be added to the record optionally (default: None). - """ - return self.new_record(PROVONE_PROCESS, identifier, None, - other_attributes) - - def user(self, identifier, other_attributes=None): - """ - Creates a new user. - - :param identifier: Identifier for new user. - :param other_attributes: Optional other attributes as a dictionary or list - of tuples to be added to the record optionally (default: None). - """ - return self.new_record(PROVONE_USER, identifier, None, other_attributes) - - def data(self, identifier, other_attributes=None): - """ - Creates a new data. - - :param identifier: Identifier for new data. - :param other_attributes: Optional other attributes as a dictionary or list - of tuples to be added to the record optionally (default: None). - """ - return self.new_record(PROVONE_DATA, identifier, None, - other_attributes) - - def attribution(self, process_spec, user, identifier=None, - other_attributes=None): - """ - Creates a new attribution record between a process specification and an user. 
- - :param process_spec: ProcessSpecification or a string identifier for the process spec (relationship - source). - :param user: User or string identifier of the user involved in the - attribution (relationship destination). - :param identifier: Identifier for new attribution record. - :param other_attributes: Optional other attributes as a dictionary or list - of tuples to be added to the record optionally (default: None). - """ - return self.new_record( - PROV_ATTRIBUTION, identifier, { - PROVONE_ATTR_PROCESS: process_spec, - PROVONE_ATTR_USER: user - }, - other_attributes - ) - - def processExec(self, identifier, startTime=None, endTime=None, - other_attributes=None): - """ - Creates a new process execution. - - :param identifier: Identifier for new process execution. - :param startTime: Optional start time for the process execution (default: - None). - Either a :py:class:`datetime.datetime` object or a string that can be - parsed by :py:func:`dateutil.parser`. - :param endTime: Optional end time for the process execution (default: None). - Either a :py:class:`datetime.datetime` object or a string that can be - parsed by :py:func:`dateutil.parser`. - :param other_attributes: Optional other attributes as a dictionary or list - of tuples to be added to the record optionally (default: None). - """ - return self.new_record( - PROVONE_PROCESSEXEC, identifier, { - PROV_ATTR_STARTTIME: _ensure_datetime(startTime), - PROV_ATTR_ENDTIME: _ensure_datetime(endTime) - }, - other_attributes - ) - - def association(self, process_exec, process_spec=None, plan=None, - identifier=None, other_attributes=None): - """ - Creates a new association record for a process execution. - - :param process_exec: Process Execution or a string identifier for the - process execution. - :param process_spec: Process Spec or string identifier of the process - involved in the association (default: None). - :param plan: Optionally extra entity to state qualified association through - an internal plan (default: None). - :param identifier: Identifier for new association record. - :param other_attributes: Optional other attributes as a dictionary or list - of tuples to be added to the record optionally (default: None). - """ - return self.new_record( - PROV_ASSOCIATION, identifier, { - PROVONE_ATTR_PROCESSEXEC: process_exec, - PROVONE_ATTR_PROCESS: process_spec, - PROVONE_ATTR_PLAN: plan - }, - other_attributes - ) - - def derivation(self, generatedData, usedData, process_exec=None, - generation=None, usage=None, - identifier=None, other_attributes=None): - """ - Creates a new derivation record for a generated data from a used data. - - :param generatedData: Data or a string identifier for the generated - data (relationship source). - :param usedData: Data or a string identifier for the used data - (relationship destination). - :param process_exec: Process execution or string identifier of the - processExec involved in the derivation (default: None). - :param generation: Optionally extra activity to state qualified generation - through a generation (default: None). - :param usage: XXX (default: None). - :param identifier: Identifier for new derivation record. - :param other_attributes: Optional other attributes as a dictionary or list - of tuples to be added to the record optionally (default: None). 
- """ - attributes = {PROVONE_ATTR_GENERATED_DATA: generatedData, - PROVONE_ATTR_USED_DATA: usedData, - PROVONE_ATTR_PROCESSEXEC: process_exec, - PROVONE_ATTR_GENERATION: generation, - PROV_ATTR_USAGE: usage} - return self.new_record( - PROV_DERIVATION, identifier, attributes, other_attributes - ) - - def generation(self, data, process_exec=None, time=None, identifier=None, - other_attributes=None): - """ - Creates a new generation record for a data. - - :param data: Data or a string identifier for the data. - :param process_exec: Process execution or string identifier of the - process_exec involved in the generation (default: None). - :param time: Optional time for the generation (default: None). - Either a :py:class:`datetime.datetime` object or a string that can be - parsed by :py:func:`dateutil.parser`. - :param identifier: Identifier for new generation record. - :param other_attributes: Optional other attributes as a dictionary or list - of tuples to be added to the record optionally (default: None). - """ - return self.new_record( - PROV_GENERATION, identifier, { - PROVONE_ATTR_DATA: data, - PROVONE_ATTR_PROCESSEXEC: process_exec, - PROV_ATTR_TIME: _ensure_datetime(time) - }, - other_attributes - ) - - def usage(self, process_exec, data=None, time=None, identifier=None, - other_attributes=None): - """ - Creates a new usage record for a process execution. - - :param process_exec: Process Execution or a string identifier for the - processExec. - :param data: Data or string identifier of the data involved in - the usage relationship (default: None). - :param time: Optional time for the usage (default: None). - Either a :py:class:`datetime.datetime` object or a string that can be - parsed by :py:func:`dateutil.parser`. - :param identifier: Identifier for new usage record. - :param other_attributes: Optional other attributes as a dictionary or list - of tuples to be added to the record optionally (default: None). - """ - return self.new_record( - PROV_USAGE, identifier, { - PROVONE_ATTR_PROCESSEXEC: process_exec, - PROVONE_ATTR_DATA: data, - PROV_ATTR_TIME: _ensure_datetime(time)}, - other_attributes - ) - - def communication(self, informed, informant, identifier=None, - other_attributes=None): - """ - Creates a new communication record for a process exec. - - :param informed: The informed processExec (relationship destination). - :param informant: The informing processExec (relationship source). - :param identifier: Identifier for new communication record. - :param other_attributes: Optional other attributes as a dictionary or list - of tuples to be added to the record optionally (default: None). - """ - return self.new_record( - PROV_COMMUNICATION, identifier, { - PROVONE_ATTR_INFORMED: informed, - PROVONE_ATTR_INFORMANT: informant - }, - other_attributes - ) - - def input_port(self, identifier, other_attributes=None): - """ - Creates a new input port. - - :param identifier: Identifier for new input port. - :param other_attributes: Optional other attributes as a dictionary or list - of tuples to be added to the record optionally (default: None). - """ - return self.new_record(PROVONE_INPUTPORT, identifier, None, - other_attributes) - - def has_in_ports(self, process, in_ports, identifier=None, - other_attributes=None): - """ - Creates a new input port record for a process. - - :param process: Process or a string identifier for the - process(relationship source). - :param in_ports: Input Port or string identifier for the used input port ( - relationship destination). 
- :param identifier: Identifier for new input port membership. - :param other_attributes: Optional other attributes as a dictionary or - list of tuples to be added to the record optionally (default: None). - """ - return self.new_record( - PROVONE_HASINPORT, identifier, { - PROVONE_ATTR_PROCESS: process, - PROVONE_ATTR_INPUTPORT: in_ports, - }, - other_attributes - ) - - def output_port(self, identifier, other_attributes=None): - """ - Creates a new output port. - - :param identifier: Identifier for new output port. - :param other_attributes: Optional other attributes as a dictionary or list - of tuples to be added to the record optionally (default: None). - """ - return self.new_record(PROVONE_OUTPUTPORT, identifier, None, - other_attributes) - - def has_out_ports(self, process, out_ports, identifier=None, - other_attributes=None): - """ - Creates a new input port record for a process. - - :param process: Process or a string identifier for the - process(relationship source). - :param out_ports: Output Port or string identifier for the used output - port (relationship destination). - :param identifier: Identifier for new output port membership. - :param other_attributes: Optional other attributes as a dictionary or - list of tuples to be added to the record optionally (default: None). - """ - return self.new_record( - PROVONE_HASOUTPORT, identifier, { - PROVONE_ATTR_PROCESS: process, - PROVONE_ATTR_OUTPUTPORT: out_ports, - }, - other_attributes - ) - - def has_sub_process(self, used_process, generated_process, identifier=None, - other_attributes=None ): - """ - Creates a new has-sub-process record for a generated process from a - used process. - - :param used_process: Process or a string identifier for the - used process (relationship source). - :param generated_process: Process or a string identifier for the - generated process (relationship destination). - :param identifier: Identifier for new sub-process record. - :param other_attributes: Optional other attributes as a dictionary or list - of tuples to be added to the record optionally (default: None). - """ - attributes = {PROVONE_ATTR_USED_PROCESS: used_process, - PROVONE_ATTR_GENERATED_PROCESS: generated_process} - return self.new_record( - PROVONE_HASSUBPROCESS, identifier, attributes, other_attributes - ) - - def dataLink(self, identifier, other_attributes=None): - """ - Creates a new data link. - - :param identifier: Identifier for new data link. - :param other_attributes: Optional other attributes as a dictionary or list - of tuples to be added to the record optionally (default: None). - """ - return self.new_record(PROVONE_DATALINK, identifier, None, - other_attributes) - - def inPortToDL(self, in_port, dt_link, identifier=None, - other_attributes=None): - """ - - :param in_port: Input port or a string identifier for the - in_port(relationship source). - :param dt_link: Data Link or string identifier for the used data link ( - relationship destination). - :param identifier: Identifier for new data link membership. - :param other_attributes: Optional other attributes as a dictionary or - list of tuples to be added to the record optionally (default: None). - - """ - return self.new_record( - PROVONE_INPORTTODL, identifier, { - PROVONE_ATTR_INPUTPORT: in_port, - PROVONE_ATTR_DATALINK: dt_link, - }, - other_attributes - ) - - def outPortToDL(self, out_port, dt_link, identifier=None, - other_attributes=None): - """ - - :param out_port: Output port or a string identifier for the - out_port(relationship source). 
- :param dt_link: Data Link or string identifier for the used data link ( - relationship destination). - :param identifier: Identifier for new output-data link membership. - :param other_attributes: Optional other attributes as a dictionary or - list of tuples to be added to the record optionally (default: None). - - """ - return self.new_record( - PROVONE_OUTPORTTODL, identifier, { - PROVONE_ATTR_OUTPUTPORT: out_port, - PROVONE_ATTR_DATALINK: dt_link, - }, - other_attributes - ) - - def DLToOutPort(self, dt_link, out_port, identifier=None, - other_attributes=None): - """ - - :param dt_link: Data Link or string identifier for the used data link ( - relationship source). - :param out_port: Output port or a string identifier for the - out_port(relationship destination). - :param identifier: Identifier for new data link-output membership. - :param other_attributes: Optional other attributes as a dictionary or - list of tuples to be added to the record optionally (default: None). - - """ - return self.new_record( - PROVONE_DLTOOUTPORT, identifier, { - PROVONE_ATTR_DATALINK: dt_link, - PROVONE_ATTR_OUTPUTPORT: out_port, - }, - other_attributes - ) - - def DLToInPort(self, dt_link, in_port, identifier=None, - other_attributes=None): - """ - - :param dt_link: Data Link or string identifier for the used data link ( - relationship source). - :param in_port: Input port or a string identifier for the in_port ( - relationship destination). - :param identifier: Identifier for new data link-output membership. - :param other_attributes: Optional other attributes as a dictionary or - list of tuples to be added to the record optionally (default: None). - - """ - return self.new_record( - PROVONE_DLTOINPORT, identifier, { - PROVONE_ATTR_DATALINK: dt_link, - PROVONE_ATTR_INPUTPORT: in_port, - }, - other_attributes - ) - - def seqCtrlLink(self, identifier, other_attributes=None): - """ - Creates a new seq ctrl link. - - :param identifier: Identifier for new seq ctrl link. - :param other_attributes: Optional other attributes as a dictionary or list - of tuples to be added to the record optionally (default: None). - - """ - return self.new_record(PROVONE_SEQCTRLLINK, identifier, None, - other_attributes) - - def control_link_to_process(self, used_cntrl_link, used_process, - identifier=None, other_attributes=None): - """ - - :param used_cntrl_link: Control Link or a string identifier for the used - control link (relationship source). - :param used_process: Data Link or string identifier for the used process - (relationship destination). - :param identifier: Identifier for new control link to process relation. - :param other_attributes: Optional other attributes as a dictionary or - list of tuples to be added to the record optionally (default: None). - - """ - return self.new_record( - PROVONE_CLTODESTP, identifier, { - PROVONE_ATTR_SEQCTRLLINK: used_cntrl_link, - PROVONE_ATTR_PROCESS: used_process, - }, - other_attributes) - - def process_to_control_link(self, used_process, used_cntrl_link, - identifier=None, other_attributes=None): - """ - - :param used_process: Process or string identifier for the used process - (relationship source). - :param used_cntrl_link: Control Link or a string identifier for the used - control link (relationship destination). - :param identifier: Identifier for new process to control link relation. - :param other_attributes: Optional other attributes as a dictionary or - list of tuples to be added to the record optionally (default: None). 
- - """ - return self.new_record( - PROVONE_SOURCEPTOCL, identifier, { - PROVONE_ATTR_PROCESS: used_process, - PROVONE_ATTR_SEQCTRLLINK: used_cntrl_link, - }, - other_attributes) - - def linkage(self, data_item, dl_link, related_process=None, - identifier=None, - other_attributes=None): - """ - - :param data_item: Data or string identifier for the associated data ( - relationship source). - :param dl_link: Data link or string identifier for the data link ( - relationship destination). - :param related_process: Process or string identifier of the data link (default=None) - :param identifier: Identifier for new data-on-link relation. - :param other_attributes: Optional other attributes as a dictionary or - list of tuples to be added to the record optionally (default: None). - - """ - return self.new_record( - PROVONE_DATAONLINK, identifier, { - PROVONE_ATTR_DATA: data_item, - PROVONE_ATTR_DATALINK: dl_link, - PROVONE_ATTR_PROCESS: related_process, - }, - other_attributes) - - def parameterization(self, in_port, data_item, identifier=None, - other_attributes=None): - """ - - :param in_port: InputPort or string identifier for the associated in port ( - relationship source). - :param data_item: Data item or string identifier for the associated - default parameter (relationship destination). - :param identifier: Identifier for new default parameter relation. - :param other_attributes: Optional other attributes as a dictionary or - list of tuples to be added to the record optionally (default: None). - """ - return self.new_record( - PROVONE_HASDEFAULTPARAM, identifier, { - PROVONE_ATTR_INPUTPORT: in_port, - PROVONE_ATTR_DATA: data_item, - }, - other_attributes) - - def serialize(self, destination=None, format='json', **args): - """ - Serialize the :py:class:`ProvDocument` to the destination. - - Available serializers can be queried by the value of - `:py:attr:~prov.serializers.Registry.serializers` after loading them via - `:py:func:~prov.serializers.Registry.load_serializers()`. - - :param destination: Stream object to serialize the output to. Default is - `None`, which serializes as a string. - :param format: Serialization format (default: 'json'), defaulting to - PROV-JSON. - :return: Serialization in a string if no destination was given, - None otherwise. - """ - serializer = nidm.core.serializers.get(format)(self) - if destination is None: - stream = io.StringIO() - serializer.serialize(stream, **args) - return stream.getvalue() - if hasattr(destination, "write"): - stream = destination - serializer.serialize(stream, **args) - else: - location = destination - scheme, netloc, path, params, _query, fragment = urlparse(location) - if netloc != "": - print("WARNING: not saving as location " + - "is not a local file reference") - return - fd, name = tempfile.mkstemp() - stream = os.fdopen(fd, "wb") - serializer.serialize(stream, **args) - stream.close() - if hasattr(shutil, "move"): - shutil.move(name, path) - else: - shutil.copy(name, path) - os.remove(name) - - def is_part_of(self, used_processex, child_processex, identifier=None, - other_attributes=None): - """ - Creates a new is-part-of record for a process exec. - - :param used_processex: The parent processExec (relationship source). - :param child_processex: The child processExec (relationship destination). - :param identifier: Identifier for new is-part-of record. - :param other_attributes: Optional other attributes as a dictionary or list - of tuples to be added to the record optionally (default: None). 
- """ - return self.new_record( - PROVONE_ISPARTOF, identifier, { - PROVONE_ATTR_USED_PREXEC: used_processex, - PROVONE_ATTR_CHILD_PREXEC: child_processex - }, - other_attributes - ) - - def membership(self, collection, data): - """ - Creates a new membership record for data to a collection. - - :param collection: Collection the data is to be added to. - :param data: Data to be added to the collection. - """ - return self.new_record( - PROV_MEMBERSHIP, None, { - PROV_ATTR_COLLECTION: collection, - PROVONE_ATTR_DATA: data - } - ) - - # Aliases - wasAttributedTo = attribution - wasAssociatedWith = association - wasDerivedFrom = derivation - wasGeneratedBy = generation - wasInformedBy = communication - used = usage - hasInPort = has_in_ports - hasOutPort = has_out_ports - hasSubProcess = has_sub_process - CLtoDestP = control_link_to_process - sourcePToCL = process_to_control_link - hasDefaultParam = parameterization - dataOnLink = linkage - isPartOf = is_part_of - hadMember = membership + """ProvONE Document""" + + def __repr__(self): + return "" + + def process(self, identifier, other_attributes=None): + """ + Creates a new process. + + :param identifier: Identifier for new process. + :param other_attributes: Optional other attributes as a dictionary or list + of tuples to be added to the record optionally (default: None). + """ + return self.new_record(PROVONE_PROCESS, identifier, None, other_attributes) + + def user(self, identifier, other_attributes=None): + """ + Creates a new user. + + :param identifier: Identifier for new user. + :param other_attributes: Optional other attributes as a dictionary or list + of tuples to be added to the record optionally (default: None). + """ + return self.new_record(PROVONE_USER, identifier, None, other_attributes) + + def data(self, identifier, other_attributes=None): + """ + Creates a new data. + + :param identifier: Identifier for new data. + :param other_attributes: Optional other attributes as a dictionary or list + of tuples to be added to the record optionally (default: None). + """ + return self.new_record(PROVONE_DATA, identifier, None, other_attributes) + + def attribution(self, process_spec, user, identifier=None, other_attributes=None): + """ + Creates a new attribution record between a process specification and an user. + + :param process_spec: ProcessSpecification or a string identifier for the process spec (relationship + source). + :param user: User or string identifier of the user involved in the + attribution (relationship destination). + :param identifier: Identifier for new attribution record. + :param other_attributes: Optional other attributes as a dictionary or list + of tuples to be added to the record optionally (default: None). + """ + return self.new_record( + PROV_ATTRIBUTION, + identifier, + {PROVONE_ATTR_PROCESS: process_spec, PROVONE_ATTR_USER: user}, + other_attributes, + ) + + def processExec( + self, identifier, startTime=None, endTime=None, other_attributes=None + ): + """ + Creates a new process execution. + + :param identifier: Identifier for new process execution. + :param startTime: Optional start time for the process execution (default: + None). + Either a :py:class:`datetime.datetime` object or a string that can be + parsed by :py:func:`dateutil.parser`. + :param endTime: Optional end time for the process execution (default: None). + Either a :py:class:`datetime.datetime` object or a string that can be + parsed by :py:func:`dateutil.parser`. 
+ :param other_attributes: Optional other attributes as a dictionary or list + of tuples to be added to the record optionally (default: None). + """ + return self.new_record( + PROVONE_PROCESSEXEC, + identifier, + { + PROV_ATTR_STARTTIME: _ensure_datetime(startTime), + PROV_ATTR_ENDTIME: _ensure_datetime(endTime), + }, + other_attributes, + ) + + def association( + self, + process_exec, + process_spec=None, + plan=None, + identifier=None, + other_attributes=None, + ): + """ + Creates a new association record for a process execution. + + :param process_exec: Process Execution or a string identifier for the + process execution. + :param process_spec: Process Spec or string identifier of the process + involved in the association (default: None). + :param plan: Optionally extra entity to state qualified association through + an internal plan (default: None). + :param identifier: Identifier for new association record. + :param other_attributes: Optional other attributes as a dictionary or list + of tuples to be added to the record optionally (default: None). + """ + return self.new_record( + PROV_ASSOCIATION, + identifier, + { + PROVONE_ATTR_PROCESSEXEC: process_exec, + PROVONE_ATTR_PROCESS: process_spec, + PROVONE_ATTR_PLAN: plan, + }, + other_attributes, + ) + + def derivation( + self, + generatedData, + usedData, + process_exec=None, + generation=None, + usage=None, + identifier=None, + other_attributes=None, + ): + """ + Creates a new derivation record for a generated data from a used data. + + :param generatedData: Data or a string identifier for the generated + data (relationship source). + :param usedData: Data or a string identifier for the used data + (relationship destination). + :param process_exec: Process execution or string identifier of the + processExec involved in the derivation (default: None). + :param generation: Optionally extra activity to state qualified generation + through a generation (default: None). + :param usage: XXX (default: None). + :param identifier: Identifier for new derivation record. + :param other_attributes: Optional other attributes as a dictionary or list + of tuples to be added to the record optionally (default: None). + """ + attributes = { + PROVONE_ATTR_GENERATED_DATA: generatedData, + PROVONE_ATTR_USED_DATA: usedData, + PROVONE_ATTR_PROCESSEXEC: process_exec, + PROVONE_ATTR_GENERATION: generation, + PROV_ATTR_USAGE: usage, + } + return self.new_record( + PROV_DERIVATION, identifier, attributes, other_attributes + ) + + def generation( + self, data, process_exec=None, time=None, identifier=None, other_attributes=None + ): + """ + Creates a new generation record for a data. + + :param data: Data or a string identifier for the data. + :param process_exec: Process execution or string identifier of the + process_exec involved in the generation (default: None). + :param time: Optional time for the generation (default: None). + Either a :py:class:`datetime.datetime` object or a string that can be + parsed by :py:func:`dateutil.parser`. + :param identifier: Identifier for new generation record. + :param other_attributes: Optional other attributes as a dictionary or list + of tuples to be added to the record optionally (default: None). 
+ """ + return self.new_record( + PROV_GENERATION, + identifier, + { + PROVONE_ATTR_DATA: data, + PROVONE_ATTR_PROCESSEXEC: process_exec, + PROV_ATTR_TIME: _ensure_datetime(time), + }, + other_attributes, + ) + + def usage( + self, process_exec, data=None, time=None, identifier=None, other_attributes=None + ): + """ + Creates a new usage record for a process execution. + + :param process_exec: Process Execution or a string identifier for the + processExec. + :param data: Data or string identifier of the data involved in + the usage relationship (default: None). + :param time: Optional time for the usage (default: None). + Either a :py:class:`datetime.datetime` object or a string that can be + parsed by :py:func:`dateutil.parser`. + :param identifier: Identifier for new usage record. + :param other_attributes: Optional other attributes as a dictionary or list + of tuples to be added to the record optionally (default: None). + """ + return self.new_record( + PROV_USAGE, + identifier, + { + PROVONE_ATTR_PROCESSEXEC: process_exec, + PROVONE_ATTR_DATA: data, + PROV_ATTR_TIME: _ensure_datetime(time), + }, + other_attributes, + ) + + def communication( + self, informed, informant, identifier=None, other_attributes=None + ): + """ + Creates a new communication record for a process exec. + + :param informed: The informed processExec (relationship destination). + :param informant: The informing processExec (relationship source). + :param identifier: Identifier for new communication record. + :param other_attributes: Optional other attributes as a dictionary or list + of tuples to be added to the record optionally (default: None). + """ + return self.new_record( + PROV_COMMUNICATION, + identifier, + {PROVONE_ATTR_INFORMED: informed, PROVONE_ATTR_INFORMANT: informant}, + other_attributes, + ) + + def input_port(self, identifier, other_attributes=None): + """ + Creates a new input port. + + :param identifier: Identifier for new input port. + :param other_attributes: Optional other attributes as a dictionary or list + of tuples to be added to the record optionally (default: None). + """ + return self.new_record(PROVONE_INPUTPORT, identifier, None, other_attributes) + + def has_in_ports(self, process, in_ports, identifier=None, other_attributes=None): + """ + Creates a new input port record for a process. + + :param process: Process or a string identifier for the + process(relationship source). + :param in_ports: Input Port or string identifier for the used input port ( + relationship destination). + :param identifier: Identifier for new input port membership. + :param other_attributes: Optional other attributes as a dictionary or + list of tuples to be added to the record optionally (default: None). + """ + return self.new_record( + PROVONE_HASINPORT, + identifier, + { + PROVONE_ATTR_PROCESS: process, + PROVONE_ATTR_INPUTPORT: in_ports, + }, + other_attributes, + ) + + def output_port(self, identifier, other_attributes=None): + """ + Creates a new output port. + + :param identifier: Identifier for new output port. + :param other_attributes: Optional other attributes as a dictionary or list + of tuples to be added to the record optionally (default: None). + """ + return self.new_record(PROVONE_OUTPUTPORT, identifier, None, other_attributes) + + def has_out_ports(self, process, out_ports, identifier=None, other_attributes=None): + """ + Creates a new input port record for a process. + + :param process: Process or a string identifier for the + process(relationship source). 
+ :param out_ports: Output Port or string identifier for the used output + port (relationship destination). + :param identifier: Identifier for new output port membership. + :param other_attributes: Optional other attributes as a dictionary or + list of tuples to be added to the record optionally (default: None). + """ + return self.new_record( + PROVONE_HASOUTPORT, + identifier, + { + PROVONE_ATTR_PROCESS: process, + PROVONE_ATTR_OUTPUTPORT: out_ports, + }, + other_attributes, + ) + + def has_sub_process( + self, used_process, generated_process, identifier=None, other_attributes=None + ): + """ + Creates a new has-sub-process record for a generated process from a + used process. + + :param used_process: Process or a string identifier for the + used process (relationship source). + :param generated_process: Process or a string identifier for the + generated process (relationship destination). + :param identifier: Identifier for new sub-process record. + :param other_attributes: Optional other attributes as a dictionary or list + of tuples to be added to the record optionally (default: None). + """ + attributes = { + PROVONE_ATTR_USED_PROCESS: used_process, + PROVONE_ATTR_GENERATED_PROCESS: generated_process, + } + return self.new_record( + PROVONE_HASSUBPROCESS, identifier, attributes, other_attributes + ) + + def dataLink(self, identifier, other_attributes=None): + """ + Creates a new data link. + + :param identifier: Identifier for new data link. + :param other_attributes: Optional other attributes as a dictionary or list + of tuples to be added to the record optionally (default: None). + """ + return self.new_record(PROVONE_DATALINK, identifier, None, other_attributes) + + def inPortToDL(self, in_port, dt_link, identifier=None, other_attributes=None): + """ + + :param in_port: Input port or a string identifier for the + in_port(relationship source). + :param dt_link: Data Link or string identifier for the used data link ( + relationship destination). + :param identifier: Identifier for new data link membership. + :param other_attributes: Optional other attributes as a dictionary or + list of tuples to be added to the record optionally (default: None). + + """ + return self.new_record( + PROVONE_INPORTTODL, + identifier, + { + PROVONE_ATTR_INPUTPORT: in_port, + PROVONE_ATTR_DATALINK: dt_link, + }, + other_attributes, + ) + + def outPortToDL(self, out_port, dt_link, identifier=None, other_attributes=None): + """ + + :param out_port: Output port or a string identifier for the + out_port(relationship source). + :param dt_link: Data Link or string identifier for the used data link ( + relationship destination). + :param identifier: Identifier for new output-data link membership. + :param other_attributes: Optional other attributes as a dictionary or + list of tuples to be added to the record optionally (default: None). + + """ + return self.new_record( + PROVONE_OUTPORTTODL, + identifier, + { + PROVONE_ATTR_OUTPUTPORT: out_port, + PROVONE_ATTR_DATALINK: dt_link, + }, + other_attributes, + ) + + def DLToOutPort(self, dt_link, out_port, identifier=None, other_attributes=None): + """ + + :param dt_link: Data Link or string identifier for the used data link ( + relationship source). + :param out_port: Output port or a string identifier for the + out_port(relationship destination). + :param identifier: Identifier for new data link-output membership. + :param other_attributes: Optional other attributes as a dictionary or + list of tuples to be added to the record optionally (default: None). 
+ + """ + return self.new_record( + PROVONE_DLTOOUTPORT, + identifier, + { + PROVONE_ATTR_DATALINK: dt_link, + PROVONE_ATTR_OUTPUTPORT: out_port, + }, + other_attributes, + ) + + def DLToInPort(self, dt_link, in_port, identifier=None, other_attributes=None): + """ + + :param dt_link: Data Link or string identifier for the used data link ( + relationship source). + :param in_port: Input port or a string identifier for the in_port ( + relationship destination). + :param identifier: Identifier for new data link-output membership. + :param other_attributes: Optional other attributes as a dictionary or + list of tuples to be added to the record optionally (default: None). + + """ + return self.new_record( + PROVONE_DLTOINPORT, + identifier, + { + PROVONE_ATTR_DATALINK: dt_link, + PROVONE_ATTR_INPUTPORT: in_port, + }, + other_attributes, + ) + + def seqCtrlLink(self, identifier, other_attributes=None): + """ + Creates a new seq ctrl link. + + :param identifier: Identifier for new seq ctrl link. + :param other_attributes: Optional other attributes as a dictionary or list + of tuples to be added to the record optionally (default: None). + + """ + return self.new_record(PROVONE_SEQCTRLLINK, identifier, None, other_attributes) + + def control_link_to_process( + self, used_cntrl_link, used_process, identifier=None, other_attributes=None + ): + """ + + :param used_cntrl_link: Control Link or a string identifier for the used + control link (relationship source). + :param used_process: Data Link or string identifier for the used process + (relationship destination). + :param identifier: Identifier for new control link to process relation. + :param other_attributes: Optional other attributes as a dictionary or + list of tuples to be added to the record optionally (default: None). + + """ + return self.new_record( + PROVONE_CLTODESTP, + identifier, + { + PROVONE_ATTR_SEQCTRLLINK: used_cntrl_link, + PROVONE_ATTR_PROCESS: used_process, + }, + other_attributes, + ) + + def process_to_control_link( + self, used_process, used_cntrl_link, identifier=None, other_attributes=None + ): + """ + + :param used_process: Process or string identifier for the used process + (relationship source). + :param used_cntrl_link: Control Link or a string identifier for the used + control link (relationship destination). + :param identifier: Identifier for new process to control link relation. + :param other_attributes: Optional other attributes as a dictionary or + list of tuples to be added to the record optionally (default: None). + + """ + return self.new_record( + PROVONE_SOURCEPTOCL, + identifier, + { + PROVONE_ATTR_PROCESS: used_process, + PROVONE_ATTR_SEQCTRLLINK: used_cntrl_link, + }, + other_attributes, + ) + + def linkage( + self, + data_item, + dl_link, + related_process=None, + identifier=None, + other_attributes=None, + ): + """ + + :param data_item: Data or string identifier for the associated data ( + relationship source). + :param dl_link: Data link or string identifier for the data link ( + relationship destination). + :param related_process: Process or string identifier of the data link (default=None) + :param identifier: Identifier for new data-on-link relation. + :param other_attributes: Optional other attributes as a dictionary or + list of tuples to be added to the record optionally (default: None). 
+ + """ + return self.new_record( + PROVONE_DATAONLINK, + identifier, + { + PROVONE_ATTR_DATA: data_item, + PROVONE_ATTR_DATALINK: dl_link, + PROVONE_ATTR_PROCESS: related_process, + }, + other_attributes, + ) + + def parameterization( + self, in_port, data_item, identifier=None, other_attributes=None + ): + """ + + :param in_port: InputPort or string identifier for the associated in port ( + relationship source). + :param data_item: Data item or string identifier for the associated + default parameter (relationship destination). + :param identifier: Identifier for new default parameter relation. + :param other_attributes: Optional other attributes as a dictionary or + list of tuples to be added to the record optionally (default: None). + """ + return self.new_record( + PROVONE_HASDEFAULTPARAM, + identifier, + { + PROVONE_ATTR_INPUTPORT: in_port, + PROVONE_ATTR_DATA: data_item, + }, + other_attributes, + ) + + def serialize(self, destination=None, format="json", **args): + """ + Serialize the :py:class:`ProvDocument` to the destination. + + Available serializers can be queried by the value of + `:py:attr:~prov.serializers.Registry.serializers` after loading them via + `:py:func:~prov.serializers.Registry.load_serializers()`. + + :param destination: Stream object to serialize the output to. Default is + `None`, which serializes as a string. + :param format: Serialization format (default: 'json'), defaulting to + PROV-JSON. + :return: Serialization in a string if no destination was given, + None otherwise. + """ + serializer = nidm.core.serializers.get(format)(self) + if destination is None: + stream = io.StringIO() + serializer.serialize(stream, **args) + return stream.getvalue() + if hasattr(destination, "write"): + stream = destination + serializer.serialize(stream, **args) + else: + location = destination + scheme, netloc, path, params, _query, fragment = urlparse(location) + if netloc != "": + print( + "WARNING: not saving as location " + "is not a local file reference" + ) + return + fd, name = tempfile.mkstemp() + stream = os.fdopen(fd, "wb") + serializer.serialize(stream, **args) + stream.close() + if hasattr(shutil, "move"): + shutil.move(name, path) + else: + shutil.copy(name, path) + os.remove(name) + + def is_part_of( + self, used_processex, child_processex, identifier=None, other_attributes=None + ): + """ + Creates a new is-part-of record for a process exec. + + :param used_processex: The parent processExec (relationship source). + :param child_processex: The child processExec (relationship destination). + :param identifier: Identifier for new is-part-of record. + :param other_attributes: Optional other attributes as a dictionary or list + of tuples to be added to the record optionally (default: None). + """ + return self.new_record( + PROVONE_ISPARTOF, + identifier, + { + PROVONE_ATTR_USED_PREXEC: used_processex, + PROVONE_ATTR_CHILD_PREXEC: child_processex, + }, + other_attributes, + ) + + def membership(self, collection, data): + """ + Creates a new membership record for data to a collection. + + :param collection: Collection the data is to be added to. + :param data: Data to be added to the collection. 
+ """ + return self.new_record( + PROV_MEMBERSHIP, + None, + {PROV_ATTR_COLLECTION: collection, PROVONE_ATTR_DATA: data}, + ) + + # Aliases + wasAttributedTo = attribution + wasAssociatedWith = association + wasDerivedFrom = derivation + wasGeneratedBy = generation + wasInformedBy = communication + used = usage + hasInPort = has_in_ports + hasOutPort = has_out_ports + hasSubProcess = has_sub_process + CLtoDestP = control_link_to_process + sourcePToCL = process_to_control_link + hasDefaultParam = parameterization + dataOnLink = linkage + isPartOf = is_part_of + hadMember = membership diff --git a/nidm/core/serializers/__init__.py b/nidm/core/serializers/__init__.py index c96b2075..e8b03fdf 100644 --- a/nidm/core/serializers/__init__.py +++ b/nidm/core/serializers/__init__.py @@ -1,14 +1,10 @@ -from __future__ import (absolute_import, division, print_function, - unicode_literals) - +from __future__ import absolute_import, division, print_function, unicode_literals from prov import Error -__author__ = 'Trung Dong Huynh' -__email__ = 'trungdong@donggiang.com' +__author__ = "Trung Dong Huynh" +__email__ = "trungdong@donggiang.com" -__all__ = [ - 'get' -] +__all__ = ["get"] class Serializer(object): @@ -42,6 +38,7 @@ def deserialize(self, stream, **kwargs): class DoNotExist(Error): """Exception for the case a serializer is not available.""" + pass @@ -54,16 +51,16 @@ class Registry: @staticmethod def load_serializers(): """Loads all available serializers into the registry.""" + from nidm.core.serializers.provonerdf import ProvONERDFSerializer from prov.serializers.provjson import ProvJSONSerializer from prov.serializers.provn import ProvNSerializer from prov.serializers.provxml import ProvXMLSerializer - from nidm.core.serializers.provonerdf import ProvONERDFSerializer Registry.serializers = { - 'json': ProvJSONSerializer, - 'rdf': ProvONERDFSerializer, - 'provn': ProvNSerializer, - 'xml': ProvXMLSerializer + "json": ProvJSONSerializer, + "rdf": ProvONERDFSerializer, + "provn": ProvNSerializer, + "xml": ProvXMLSerializer, } @@ -77,8 +74,4 @@ def get(format_name): try: return Registry.serializers[format_name] except KeyError: - raise DoNotExist( - 'No serializer available for the format "%s"' % format_name - ) - - + raise DoNotExist('No serializer available for the format "%s"' % format_name) diff --git a/nidm/core/serializers/provonerdf.py b/nidm/core/serializers/provonerdf.py index 075cc006..c752005f 100644 --- a/nidm/core/serializers/provonerdf.py +++ b/nidm/core/serializers/provonerdf.py @@ -1,38 +1,56 @@ """PROVONE-RDF serializers for ProvONEDocument """ -from __future__ import (absolute_import, division, print_function, - unicode_literals) - +from __future__ import absolute_import, division, print_function, unicode_literals import base64 from collections import OrderedDict import datetime import io - import dateutil.parser -import six - -from rdflib.term import URIRef, BNode -from rdflib.term import Literal as RDFLiteral -from rdflib.graph import ConjunctiveGraph -from rdflib.namespace import RDF, RDFS, XSD - -import prov.model as pm +from nidm.core.Constants import PROVONE, PROVONE_ID_ATTRIBUTES_MAP +from nidm.core.provone import PROVONE_N_MAP +from nidm.core.serializers import Serializer from prov.constants import ( - PROV, PROV_ID_ATTRIBUTES_MAP, PROV_N_MAP, PROV_BASE_CLS, XSD_QNAME, - PROV_END, PROV_START, PROV_USAGE, PROV_GENERATION, PROV_DERIVATION, PROV_INVALIDATION, - PROV_ALTERNATE, PROV_MENTION, PROV_DELEGATION, PROV_ACTIVITY, PROV_ATTR_STARTTIME, - PROV_ATTR_ENDTIME, 
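# ---------------------------------------------------------------------------
# A minimal, hypothetical usage sketch of the ProvONEDocument API shown above.
# Everything here is illustrative: the "ex:" identifiers and the timestamp are
# made up, and it assumes the prov package plus the nidm.core.serializers
# registry (whose diff continues below) import as they do on this branch.
# ---------------------------------------------------------------------------
from nidm.core.provone import ProvONEDocument

doc = ProvONEDocument()
doc.add_namespace("ex", "http://example.org/")  # inherited from prov's ProvDocument

proc = doc.process("ex:segmentation")        # provone:Process
alice = doc.user("ex:alice")                 # provone:User
doc.wasAttributedTo(proc, alice)             # alias for attribution()

run = doc.processExec(                       # provone:ProcessExec
    "ex:segmentation_run",
    startTime="2023-01-01T10:00:00",         # strings are parsed via dateutil
)
doc.wasAssociatedWith(run, proc)             # alias for association()

volume = doc.data("ex:output_volume")        # provone:Data
doc.wasGeneratedBy(volume, run)              # alias for generation()

# serialize() looks the format up in nidm.core.serializers; "rdf" resolves to
# the ProvONERDFSerializer and a string is returned when no destination is given.
print(doc.serialize(format="rdf", rdf_format="trig"))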
PROV_LOCATION, PROV_ATTR_TIME, PROV_ROLE, PROV_COMMUNICATION, - PROV_ATTR_INFORMANT, PROV_ATTR_RESPONSIBLE, PROV_ATTR_TRIGGER, PROV_ATTR_ENDER, - PROV_ATTR_STARTER, PROV_ATTR_USED_ENTITY, PROV_ASSOCIATION) + PROV, + PROV_ACTIVITY, + PROV_ALTERNATE, + PROV_ASSOCIATION, + PROV_ATTR_ENDER, + PROV_ATTR_ENDTIME, + PROV_ATTR_INFORMANT, + PROV_ATTR_RESPONSIBLE, + PROV_ATTR_STARTER, + PROV_ATTR_STARTTIME, + PROV_ATTR_TIME, + PROV_ATTR_TRIGGER, + PROV_ATTR_USED_ENTITY, + PROV_BASE_CLS, + PROV_COMMUNICATION, + PROV_DELEGATION, + PROV_DERIVATION, + PROV_END, + PROV_GENERATION, + PROV_ID_ATTRIBUTES_MAP, + PROV_INVALIDATION, + PROV_LOCATION, + PROV_MENTION, + PROV_N_MAP, + PROV_ROLE, + PROV_START, + PROV_USAGE, + XSD_QNAME, +) +import prov.model as pm from prov.serializers import Error import prov.serializers.provrdf +from rdflib.graph import ConjunctiveGraph +from rdflib.namespace import RDF, RDFS, XSD +from rdflib.term import BNode +from rdflib.term import Literal as RDFLiteral +from rdflib.term import URIRef +import six -from nidm.core.Constants import PROVONE_ID_ATTRIBUTES_MAP, PROVONE -from nidm.core.serializers import Serializer -from nidm.core.provone import PROVONE_N_MAP - -__author__ = 'Sanu Ann Abraham' -__email__ = 'sanuann@mit.edu' +__author__ = "Sanu Ann Abraham" +__email__ = "sanuann@mit.edu" class ProvRDFException(Error): @@ -47,28 +65,26 @@ def __init__(self): def get_anon_id(self, obj, local_prefix="id"): if obj not in self._cache: self._count += 1 - self._cache[obj] = pm.Identifier( - '_:%s%d' % (local_prefix, self._count) - ).uri + self._cache[obj] = pm.Identifier("_:%s%d" % (local_prefix, self._count)).uri return self._cache[obj] # Reverse map for prov.model.XSD_DATATYPE_PARSERS LITERAL_XSDTYPE_MAP = { - float: XSD['double'], - int: XSD['int'], - six.text_type: XSD['string'], + float: XSD["double"], + int: XSD["int"], + six.text_type: XSD["string"], # boolean, string values are supported natively by PROV-RDF # datetime values are converted separately } # Add long on Python 2 if six.integer_types[-1] not in LITERAL_XSDTYPE_MAP: - LITERAL_XSDTYPE_MAP[six.integer_types[-1]] = XSD['long'] + LITERAL_XSDTYPE_MAP[six.integer_types[-1]] = XSD["long"] def attr2rdf(attr): - return URIRef(PROVONE[PROVONE_ID_ATTRIBUTES_MAP[attr].split('provone:')[1]].uri) + return URIRef(PROVONE[PROVONE_ID_ATTRIBUTES_MAP[attr].split("provone:")[1]].uri) prov.serializers.provrdf.attr2rdf = attr2rdf @@ -86,7 +102,7 @@ class ProvONERDFSerializer(Serializer): PROV-O serializer for :class:`~prov.model.ProvDocument` """ - def serialize(self, stream=None, rdf_format='trig', **kwargs): + def serialize(self, stream=None, rdf_format="trig", **kwargs): """ Serializes a :class:`~prov.model.ProvDocument` instance to `PROV-O `_. @@ -96,7 +112,7 @@ def serialize(self, stream=None, rdf_format='trig', **kwargs): """ container = self.encode_document(self.document) newargs = kwargs.copy() - newargs['format'] = rdf_format + newargs["format"] = rdf_format if six.PY2: buf = io.BytesIO() @@ -107,7 +123,7 @@ def serialize(self, stream=None, rdf_format='trig', **kwargs): # a text object is must be decoded. We assume utf-8 here which # should be fine for almost every case. if isinstance(stream, io.TextIOBase): - stream.write(buf.read().decode('utf-8')) + stream.write(buf.read().decode("utf-8")) else: stream.write(buf.read()) finally: @@ -121,13 +137,13 @@ def serialize(self, stream=None, rdf_format='trig', **kwargs): # a text object is must be decoded. We assume utf-8 here which # should be fine for almost every case. 
if isinstance(stream, io.TextIOBase): - stream.write(buf.read().decode('utf-8')) + stream.write(buf.read().decode("utf-8")) else: stream.write(buf.read()) finally: buf.close() - def deserialize(self, stream, rdf_format='trig', **kwargs): + def deserialize(self, stream, rdf_format="trig", **kwargs): """ Deserialize from the `PROV-O `_ representation to a :class:`~prov.model.ProvDocument` instance. @@ -136,7 +152,7 @@ def deserialize(self, stream, rdf_format='trig', **kwargs): :param rdf_format: The RDF format of the input data, default: TRiG. """ newargs = kwargs.copy() - newargs['format'] = rdf_format + newargs["format"] = rdf_format container = ConjunctiveGraph() container.parse(stream, **newargs) document = pm.ProvDocument() @@ -153,11 +169,11 @@ def encode_rdf_representation(self, value): elif isinstance(value, pm.Literal): return literal_rdf_representation(value) elif isinstance(value, datetime.datetime): - return RDFLiteral(value.isoformat(), datatype=XSD['dateTime']) + return RDFLiteral(value.isoformat(), datatype=XSD["dateTime"]) elif isinstance(value, pm.QualifiedName): return URIRef(value.uri) elif isinstance(value, pm.Identifier): - return RDFLiteral(value.uri, datatype=XSD['anyURI']) + return RDFLiteral(value.uri, datatype=XSD["anyURI"]) elif type(value) in LITERAL_XSDTYPE_MAP: return RDFLiteral(value, datatype=LITERAL_XSDTYPE_MAP[type(value)]) else: @@ -166,23 +182,27 @@ def encode_rdf_representation(self, value): def decode_rdf_representation(self, literal, graph): if isinstance(literal, RDFLiteral): value = literal.value if literal.value is not None else literal - datatype = literal.datatype if hasattr(literal, 'datatype') else None - langtag = literal.language if hasattr(literal, 'language') else None - if datatype and 'XMLLiteral' in datatype: + datatype = literal.datatype if hasattr(literal, "datatype") else None + langtag = literal.language if hasattr(literal, "language") else None + if datatype and "XMLLiteral" in datatype: value = literal - if datatype and 'base64Binary' in datatype: + if datatype and "base64Binary" in datatype: value = base64.standard_b64encode(value) - if datatype == XSD['QName']: + if datatype == XSD["QName"]: return pm.Literal(literal, datatype=XSD_QNAME) - if datatype == XSD['dateTime']: + if datatype == XSD["dateTime"]: return dateutil.parser.parse(literal) - if datatype == XSD['gYear']: - return pm.Literal(dateutil.parser.parse(literal).year, - datatype=self.valid_identifier(datatype)) - if datatype == XSD['gYearMonth']: + if datatype == XSD["gYear"]: + return pm.Literal( + dateutil.parser.parse(literal).year, + datatype=self.valid_identifier(datatype), + ) + if datatype == XSD["gYearMonth"]: parsed_info = dateutil.parser.parse(literal) - return pm.Literal('{0}-{1:02d}'.format(parsed_info.year, parsed_info.month), - datatype=self.valid_identifier(datatype)) + return pm.Literal( + "{0}-{1:02d}".format(parsed_info.year, parsed_info.month), + datatype=self.valid_identifier(datatype), + ) else: # The literal of standard Python types is not converted here # It will be automatically converted when added to a record by @@ -193,7 +213,7 @@ def decode_rdf_representation(self, literal, graph): if rval is None: prefix, iri, _ = graph.namespace_manager.compute_qname(literal) ns = self.document.add_namespace(prefix, iri) - rval = pm.QualifiedName(ns, literal.replace(ns.uri, '')) + rval = pm.QualifiedName(ns, literal.replace(ns.uri, "")) return rval else: # simple type, just return it @@ -211,18 +231,21 @@ def encode_container(self, bundle, container=None, 
identifier=None): if container is None: container = ConjunctiveGraph(identifier=identifier) nm = container.namespace_manager - nm.bind('prov', PROV.uri) + nm.bind("prov", PROV.uri) for namespace in bundle.namespaces: container.bind(namespace.prefix, namespace.uri) id_generator = AnonymousIDGenerator() - real_or_anon_id = lambda record: record._identifier.uri if \ - record._identifier else id_generator.get_anon_id(record) + real_or_anon_id = ( + lambda record: record._identifier.uri + if record._identifier + else id_generator.get_anon_id(record) + ) for record in bundle._records: rec_type = record.get_type() - if hasattr(record, 'identifier') and record.identifier: + if hasattr(record, "identifier") and record.identifier: identifier = URIRef(six.text_type(real_or_anon_id(record))) container.add((identifier, RDF.type, URIRef(rec_type.uri))) else: @@ -231,16 +254,19 @@ def encode_container(self, bundle, container=None, identifier=None): bnode = None formal_objects = [] used_objects = [] - all_attributes = list(record.formal_attributes) + list(record.attributes) + all_attributes = list(record.formal_attributes) + list( + record.attributes + ) formal_qualifiers = False for attrid, (attr, value) in enumerate(list(record.formal_attributes)): - if (identifier is not None and value is not None) or \ - (identifier is None and value is not None and attrid > 1): + if (identifier is not None and value is not None) or ( + identifier is None and value is not None and attrid > 1 + ): formal_qualifiers = True has_qualifiers = len(record.extra_attributes) > 0 or formal_qualifiers for idx, (attr, value) in enumerate(all_attributes): if record.is_relation(): - if rec_type.namespace.prefix == 'prov': + if rec_type.namespace.prefix == "prov": pred = URIRef(PROV[PROV_N_MAP[rec_type]].uri) else: pred = URIRef(PROVONE[PROVONE_N_MAP[rec_type]].uri) @@ -258,20 +284,28 @@ def encode_container(self, bundle, container=None, identifier=None): if identifier is None and subj is not None: try: obj_val = record.formal_attributes[1][1] - obj_attr = URIRef(record.formal_attributes[1][0].uri) + obj_attr = URIRef( + record.formal_attributes[1][0].uri + ) # TODO: Why is obj_attr above not used anywhere? 
except IndexError: obj_val = None - if obj_val and (rec_type not in - {PROV_END, - PROV_START, - PROV_USAGE, - PROV_GENERATION, - PROV_DERIVATION, - PROV_ASSOCIATION, - PROV_INVALIDATION} or - (valid_formal_indices == {0, 1} and - len(record.extra_attributes) == 0)): + if obj_val and ( + rec_type + not in { + PROV_END, + PROV_START, + PROV_USAGE, + PROV_GENERATION, + PROV_DERIVATION, + PROV_ASSOCIATION, + PROV_INVALIDATION, + } + or ( + valid_formal_indices == {0, 1} + and len(record.extra_attributes) == 0 + ) + ): used_objects.append(record.formal_attributes[1][0]) obj_val = self.encode_rdf_representation(obj_val) if rec_type == PROV_ALTERNATE: @@ -279,9 +313,19 @@ def encode_container(self, bundle, container=None, identifier=None): container.add((subj, pred, obj_val)) if rec_type == PROV_MENTION: if record.formal_attributes[2][1]: - used_objects.append(record.formal_attributes[2][0]) - obj_val = self.encode_rdf_representation(record.formal_attributes[2][1]) - container.add((subj, URIRef(PROV['asInBundle'].uri), obj_val)) + used_objects.append( + record.formal_attributes[2][0] + ) + obj_val = self.encode_rdf_representation( + record.formal_attributes[2][1] + ) + container.add( + ( + subj, + URIRef(PROV["asInBundle"].uri), + obj_val, + ) + ) has_qualifiers = False if rec_type in [PROV_ALTERNATE]: continue @@ -289,17 +333,23 @@ def encode_container(self, bundle, container=None, identifier=None): qualifier = rec_type._localpart rec_uri = rec_type.uri for attr_name, val in record.extra_attributes: - if attr_name == PROV['type']: - if PROV['Revision'] == val or \ - PROV['Quotation'] == val or \ - PROV['PrimarySource'] == val: + if attr_name == PROV["type"]: + if ( + PROV["Revision"] == val + or PROV["Quotation"] == val + or PROV["PrimarySource"] == val + ): qualifier = val._localpart rec_uri = val.uri if identifier is not None: - container.remove((identifier, - RDF.type, - URIRef(rec_type.uri))) - QRole = URIRef(PROV['qualified' + qualifier].uri) + container.remove( + ( + identifier, + RDF.type, + URIRef(rec_type.uri), + ) + ) + QRole = URIRef(PROV["qualified" + qualifier].uri) if identifier is not None: container.add((subj, QRole, identifier)) else: @@ -311,59 +361,72 @@ def encode_container(self, bundle, container=None, identifier=None): if value is not None and attr not in used_objects: if attr in formal_objects: pred = attr2rdf(attr) - elif attr == PROV['role']: - pred = URIRef(PROV['hadRole'].uri) - elif attr == PROV['plan']: - pred = URIRef(PROV['hadPlan'].uri) - elif attr == PROV['type']: + elif attr == PROV["role"]: + pred = URIRef(PROV["hadRole"].uri) + elif attr == PROV["plan"]: + pred = URIRef(PROV["hadPlan"].uri) + elif attr == PROV["type"]: pred = RDF.type - elif attr == PROV['label']: + elif attr == PROV["label"]: pred = RDFS.label elif isinstance(attr, pm.QualifiedName): pred = URIRef(attr.uri) else: pred = self.encode_rdf_representation(attr) - if PROV['plan'].uri in pred: - pred = URIRef(PROV['hadPlan'].uri) - if PROV['informant'].uri in pred: - pred = URIRef(PROV['activity'].uri) - if PROV['responsible'].uri in pred: - pred = URIRef(PROV['agent'].uri) - if rec_type == PROV_DELEGATION and \ - PROV['activity'].uri in pred: - pred = URIRef(PROV['hadActivity'].uri) - if (rec_type in [PROV_END, PROV_START] and - PROV['trigger'].uri in pred) or\ - (rec_type in [PROV_USAGE] and - PROV['used'].uri in pred): - pred = URIRef(PROV['entity'].uri) - if rec_type in [PROV_GENERATION, PROV_END, - PROV_START, PROV_USAGE, - PROV_INVALIDATION]: - if PROV['time'].uri in pred: - pred = 
URIRef(PROV['atTime'].uri) - if PROV['ender'].uri in pred: - pred = URIRef(PROV['hadActivity'].uri) - if PROV['starter'].uri in pred: - pred = URIRef(PROV['hadActivity'].uri) - if PROV['location'].uri in pred: - pred = URIRef(PROV['atLocation'].uri) + if PROV["plan"].uri in pred: + pred = URIRef(PROV["hadPlan"].uri) + if PROV["informant"].uri in pred: + pred = URIRef(PROV["activity"].uri) + if PROV["responsible"].uri in pred: + pred = URIRef(PROV["agent"].uri) + if ( + rec_type == PROV_DELEGATION + and PROV["activity"].uri in pred + ): + pred = URIRef(PROV["hadActivity"].uri) + if ( + rec_type in [PROV_END, PROV_START] + and PROV["trigger"].uri in pred + ) or ( + rec_type in [PROV_USAGE] and PROV["used"].uri in pred + ): + pred = URIRef(PROV["entity"].uri) + if rec_type in [ + PROV_GENERATION, + PROV_END, + PROV_START, + PROV_USAGE, + PROV_INVALIDATION, + ]: + if PROV["time"].uri in pred: + pred = URIRef(PROV["atTime"].uri) + if PROV["ender"].uri in pred: + pred = URIRef(PROV["hadActivity"].uri) + if PROV["starter"].uri in pred: + pred = URIRef(PROV["hadActivity"].uri) + if PROV["location"].uri in pred: + pred = URIRef(PROV["atLocation"].uri) if rec_type in [PROV_ACTIVITY]: if PROV_ATTR_STARTTIME in pred: - pred = URIRef(PROV['startedAtTime'].uri) + pred = URIRef(PROV["startedAtTime"].uri) if PROV_ATTR_ENDTIME in pred: - pred = URIRef(PROV['endedAtTime'].uri) + pred = URIRef(PROV["endedAtTime"].uri) if rec_type == PROV_DERIVATION: - if PROV['activity'].uri in pred: - pred = URIRef(PROV['hadActivity'].uri) - if PROV['generation'].uri in pred: - pred = URIRef(PROV['hadGeneration'].uri) - if PROV['usage'].uri in pred: - pred = URIRef(PROV['hadUsage'].uri) - if PROV['usedEntity'].uri in pred: - pred = URIRef(PROV['entity'].uri) - container.add((identifier, pred, - self.encode_rdf_representation(value))) + if PROV["activity"].uri in pred: + pred = URIRef(PROV["hadActivity"].uri) + if PROV["generation"].uri in pred: + pred = URIRef(PROV["hadGeneration"].uri) + if PROV["usage"].uri in pred: + pred = URIRef(PROV["hadUsage"].uri) + if PROV["usedEntity"].uri in pred: + pred = URIRef(PROV["entity"].uri) + container.add( + ( + identifier, + pred, + self.encode_rdf_representation(value), + ) + ) continue if value is None: continue @@ -372,24 +435,25 @@ def encode_container(self, bundle, container=None, identifier=None): else: # Assuming this is a datetime value obj = self.encode_rdf_representation(value) - if attr == PROV['location']: - pred = URIRef(PROV['atLocation'].uri) + if attr == PROV["location"]: + pred = URIRef(PROV["atLocation"].uri) if False and isinstance(value, (URIRef, pm.QualifiedName)): if isinstance(value, pm.QualifiedName): value = URIRef(value.uri) container.add((identifier, pred, value)) else: - container.add((identifier, pred, - self.encode_rdf_representation(obj))) + container.add( + (identifier, pred, self.encode_rdf_representation(obj)) + ) continue - if attr == PROV['type']: + if attr == PROV["type"]: pred = RDF.type - elif attr == PROV['label']: + elif attr == PROV["label"]: pred = RDFS.label elif attr == PROV_ATTR_STARTTIME: - pred = URIRef(PROV['startedAtTime'].uri) + pred = URIRef(PROV["startedAtTime"].uri) elif attr == PROV_ATTR_ENDTIME: - pred = URIRef(PROV['endedAtTime'].uri) + pred = URIRef(PROV["endedAtTime"].uri) else: pred = self.encode_rdf_representation(attr) container.add((identifier, pred, obj)) @@ -398,7 +462,7 @@ def encode_container(self, bundle, container=None, identifier=None): def decode_document(self, content, document): for prefix, url in 
content.namespaces(): document.add_namespace(prefix, six.text_type(url)) - if hasattr(content, 'contexts'): + if hasattr(content, "contexts"): for graph in content.contexts(): if isinstance(graph.identifier, BNode): self.decode_container(graph, document) @@ -416,33 +480,35 @@ def decode_container(self, graph, bundle): unique_sets = {} for key, val in PROV_BASE_CLS.items(): PROV_CLS_MAP[key.uri] = PROV_BASE_CLS[key] - relation_mapper = {URIRef(PROV['alternateOf'].uri): 'alternate', - URIRef(PROV['actedOnBehalfOf'].uri): 'delegation', - URIRef(PROV['specializationOf'].uri): 'specialization', - URIRef(PROV['mentionOf'].uri): 'mention', - URIRef(PROV['wasAssociatedWith'].uri): 'association', - URIRef(PROV['wasDerivedFrom'].uri): 'derivation', - URIRef(PROV['wasAttributedTo'].uri): 'attribution', - URIRef(PROV['wasInformedBy'].uri): 'communication', - URIRef(PROV['wasGeneratedBy'].uri): 'generation', - URIRef(PROV['wasInfluencedBy'].uri): 'influence', - URIRef(PROV['wasInvalidatedBy'].uri): 'invalidation', - URIRef(PROV['wasEndedBy'].uri): 'end', - URIRef(PROV['wasStartedBy'].uri): 'start', - URIRef(PROV['hadMember'].uri): 'membership', - URIRef(PROV['used'].uri): 'usage', - } - predicate_mapper = {RDFS.label: pm.PROV['label'], - URIRef(PROV['atLocation'].uri): PROV_LOCATION, - URIRef(PROV['startedAtTime'].uri): PROV_ATTR_STARTTIME, - URIRef(PROV['endedAtTime'].uri): PROV_ATTR_ENDTIME, - URIRef(PROV['atTime'].uri): PROV_ATTR_TIME, - URIRef(PROV['hadRole'].uri): PROV_ROLE, - URIRef(PROV['hadPlan'].uri): pm.PROV_ATTR_PLAN, - URIRef(PROV['hadUsage'].uri): pm.PROV_ATTR_USAGE, - URIRef(PROV['hadGeneration'].uri): pm.PROV_ATTR_GENERATION, - URIRef(PROV['hadActivity'].uri): pm.PROV_ATTR_ACTIVITY, - } + relation_mapper = { + URIRef(PROV["alternateOf"].uri): "alternate", + URIRef(PROV["actedOnBehalfOf"].uri): "delegation", + URIRef(PROV["specializationOf"].uri): "specialization", + URIRef(PROV["mentionOf"].uri): "mention", + URIRef(PROV["wasAssociatedWith"].uri): "association", + URIRef(PROV["wasDerivedFrom"].uri): "derivation", + URIRef(PROV["wasAttributedTo"].uri): "attribution", + URIRef(PROV["wasInformedBy"].uri): "communication", + URIRef(PROV["wasGeneratedBy"].uri): "generation", + URIRef(PROV["wasInfluencedBy"].uri): "influence", + URIRef(PROV["wasInvalidatedBy"].uri): "invalidation", + URIRef(PROV["wasEndedBy"].uri): "end", + URIRef(PROV["wasStartedBy"].uri): "start", + URIRef(PROV["hadMember"].uri): "membership", + URIRef(PROV["used"].uri): "usage", + } + predicate_mapper = { + RDFS.label: pm.PROV["label"], + URIRef(PROV["atLocation"].uri): PROV_LOCATION, + URIRef(PROV["startedAtTime"].uri): PROV_ATTR_STARTTIME, + URIRef(PROV["endedAtTime"].uri): PROV_ATTR_ENDTIME, + URIRef(PROV["atTime"].uri): PROV_ATTR_TIME, + URIRef(PROV["hadRole"].uri): PROV_ROLE, + URIRef(PROV["hadPlan"].uri): pm.PROV_ATTR_PLAN, + URIRef(PROV["hadUsage"].uri): pm.PROV_ATTR_USAGE, + URIRef(PROV["hadGeneration"].uri): pm.PROV_ATTR_GENERATION, + URIRef(PROV["hadActivity"].uri): pm.PROV_ATTR_ACTIVITY, + } other_attributes = {} for stmt in graph.triples((None, RDF.type, None)): id = six.text_type(stmt[0]) @@ -456,27 +522,42 @@ def decode_container(self, graph, bundle): except AttributeError: prov_obj = None add_attr = True - isderivation = pm.PROV['Revision'].uri in stmt[2] or \ - pm.PROV['Quotation'].uri in stmt[2] or \ - pm.PROV['PrimarySource'].uri in stmt[2] - if id not in ids and prov_obj and (prov_obj.uri == obj or - isderivation or - isinstance(stmt[0], BNode)): + isderivation = ( + pm.PROV["Revision"].uri in stmt[2] + 
or pm.PROV["Quotation"].uri in stmt[2] + or pm.PROV["PrimarySource"].uri in stmt[2] + ) + if ( + id not in ids + and prov_obj + and ( + prov_obj.uri == obj + or isderivation + or isinstance(stmt[0], BNode) + ) + ): ids[id] = prov_obj klass = pm.PROV_REC_CLS[prov_obj] - formal_attributes[id] = OrderedDict([(key, None) for key in klass.FORMAL_ATTRIBUTES]) - unique_sets[id] = OrderedDict([(key, []) for key in klass.FORMAL_ATTRIBUTES]) - add_attr = False or ((isinstance(stmt[0], BNode) or isderivation) and prov_obj.uri != obj) + formal_attributes[id] = OrderedDict( + [(key, None) for key in klass.FORMAL_ATTRIBUTES] + ) + unique_sets[id] = OrderedDict( + [(key, []) for key in klass.FORMAL_ATTRIBUTES] + ) + add_attr = False or ( + (isinstance(stmt[0], BNode) or isderivation) + and prov_obj.uri != obj + ) if add_attr: if id not in other_attributes: other_attributes[id] = [] obj_formatted = self.decode_rdf_representation(stmt[2], graph) - other_attributes[id].append((pm.PROV['type'], obj_formatted)) + other_attributes[id].append((pm.PROV["type"], obj_formatted)) else: if id not in other_attributes: other_attributes[id] = [] obj = self.decode_rdf_representation(stmt[2], graph) - other_attributes[id].append((pm.PROV['type'], obj)) + other_attributes[id].append((pm.PROV["type"], obj)) for id, pred, obj in graph: id = six.text_type(id) if id not in other_attributes: @@ -484,59 +565,84 @@ def decode_container(self, graph, bundle): if pred == RDF.type: continue if pred in relation_mapper: - if 'alternateOf' in pred: + if "alternateOf" in pred: getattr(bundle, relation_mapper[pred])(obj, id) - elif 'mentionOf' in pred: + elif "mentionOf" in pred: mentionBundle = None - for stmt in graph.triples((URIRef(id), URIRef(pm.PROV['asInBundle'].uri), None)): + for stmt in graph.triples( + (URIRef(id), URIRef(pm.PROV["asInBundle"].uri), None) + ): mentionBundle = stmt[2] - getattr(bundle, relation_mapper[pred])(id, six.text_type(obj), mentionBundle) - elif 'actedOnBehalfOf' in pred or 'wasAssociatedWith' in pred: - qualifier = 'qualified' + relation_mapper[pred].upper()[0] + relation_mapper[pred][1:] + getattr(bundle, relation_mapper[pred])( + id, six.text_type(obj), mentionBundle + ) + elif "actedOnBehalfOf" in pred or "wasAssociatedWith" in pred: + qualifier = ( + "qualified" + + relation_mapper[pred].upper()[0] + + relation_mapper[pred][1:] + ) qualifier_bnode = None - for stmt in graph.triples((URIRef(id), URIRef(pm.PROV[qualifier].uri), None)): + for stmt in graph.triples( + (URIRef(id), URIRef(pm.PROV[qualifier].uri), None) + ): qualifier_bnode = stmt[2] if qualifier_bnode is None: getattr(bundle, relation_mapper[pred])(id, six.text_type(obj)) else: - fakeys = list(formal_attributes[six.text_type(qualifier_bnode)].keys()) - formal_attributes[six.text_type(qualifier_bnode)][fakeys[0]] = id - formal_attributes[six.text_type(qualifier_bnode)][fakeys[1]] = six.text_type(obj) + fakeys = list( + formal_attributes[six.text_type(qualifier_bnode)].keys() + ) + formal_attributes[six.text_type(qualifier_bnode)][ + fakeys[0] + ] = id + formal_attributes[six.text_type(qualifier_bnode)][ + fakeys[1] + ] = six.text_type(obj) else: getattr(bundle, relation_mapper[pred])(id, six.text_type(obj)) elif id in ids: obj1 = self.decode_rdf_representation(obj, graph) if obj is not None and obj1 is None: - raise ValueError(('Error transforming', obj)) + raise ValueError(("Error transforming", obj)) pred_new = pred if pred in predicate_mapper: pred_new = predicate_mapper[pred] - if ids[id] == PROV_COMMUNICATION and 'activity' in 
six.text_type(pred_new): + if ids[id] == PROV_COMMUNICATION and "activity" in six.text_type( + pred_new + ): pred_new = PROV_ATTR_INFORMANT - if ids[id] == PROV_DELEGATION and 'agent' in six.text_type(pred_new): + if ids[id] == PROV_DELEGATION and "agent" in six.text_type(pred_new): pred_new = PROV_ATTR_RESPONSIBLE - if ids[id] in [PROV_END, PROV_START] and 'entity' in six.text_type(pred_new): + if ids[id] in [PROV_END, PROV_START] and "entity" in six.text_type( + pred_new + ): pred_new = PROV_ATTR_TRIGGER - if ids[id] in [PROV_END] and 'activity' in six.text_type(pred_new): + if ids[id] in [PROV_END] and "activity" in six.text_type(pred_new): pred_new = PROV_ATTR_ENDER - if ids[id] in [PROV_START] and 'activity' in six.text_type(pred_new): + if ids[id] in [PROV_START] and "activity" in six.text_type(pred_new): pred_new = PROV_ATTR_STARTER - if ids[id] == PROV_DERIVATION and 'entity' in six.text_type(pred_new): + if ids[id] == PROV_DERIVATION and "entity" in six.text_type(pred_new): pred_new = PROV_ATTR_USED_ENTITY - if six.text_type(pred_new) in [val.uri for val in formal_attributes[id]]: + if six.text_type(pred_new) in [ + val.uri for val in formal_attributes[id] + ]: qname_key = self.valid_identifier(pred_new) formal_attributes[id][qname_key] = obj1 unique_sets[id][qname_key].append(obj1) if len(unique_sets[id][qname_key]) > 1: formal_attributes[id][qname_key] = None else: - if 'qualified' not in six.text_type(pred_new) and \ - 'asInBundle' not in six.text_type(pred_new): + if "qualified" not in six.text_type( + pred_new + ) and "asInBundle" not in six.text_type(pred_new): other_attributes[id].append((six.text_type(pred_new), obj1)) local_key = six.text_type(obj) if local_key in ids: - if 'qualified' in pred: - formal_attributes[local_key][list(formal_attributes[local_key].keys())[0]] = id + if "qualified" in pred: + formal_attributes[local_key][ + list(formal_attributes[local_key].keys())[0] + ] = id for id in ids: attrs = None if id in other_attributes: @@ -600,7 +706,7 @@ def literal_rdf_representation(literal): return RDFLiteral(value, lang=str(literal.langtag)) else: datatype = literal.datatype - if 'base64Binary' in datatype.uri: + if "base64Binary" in datatype.uri: if six.PY2: value = base64.standard_b64encode(value) else: diff --git a/nidm/core/tests/test_provone.py b/nidm/core/tests/test_provone.py index 483b3b9e..8bdbcfd7 100644 --- a/nidm/core/tests/test_provone.py +++ b/nidm/core/tests/test_provone.py @@ -1,128 +1,145 @@ -#from nidm.core.provone import ProvONEDocument +# from nidm.core.provone import ProvONEDocument from nidm.core import Constants from nidm.core.dot import provone_to_dot import pytest -pytestmark = pytest.mark.skip(reason="had to comment provone import - was breaking tests from experiment") +pytestmark = pytest.mark.skip( + reason="had to comment provone import - was breaking tests from experiment" +) + @pytest.fixture(scope="module") def doc(): # Create new provone document with namespaces d1 = ProvONEDocument() - d1.add_namespace('dcterms', 'http://purl.org/dc/terms/') - d1.add_namespace('wfms', 'http://www.wfms.org/registry/') - d1.add_namespace('rdfs', 'http://www.w3.org/2000/01/rdf-schema#') - d1.add_namespace('nowpeople', 'http://www.provbook.org/nownews/people/') - d1.add_namespace('xsd', 'http://www.w3.org/2001/XMLSchema#') - d1.add_namespace('owl', 'http://www.w3.org/2002/07/owl#') + d1.add_namespace("dcterms", "http://purl.org/dc/terms/") + d1.add_namespace("wfms", "http://www.wfms.org/registry/") + d1.add_namespace("rdfs", 
"http://www.w3.org/2000/01/rdf-schema#") + d1.add_namespace("nowpeople", "http://www.provbook.org/nownews/people/") + d1.add_namespace("xsd", "http://www.w3.org/2001/XMLSchema#") + d1.add_namespace("owl", "http://www.w3.org/2002/07/owl#") return d1 def test_ispartof(doc): - workflow_1ex1 = doc.processExec('dcterms:identifier:wf1_ex1', - "2013-08-21 13:37:54", - "2013-08-21 13:37:59", {'wfms:completed': '1'}) - pe1 = doc.processExec('dcterms:identifier:e1_ex1', "2013-08-21 13:37:53", - "2013-08-21 13:37:53") + workflow_1ex1 = doc.processExec( + "dcterms:identifier:wf1_ex1", + "2013-08-21 13:37:54", + "2013-08-21 13:37:59", + {"wfms:completed": "1"}, + ) + pe1 = doc.processExec( + "dcterms:identifier:e1_ex1", "2013-08-21 13:37:53", "2013-08-21 13:37:53" + ) doc.isPartOf(pe1, workflow_1ex1) # save a turtle file - with open("test.ttl", 'w') as f: - f.write(doc.serialize(format='rdf', - rdf_format='ttl')) + with open("test.ttl", "w") as f: + f.write(doc.serialize(format="rdf", rdf_format="ttl")) def test_used(doc): - pe1 = doc.processExec('dcterms:identifier:e1_ex1', "2013-08-21 13:37:53", - "2013-08-21 13:37:53") - dt1 = doc.data('dcterms:identifier:defparam1', {'rdfs:label': "filename", - 'prov:value': "DLEM_NEE_onedeg_v1.0nc", - 'wfms:type': "edu.sci.wfms.basic:File"}) + pe1 = doc.processExec( + "dcterms:identifier:e1_ex1", "2013-08-21 13:37:53", "2013-08-21 13:37:53" + ) + dt1 = doc.data( + "dcterms:identifier:defparam1", + { + "rdfs:label": "filename", + "prov:value": "DLEM_NEE_onedeg_v1.0nc", + "wfms:type": "edu.sci.wfms.basic:File", + }, + ) doc.used(pe1, dt1) # save a turtle file - with open("test.ttl", 'w') as f: - f.write(doc.serialize(format='rdf', - rdf_format='ttl')) + with open("test.ttl", "w") as f: + f.write(doc.serialize(format="rdf", rdf_format="ttl")) def test_wasderivedfrom(doc): - - dt1 = doc.data('dcterms:identifier:defparam1', {'rdfs:label': "filename", - 'prov:value': "DLEM_NEE_onedeg_v1.0nc", - 'wfms:type': "edu.sci.wfms.basic:File"}) - dt2 = doc.data('dcterms:identifier:defparam2', {'rdfs:label': "filename"}) + dt1 = doc.data( + "dcterms:identifier:defparam1", + { + "rdfs:label": "filename", + "prov:value": "DLEM_NEE_onedeg_v1.0nc", + "wfms:type": "edu.sci.wfms.basic:File", + }, + ) + dt2 = doc.data("dcterms:identifier:defparam2", {"rdfs:label": "filename"}) doc.wasDerivedFrom(dt1, dt2) # save a turtle file - with open("test.ttl", 'w') as f: - f.write(doc.serialize(format='rdf', - rdf_format='ttl')) + with open("test.ttl", "w") as f: + f.write(doc.serialize(format="rdf", rdf_format="ttl")) def test_dataonlink(doc): - dt2 = doc.data('dcterms:identifier:defparam2', {'rdfs:label': "filename"}) - dl1 = doc.dataLink('dcterms:identifier:e1_e2DL') + dt2 = doc.data("dcterms:identifier:defparam2", {"rdfs:label": "filename"}) + dl1 = doc.dataLink("dcterms:identifier:e1_e2DL") # save a turtle file - with open("test.ttl", 'w') as f: - f.write(doc.serialize(format='rdf', - rdf_format='ttl')) + with open("test.ttl", "w") as f: + f.write(doc.serialize(format="rdf", rdf_format="ttl")) doc.dataOnLink(dt2, dl1) def test_wasgeneratedby(doc): - - dt2 = doc.data('dcterms:identifier:defparam2', {'rdfs:label': "filename"}) - pe1 = doc.processExec('dcterms:identifier:e1_ex1', "2013-08-21 13:37:53", - "2013-08-21 13:37:53") + dt2 = doc.data("dcterms:identifier:defparam2", {"rdfs:label": "filename"}) + pe1 = doc.processExec( + "dcterms:identifier:e1_ex1", "2013-08-21 13:37:53", "2013-08-21 13:37:53" + ) doc.wasGeneratedBy(dt2, pe1) # save a turtle file - with open("test.ttl", 'w') as f: - 
f.write(doc.serialize(format='rdf', - rdf_format='ttl')) + with open("test.ttl", "w") as f: + f.write(doc.serialize(format="rdf", rdf_format="ttl")) def test_wasassociatedwith(doc): - pe1 = doc.processExec('dcterms:identifier:e1_ex1', "2013-08-21 13:37:53", - "2013-08-21 13:37:53") - p2 = doc.process('dcterms:identifier:e2', {'dcterms:title': - "TemporalStatistics"}) + pe1 = doc.processExec( + "dcterms:identifier:e1_ex1", "2013-08-21 13:37:53", "2013-08-21 13:37:53" + ) + p2 = doc.process("dcterms:identifier:e2", {"dcterms:title": "TemporalStatistics"}) doc.wasAssociatedWith(pe1, p2) def test_wasattributedto(doc): - p2 = doc.process('dcterms:identifier:e2', {'dcterms:title': - "TemporalStatistics"}) - u1 = doc.user('dcterms:identifier:John') + p2 = doc.process("dcterms:identifier:e2", {"dcterms:title": "TemporalStatistics"}) + u1 = doc.user("dcterms:identifier:John") doc.wasAttributedTo(p2, u1) def test_hasinport(doc): - p2 = doc.process('dcterms:identifier:e2', {'dcterms:title': - "TemporalStatistics"}) - i1 = doc.input_port('dcterms:identifier:e1_ip1', - {'dcterms:title': "input_vars", - 'wfms:signature': "gov.llnl.uvcdat.cdms:CDMSVariable"}) + p2 = doc.process("dcterms:identifier:e2", {"dcterms:title": "TemporalStatistics"}) + i1 = doc.input_port( + "dcterms:identifier:e1_ip1", + { + "dcterms:title": "input_vars", + "wfms:signature": "gov.llnl.uvcdat.cdms:CDMSVariable", + }, + ) doc.hasInPort(p2, i1) def test_dltoinport(doc): - dl1 = doc.dataLink('dcterms:identifier:e1_e2DL') - i1 = doc.input_port('dcterms:identifier:e1_ip1', - {'dcterms:title': "input_vars", - 'wfms:signature': "gov.llnl.uvcdat.cdms:CDMSVariable"}) + dl1 = doc.dataLink("dcterms:identifier:e1_e2DL") + i1 = doc.input_port( + "dcterms:identifier:e1_ip1", + { + "dcterms:title": "input_vars", + "wfms:signature": "gov.llnl.uvcdat.cdms:CDMSVariable", + }, + ) doc.DLToInPort(dl1, i1) def test_documentserialize(doc): # save a turtle file - with open("test.ttl", 'w') as f: - f.write(doc.serialize(format='rdf', - rdf_format='ttl')) + with open("test.ttl", "w") as f: + f.write(doc.serialize(format="rdf", rdf_format="ttl")) def test_write_to_dot(doc): dot = provone_to_dot(doc) - dot.write_png('provone-test.png') + dot.write_png("provone-test.png") diff --git a/nidm/experiment/Acquisition.py b/nidm/experiment/Acquisition.py index 8abc306e..83b46ba1 100644 --- a/nidm/experiment/Acquisition.py +++ b/nidm/experiment/Acquisition.py @@ -1,13 +1,15 @@ +import os +import sys +import prov.model as pm import rdflib as rdf -import os, sys -#sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +# sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) from ..core import Constants from ..experiment import Core from ..experiment.Core import getUUID -import prov.model as pm -class Acquisition(pm.ProvActivity,Core): + +class Acquisition(pm.ProvActivity, Core): """Class for NIDM-Experiment Acquisition-Level Objects. Default constructor uses empty graph with namespaces added from NIDM/Scripts/Constants.py. 
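For orientation while reading the nidm/experiment changes that follow, here is a minimal usage sketch of how these classes fit together. It is illustrative only and assumes the Project and Session classes from nidm.experiment, which are not touched in the hunks shown here.

```
# Illustrative sketch (assumption: Project and Session come from nidm.experiment;
# they are not part of this diff).
from nidm.experiment import Project, Session, Acquisition, AcquisitionObject

project = Project()                          # NIDM-Experiment PROV document with default namespaces
session = Session(project)                   # session activity linked to the project
acquisition = Acquisition(session)           # acquisition activity; registers itself with the session
acq_entity = AcquisitionObject(acquisition)  # entity generated by the acquisition activity

print(project.serializeTurtle())             # Turtle serialization provided by the Core mixin
```

Each constructor hands the parent's PROV graph (session.graph, acquisition.graph) to the prov.model superclass and registers the new record there, which is the pattern the reformatted __init__ methods below preserve.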
@@ -18,7 +20,8 @@ class Acquisition(pm.ProvActivity,Core): @copyright: University of California, Irvine 2017 """ - #constructor + + # constructor def __init__(self, session, attributes=None, uuid=None, add_default_type=True): """ Default constructor, creates a session activity and links to project object @@ -31,47 +34,61 @@ def __init__(self, session, attributes=None, uuid=None, add_default_type=True): if uuid is None: self._uuid = getUUID() - #execute default parent class constructor - super(Acquisition,self).__init__(session.graph, pm.QualifiedName(pm.Namespace("niiri",Constants.NIIRI),self.get_uuid()),attributes) + # execute default parent class constructor + super(Acquisition, self).__init__( + session.graph, + pm.QualifiedName( + pm.Namespace("niiri", Constants.NIIRI), self.get_uuid() + ), + attributes, + ) else: self._uuid = uuid - super(Acquisition,self).__init__(session.graph, pm.QualifiedName(pm.Namespace("niiri",Constants.NIIRI),self.get_uuid()),attributes) + super(Acquisition, self).__init__( + session.graph, + pm.QualifiedName( + pm.Namespace("niiri", Constants.NIIRI), self.get_uuid() + ), + attributes, + ) session.graph._add_record(self) if add_default_type: self.add_attributes({pm.PROV_TYPE: Constants.NIDM_ACQUISITION_ACTIVITY}) - #self.add_attributes({pm.QualifiedName(pm.Namespace("dct",Constants.DCT),'isPartOf'):self}) + # self.add_attributes({pm.QualifiedName(pm.Namespace("dct",Constants.DCT),'isPartOf'):self}) - #list to store acquisition objects associated with this activity - self._acquisition_objects=[] - #if constructor is called with a session object then add this acquisition to the session + # list to store acquisition objects associated with this activity + self._acquisition_objects = [] + # if constructor is called with a session object then add this acquisition to the session - #carry graph object around + # carry graph object around self.graph = session.graph - #add acquisition to session + # add acquisition to session session.add_acquisition(self) - def add_acquisition_object(self,acquisition_object): + def add_acquisition_object(self, acquisition_object): """ Adds acquisition objects to acquisition activity, creating links and adding reference to acquisitions list :param acquisition: object of type "AcquisitionObject" from nidm API """ - #add acquisition object to self._acquisitions list + # add acquisition object to self._acquisitions list self._acquisition_objects.extend([acquisition_object]) - #create links in graph - self.graph.wasGeneratedBy(acquisition_object,self) + # create links in graph + self.graph.wasGeneratedBy(acquisition_object, self) + def get_acquisition_objects(self): return self._acquisition_objects - def acquisition_object_exists(self,uuid): - ''' + + def acquisition_object_exists(self, uuid): + """ Checks whether uuid is a registered acquisition object :param uuid: full uuid of acquisition :return: True if exists, False otherwise - ''' + """ if uuid in self._acquisition_objects: return True else: diff --git a/nidm/experiment/AcquisitionObject.py b/nidm/experiment/AcquisitionObject.py index fd0fc2fa..effb0454 100644 --- a/nidm/experiment/AcquisitionObject.py +++ b/nidm/experiment/AcquisitionObject.py @@ -1,12 +1,15 @@ -import os, sys -#sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +import os +import sys +import prov.model as pm + +# sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) import rdflib as rdf from ..core import Constants from ..experiment import Core from 
..experiment.Core import getUUID -import prov.model as pm -class AcquisitionObject(pm.ProvEntity,Core): + +class AcquisitionObject(pm.ProvEntity, Core): """Class for NIDM-Experimenent AcquisitionObject-Level Objects. Default constructor uses empty graph with namespaces added from NIDM/Scripts/Constants.py. @@ -17,8 +20,9 @@ class AcquisitionObject(pm.ProvEntity,Core): @copyright: University of California, Irvine 2017 """ - #constructor - def __init__(self, acquisition,attributes=None, uuid=None): + + # constructor + def __init__(self, acquisition, attributes=None, uuid=None): """ Default constructor, creates an acquisition object and links to acquisition activity object @@ -31,20 +35,30 @@ def __init__(self, acquisition,attributes=None, uuid=None): if uuid is None: self._uuid = getUUID() - #execute default parent class constructor - super(AcquisitionObject,self).__init__(acquisition.graph, pm.QualifiedName(pm.Namespace("niiri",Constants.NIIRI),self.get_uuid()),attributes) + # execute default parent class constructor + super(AcquisitionObject, self).__init__( + acquisition.graph, + pm.QualifiedName( + pm.Namespace("niiri", Constants.NIIRI), self.get_uuid() + ), + attributes, + ) else: self._uuid = uuid - super(AcquisitionObject,self).__init__(acquisition.graph, pm.QualifiedName(pm.Namespace("niiri",Constants.NIIRI),self.get_uuid()),attributes) + super(AcquisitionObject, self).__init__( + acquisition.graph, + pm.QualifiedName( + pm.Namespace("niiri", Constants.NIIRI), self.get_uuid() + ), + attributes, + ) acquisition.graph._add_record(self) - #carry graph object around + # carry graph object around self.graph = acquisition.graph - #create link to acquisition activity + # create link to acquisition activity acquisition.add_acquisition_object(self) def __str__(self): return "NIDM-Experiment AcquisitionObject Class" - - diff --git a/nidm/experiment/AssessmentAcquisition.py b/nidm/experiment/AssessmentAcquisition.py index 7e2bb69b..d6883bfd 100644 --- a/nidm/experiment/AssessmentAcquisition.py +++ b/nidm/experiment/AssessmentAcquisition.py @@ -1,21 +1,24 @@ -import os, sys -#sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +import os +import sys +import prov.model as pm + +# sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) import rdflib as rdf -from ..experiment import Acquisition from ..core import Constants +from ..experiment import Acquisition from ..experiment.Core import getUUID -import prov.model as pm + class AssessmentAcquisition(Acquisition): """ - Default constructor, creates a session activity and links to project object + Default constructor, creates a session activity and links to project object - :param session: a session object + :param session: a session object """ - #constructor - def __init__(self, session,attributes=None, uuid=None, add_default_type=True): + # constructor + def __init__(self, session, attributes=None, uuid=None, add_default_type=True): """ Default constructor, creates an acquisition object and links to acquisition activity object @@ -25,18 +28,17 @@ def __init__(self, session,attributes=None, uuid=None, add_default_type=True): :return: none """ - #execute default parent class constructor - #execute default parent class constructor - super(AssessmentAcquisition,self).__init__(session,attributes,uuid) - #acquisition.graph._add_record(self) + # execute default parent class constructor + # execute default parent class constructor + super(AssessmentAcquisition, self).__init__(session, attributes, 
uuid) + # acquisition.graph._add_record(self) if add_default_type: self.add_attributes({pm.PROV_TYPE: Constants.NIDM_ACQUISITION_ACTIVITY}) self.add_attributes({pm.PROV_TYPE: Constants.NIDM_ASSESSMENT_ACQUISITION}) - #carry graph object around + # carry graph object around self.graph = session.graph - def __str__(self): return "NIDM-Experiment Assessment Acquisition Class" diff --git a/nidm/experiment/AssessmentObject.py b/nidm/experiment/AssessmentObject.py index 84d5dde0..8022bcce 100644 --- a/nidm/experiment/AssessmentObject.py +++ b/nidm/experiment/AssessmentObject.py @@ -1,9 +1,12 @@ -import os, sys -#sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +import os +import sys +import prov.model as pm + +# sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) import rdflib as rdf from ..core import Constants from ..experiment import AcquisitionObject -import prov.model as pm + class AssessmentObject(AcquisitionObject): """Class for NIDM-Experimenent generic AssessmentAcquisitionObject-Level Objects. @@ -16,8 +19,16 @@ class AssessmentObject(AcquisitionObject): @copyright: University of California, Irvine 2017 """ - #constructor - def __init__(self, acquisition,assessment_type=None,attributes=None, uuid=None, add_default_type=True): + + # constructor + def __init__( + self, + acquisition, + assessment_type=None, + attributes=None, + uuid=None, + add_default_type=True, + ): """ Default constructor, creates an acquisition object and links to acquisition activity object @@ -28,9 +39,9 @@ def __init__(self, acquisition,assessment_type=None,attributes=None, uuid=None, :return: none """ - #execute default parent class constructor - #execute default parent class constructor - super(AssessmentObject,self).__init__(acquisition,attributes, uuid) + # execute default parent class constructor + # execute default parent class constructor + super(AssessmentObject, self).__init__(acquisition, attributes, uuid) if add_default_type: self.add_attributes({pm.PROV_TYPE: Constants.NIDM_ASSESSMENT_ENTITY}) @@ -38,9 +49,8 @@ def __init__(self, acquisition,assessment_type=None,attributes=None, uuid=None, if assessment_type is not None: self.add_attributes({pm.PROV_TYPE: assessment_type}) - #carry graph object around + # carry graph object around self.graph = acquisition.graph - def __str__(self): return "NIDM-Experiment Generic Assessment Object Class" diff --git a/nidm/experiment/CDE.py b/nidm/experiment/CDE.py index 59aad65f..ca365528 100644 --- a/nidm/experiment/CDE.py +++ b/nidm/experiment/CDE.py @@ -1,31 +1,28 @@ +import hashlib +from os import environ, path +import pickle import tempfile from urllib.request import urlretrieve from nidm.core import Constants -import hashlib -from os import path, environ -import pickle -from rdflib import Graph import nidm.experiment.Query - - +from rdflib import Graph def download_cde_files(): cde_dir = tempfile.gettempdir() for url in Constants.CDE_FILE_LOCATIONS: - urlretrieve( url, "{}/{}".format(cde_dir, url.split('/')[-1] ) ) + urlretrieve(url, "{}/{}".format(cde_dir, url.split("/")[-1])) return cde_dir def getCDEs(file_list=None): - if getCDEs.cache: return getCDEs.cache hasher = hashlib.md5() - hasher.update(str(file_list).encode('utf-8')) + hasher.update(str(file_list).encode("utf-8")) h = hasher.hexdigest() cache_file_name = tempfile.gettempdir() + "/cde_graph.{}.pickle".format(h) @@ -38,37 +35,35 @@ def getCDEs(file_list=None): rdf_graph = Graph() if not file_list: - - cde_dir = '' + cde_dir = "" if 
"CDE_DIR" in environ: - cde_dir = environ['CDE_DIR'] + cde_dir = environ["CDE_DIR"] - if (not cde_dir) and (path.isfile( '/opt/project/nidm/core/cde_dir/ants_cde.ttl' )): - cde_dir = '/opt/project/nidm/core/cde_dir' + if (not cde_dir) and ( + path.isfile("/opt/project/nidm/core/cde_dir/ants_cde.ttl") + ): + cde_dir = "/opt/project/nidm/core/cde_dir" - if (not cde_dir): + if not cde_dir: cde_dir = download_cde_files() - file_list = [ ] - for f in ['ants_cde.ttl', 'fs_cde.ttl', 'fsl_cde.ttl']: - fname = '{}/{}'.format(cde_dir, f) - if path.isfile( fname ): - file_list.append( fname ) - - + file_list = [] + for f in ["ants_cde.ttl", "fs_cde.ttl", "fsl_cde.ttl"]: + fname = "{}/{}".format(cde_dir, f) + if path.isfile(fname): + file_list.append(fname) for fname in file_list: if path.isfile(fname): cde_graph = nidm.experiment.Query.OpenGraph(fname) rdf_graph = rdf_graph + cde_graph - - - - cache_file = open(cache_file_name , 'wb') + cache_file = open(cache_file_name, "wb") pickle.dump(rdf_graph, cache_file) cache_file.close() getCDEs.cache = rdf_graph return rdf_graph -getCDEs.cache = None \ No newline at end of file + + +getCDEs.cache = None diff --git a/nidm/experiment/Core.py b/nidm/experiment/Core.py index 0e1a2f4e..70e58122 100644 --- a/nidm/experiment/Core.py +++ b/nidm/experiment/Core.py @@ -1,26 +1,24 @@ -import os,sys +from collections import OrderedDict +from io import StringIO +import json +import os +import random +import re +import string +import sys +import types import uuid - -from rdflib import Namespace -from rdflib.namespace import XSD -import types import graphviz -from rdflib import Graph, RDF, URIRef, util, plugin +from prov.dot import prov_to_dot +import prov.model as pm +from pydot import Edge +from rdflib import RDF, Graph, Namespace, URIRef, plugin, util +from rdflib.namespace import XSD from rdflib.serializer import Serializer - -#sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +# sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) from ..core import Constants -import prov.model as pm -from prov.dot import prov_to_dot -from io import StringIO -from collections import OrderedDict -import json -import re -import string -import random -from pydot import Edge def getUUID(): uid = str(uuid.uuid1()) @@ -29,7 +27,7 @@ def getUUID(): if not (re.match("^[a-fA-F]+.*", uid)): # if first digit is not a character than replace it with a randomly selected hex character (a-f). 
uid_temp = uid - randint = random.randint(0,5) + randint = random.randint(0, 5) uid = string.ascii_lowercase[randint] + uid_temp[1:] return uid @@ -45,21 +43,23 @@ class Core(object): @copyright: University of California, Irvine 2017 """ - language = 'en' + + language = "en" + def __init__(self): """ Default constructor, loads empty graph and namespaces from NIDM/Scripts/Constants.py """ - #a new instance of NIDMDocument PROV document with namespaces already bound + # a new instance of NIDMDocument PROV document with namespaces already bound self.graph = Constants.NIDMDocument() - #make a local copy of the namespaces + # make a local copy of the namespaces self.namespaces = Constants.namespaces # storage for uuid self._uuid = None - #class constructor with user-supplied PROV document/graph, namespaces from Constants.py + # class constructor with user-supplied PROV document/graph, namespaces from Constants.py @classmethod - def withGraph(self,graph): + def withGraph(self, graph): """ Alternate constructor, loads user-supplied graph and default namespaces from NIDM/Scripts/Constants.py @@ -68,13 +68,13 @@ def withGraph(self,graph): """ self.graph = graph self.namespaces = {} - #bind namespaces to self.graph + # bind namespaces to self.graph for name, namespace in self.namespaces.items(): self.graph.add_namespace(name, namespace) - #class constructor with user-supplied graph and namespaces + # class constructor with user-supplied graph and namespaces @classmethod - def withGraphAndNamespaces(self,graph,namespaces): + def withGraphAndNamespaces(self, graph, namespaces): """ Alternate constructor, loads user-supplied graph and binds user-supplied namespaces @@ -83,18 +83,17 @@ def withGraphAndNamespaces(self,graph,namespaces): :return: none """ - self.graph = graph self.namespaces = namespaces - #bind namespaces to self.graph + # bind namespaces to self.graph for name, namespace in self.namespaces.items(): self.graph.add_namespace(name, namespace) def get_uuid(self): - ''' + """ returns UUID of self :return: - ''' + """ return self._uuid def getGraph(self): @@ -116,7 +115,7 @@ def addNamespace(self, prefix, uri): :param uri: namespace URI :return: none """ - self.graph.add_namespace(prefix,uri) + self.graph.add_namespace(prefix, uri) def checkNamespacePrefix(self, prefix): """ @@ -124,32 +123,40 @@ def checkNamespacePrefix(self, prefix): :param prefix: namespace identifier :return: True if prefix exists, False if not """ - #check if prefix already exists + # check if prefix already exists if prefix in self.graph._namespaces.keys(): - #prefix already exists + # prefix already exists return True else: return False def safe_string(self, string): - return string.strip().replace(" ","_").replace("-", "_").replace(",", "_").replace("(", "_").replace(")","_")\ - .replace("'","_").replace("/", "_").replace("#","num") - - - - def getDataType(self,var): + return ( + string.strip() + .replace(" ", "_") + .replace("-", "_") + .replace(",", "_") + .replace("(", "_") + .replace(")", "_") + .replace("'", "_") + .replace("/", "_") + .replace("#", "num") + ) + + def getDataType(self, var): if type(var) is int: return pm.XSD_INTEGER elif type(var) is float: return pm.XSD_FLOAT - elif (type(var) is str): + elif type(var) is str: return pm.XSD_STRING - elif (type(var) is list): + elif type(var) is list: return list else: print("datatype not found...") return None - def add_person(self,uuid=None,attributes=None,add_default_type=True): + + def add_person(self, uuid=None, attributes=None, add_default_type=True): """ 
Simply adds prov:agent to graph and returns object :param role: @@ -157,26 +164,30 @@ def add_person(self,uuid=None,attributes=None,add_default_type=True): :return: """ - if (uuid != None): - #add Person agent with existing uuid - person = self.graph.agent(Constants.namespaces["niiri"][uuid],other_attributes=attributes) + if uuid != None: + # add Person agent with existing uuid + person = self.graph.agent( + Constants.namespaces["niiri"][uuid], other_attributes=attributes + ) else: - #add Person agent - person = self.graph.agent(Constants.namespaces["niiri"][getUUID()],other_attributes=attributes) + # add Person agent + person = self.graph.agent( + Constants.namespaces["niiri"][getUUID()], other_attributes=attributes + ) if add_default_type: - #add minimal attributes to person - person.add_attributes({pm.PROV_TYPE: pm.PROV['Person']}) + # add minimal attributes to person + person.add_attributes({pm.PROV_TYPE: pm.PROV["Person"]}) - #connect self to person serving as role - #if(isinstance(self,pm.ProvActivity)): + # connect self to person serving as role + # if(isinstance(self,pm.ProvActivity)): # self.wasAssociatedWith(person) - #elif(isinstance(self,pm.ProvEntity)): + # elif(isinstance(self,pm.ProvEntity)): # self.wasAttributedTo(person) return person - def add_qualified_association(self,person,role,plan=None, attributes=None): + def add_qualified_association(self, person, role, plan=None, attributes=None): """ Adds a qualified association to self object :param person: prov:agent to associated @@ -186,19 +197,21 @@ def add_qualified_association(self,person,role,plan=None, attributes=None): :return: association """ - #connect self to person serving as role - #WIP this doesn't work for subclasses as they don't have the pm.ProvActivity type - #Might be able to use the following and look into the tuples but for now skip this check - #import inspect - #class_tree = inspect.getclasstree([self.__class__]) + # connect self to person serving as role + # WIP this doesn't work for subclasses as they don't have the pm.ProvActivity type + # Might be able to use the following and look into the tuples but for now skip this check + # import inspect + # class_tree = inspect.getclasstree([self.__class__]) - #if(isinstance(self, pm.ProvActivity)): + # if(isinstance(self, pm.ProvActivity)): - #associate person with activity for qualified association - assoc = self.graph.association(agent=person, activity=self, other_attributes={pm.PROV_ROLE:role}) + # associate person with activity for qualified association + assoc = self.graph.association( + agent=person, activity=self, other_attributes={pm.PROV_ROLE: role} + ) - #add wasAssociatedWith association - #self.wasAssociatedWith(person) + # add wasAssociatedWith association + # self.wasAssociatedWith(person) return assoc @@ -211,29 +224,45 @@ def addLiteralAttribute(self, namespace_prefix, term, object, namespace_uri=None :param namespace_uri: If namespace_prefix isn't one already used then use this optional argument to define :return: none """ - #figure out datatype of literal + # figure out datatype of literal datatype = self.getDataType(object) - #check if namespace prefix already exists in graph + # check if namespace prefix already exists in graph if not self.checkNamespacePrefix(namespace_prefix): - #if so, use URI - #namespace_uri = self.namespaces[namespace_prefix] - #else: #add namespace_uri + prefix to graph - if (namespace_uri == None): - raise TypeError("Namespace_uri argument must be defined for new namespaces") + # if so, use URI + # namespace_uri = 
self.namespaces[namespace_prefix] + # else: #add namespace_uri + prefix to graph + if namespace_uri == None: + raise TypeError( + "Namespace_uri argument must be defined for new namespaces" + ) else: - self.addNamespace(namespace_prefix,namespace_uri) + self.addNamespace(namespace_prefix, namespace_uri) - #figure out if predicate namespace is defined, if not, return predicate namespace error + # figure out if predicate namespace is defined, if not, return predicate namespace error try: - if (datatype != None): - self.add_attributes({str(namespace_prefix + ':' + term): pm.Literal(object, datatype=datatype)}) + if datatype != None: + self.add_attributes( + { + str(namespace_prefix + ":" + term): pm.Literal( + object, datatype=datatype + ) + } + ) else: - self.add_attributes({str(namespace_prefix + ':' + term): pm.Literal(object)}) + self.add_attributes( + {str(namespace_prefix + ":" + term): pm.Literal(object)} + ) except KeyError as e: - print("\nPredicate namespace identifier \" %s \" not found! \n" % (str(e).split("'")[1])) - print("Use addNamespace method to add namespace before adding literal attribute \n") + print( + '\nPredicate namespace identifier " %s " not found! \n' + % (str(e).split("'")[1]) + ) + print( + "Use addNamespace method to add namespace before adding literal attribute \n" + ) print("No attribute has been added \n") - def addAttributesWithNamespaces(self,id,attributes): + + def addAttributesWithNamespaces(self, id, attributes): """ Adds generic attributes in bulk to object [id] and inserts into the graph @@ -243,25 +272,41 @@ def addAttributesWithNamespaces(self,id,attributes): {uri:"http://ncitt.ncit.nih.gov/", prefix:"ncit", term:"gender", value:"M"}] :return: TypeError if namespace prefix already exists in graph but URI is different """ - #iterate through list of attributes + # iterate through list of attributes for tuple in attributes: - #check if namespace prefix already exists in graph - if self.checkNamespacePrefix(tuple['prefix']): - #checking if existing prefix maps to same namespaceURI, if so use it, if not then raise error - if (self.namespaces[tuple['prefix']] != tuple['uri']): - raise TypeError("Namespace prefix: " + tuple['prefix'] + "already exists in document") - - else: #add tuple to graph - self.addNamespace(tuple['prefix'], tuple['uri']) - - #figure out datatype of literal - datatype = self.getDataType(tuple['value']) - if (datatype != None): - id.add_attributes({self.namespaces[tuple['prefix']][tuple['term']]:pm.Literal(tuple['value'],datatype=datatype)}) + # check if namespace prefix already exists in graph + if self.checkNamespacePrefix(tuple["prefix"]): + # checking if existing prefix maps to same namespaceURI, if so use it, if not then raise error + if self.namespaces[tuple["prefix"]] != tuple["uri"]: + raise TypeError( + "Namespace prefix: " + + tuple["prefix"] + + "already exists in document" + ) + + else: # add tuple to graph + self.addNamespace(tuple["prefix"], tuple["uri"]) + + # figure out datatype of literal + datatype = self.getDataType(tuple["value"]) + if datatype != None: + id.add_attributes( + { + self.namespaces[tuple["prefix"]][tuple["term"]]: pm.Literal( + tuple["value"], datatype=datatype + ) + } + ) else: - id.add_attributes({self.namespaces[tuple['prefix']][tuple['term']]:pm.Literal(tuple['value'])}) + id.add_attributes( + { + self.namespaces[tuple["prefix"]][tuple["term"]]: pm.Literal( + tuple["value"] + ) + } + ) - def addAttributes(self,id,attributes): + def addAttributes(self, id, attributes): """ Adds generic attributes in 
bulk to object [id] and inserts into the graph @@ -270,70 +315,76 @@ def addAttributes(self,id,attributes): example: {"ncit:age":15,"ncit:gender":"M", Constants.NIDM_FAMILY_NAME:"Keator"} :return: TypeError if namespace prefix does not exist in graph """ - #iterate through attributes + # iterate through attributes for key in attributes.keys(): - #is the key already mapped to a URL (i.e. using one of the constants from Constants.py) or is it in prefix:term form? - #if not validators.url(key): - #check if namespace prefix already exists in graph or #if we're using a Constants reference - if (not self.checkNamespacePrefix(key.split(':')[0])): - raise TypeError("Namespace prefix " + key + " not in graph, use addAttributesWithNamespaces or manually add!") - #figure out datatype of literal + # is the key already mapped to a URL (i.e. using one of the constants from Constants.py) or is it in prefix:term form? + # if not validators.url(key): + # check if namespace prefix already exists in graph or #if we're using a Constants reference + if not self.checkNamespacePrefix(key.split(":")[0]): + raise TypeError( + "Namespace prefix " + + key + + " not in graph, use addAttributesWithNamespaces or manually add!" + ) + # figure out datatype of literal datatype = self.getDataType(attributes[key]) - #if (not validators.url(key)): - #we must be using the prefix:term form instead of a constant directly + # if (not validators.url(key)): + # we must be using the prefix:term form instead of a constant directly # if (datatype != None): # id.add_attributes({self.namespaces[key.split(':')[0]][key.split(':')[1]]:Literal(attributes[key],datatype=datatype)}) # else: # id.add_attributes({self.namespaces[key.split(':')[0]][key.split(':')[1]]:Literal(attributes[key])}) - #else: - #we're using the Constants form - if (datatype != None): - id.add_attributes({key:pm.Literal(attributes[key],datatype=datatype)}) + # else: + # we're using the Constants form + if datatype != None: + id.add_attributes({key: pm.Literal(attributes[key], datatype=datatype)}) else: - id.add_attributes({key:pm.Literal(attributes[key])}) + id.add_attributes({key: pm.Literal(attributes[key])}) - def get_metadata_dict(self,NIDM_TYPE): + def get_metadata_dict(self, NIDM_TYPE): """ This function converts metadata to a dictionary using uris as keys :param NIDM_TYPE: a prov qualified name type (e.g. Constants.NIDM_PROJECT, Constants.NIDM_SESSION, etc.) 
:return: dictionary object containing metadata """ - #create empty project_metadata json object + # create empty project_metadata json object metadata = {} - #use RDFLib here for temporary graph making query easier + # use RDFLib here for temporary graph making query easier rdf_graph = Graph() - rdf_graph_parse = rdf_graph.parse(source=StringIO(self.serializeTurtle()),format='turtle') + rdf_graph_parse = rdf_graph.parse( + source=StringIO(self.serializeTurtle()), format="turtle" + ) - #get subject uri for object + # get subject uri for object - - uri=None - for s in rdf_graph_parse.subjects(predicate=RDF.type,object=URIRef(NIDM_TYPE.uri)): - uri=s + uri = None + for s in rdf_graph_parse.subjects( + predicate=RDF.type, object=URIRef(NIDM_TYPE.uri) + ): + uri = s if uri is None: - print("Error finding %s in NIDM-Exp Graph" %NIDM_TYPE) + print("Error finding %s in NIDM-Exp Graph" % NIDM_TYPE) return metadata - #Cycle through metadata and add to json + # Cycle through metadata and add to json for predicate, objects in rdf_graph.predicate_objects(subject=uri): metadata[str(predicate)] = str(objects) return metadata - def serializeTurtle(self): + def serializeTurtle(self): """ Serializes graph to Turtle format :return: text of serialized graph in Turtle format """ - return self.graph.serialize(None, format='rdf', rdf_format='ttl') + return self.graph.serialize(None, format="rdf", rdf_format="ttl") def serializeTrig(self, identifier=None): - """ Serializes graph to Turtle format :param identifier: Optional identifier to use for graph serialization @@ -341,26 +392,26 @@ def serializeTrig(self, identifier=None): """ if identifier is not None: rdf_graph = Graph(identifier=identifier) - rdf_graph.parse(source=StringIO(self.serializeTurtle()),format='turtle') + rdf_graph.parse(source=StringIO(self.serializeTurtle()), format="turtle") else: rdf_graph = Graph() - rdf_graph.parse(source=StringIO(self.serializeTurtle()),format='turtle') - - #return rdf_graph.serialize(format='trig').decode('ASCII') - return rdf_graph.serialize(format='trig') + rdf_graph.parse(source=StringIO(self.serializeTurtle()), format="turtle") + # return rdf_graph.serialize(format='trig').decode('ASCII') + return rdf_graph.serialize(format="trig") def serializeJSONLD(self): """ Serializes graph to JSON-LD format :return: text of serialized graph in JSON-LD format """ - #workaround to get JSONLD from RDFLib... + # workaround to get JSONLD from RDFLib... 
rdf_graph = Graph() - #rdf_graph_parse = rdf_graph.parse(source=StringIO(self.serializeTurtle()),format='turtle') - rdf_graph_parse = rdf_graph.parse(source=StringIO(self.graph.serialize(None, format='rdf', rdf_format='ttl')),format='turtle') - - + # rdf_graph_parse = rdf_graph.parse(source=StringIO(self.serializeTurtle()),format='turtle') + rdf_graph_parse = rdf_graph.parse( + source=StringIO(self.graph.serialize(None, format="rdf", rdf_format="ttl")), + format="turtle", + ) # WIP: currently this creates a default JSON-LD context from Constants.py and not in the correct way from the # NIDM-E OWL files that that will be the next iteration @@ -371,74 +422,72 @@ def serializeJSONLD(self): context = dict(context1, **context2) # WIP: LOOK AT https://github.com/satra/nidm-jsonld - #return rdf_graph_parse.serialize(format='json-ld', context=context, indent=4).decode('ASCII') - #g=rdf_graph_parse.serialize(format='json-ld', indent=4).decode('ASCII') - g = rdf_graph_parse.serialize(format='json-ld', indent=4) - + # return rdf_graph_parse.serialize(format='json-ld', context=context, indent=4).decode('ASCII') + # g=rdf_graph_parse.serialize(format='json-ld', indent=4).decode('ASCII') + g = rdf_graph_parse.serialize(format="json-ld", indent=4) import pyld as ld - return json.dumps(ld.jsonld.compact(json.loads(g), context),indent=4) + return json.dumps(ld.jsonld.compact(json.loads(g), context), indent=4) def createDefaultJSONLDcontext(self): - ''' + """ This function returns a context dictionary for NIDM-E JSON serializations :return: context dictionary - ''' + """ - from nidm.experiment.Utils import load_nidm_owl_files from nidm.core.Constants import namespaces + from nidm.experiment.Utils import load_nidm_owl_files - #load current OWL files - term_graph=load_nidm_owl_files() - - context={} + # load current OWL files + term_graph = load_nidm_owl_files() + context = {} - context['@version'] = 1.1 - context['records'] = {} - context['records']['@container'] = "@type" - context['records']['@id'] = "@graph" + context["@version"] = 1.1 + context["records"] = {} + context["records"]["@container"] = "@type" + context["records"]["@id"] = "@graph" - #load Constants.namespaces + # load Constants.namespaces context.update(Constants.namespaces) - context.update ({ - "xsd": {"@type": "@id","@id":"http://www.w3.org/2001/XMLSchema#"}, - "prov": {"@type": "@id","@id":"http://www.w3.org/ns/prov#"}, - "agent": { "@type": "@id", "@id": "prov:agent" }, - "entity": { "@type": "@id", "@id": "prov:entity" }, - "activity": { "@type": "@id", "@id": "prov:activity" }, - "hadPlan": { "@type": "@id", "@id": "prov:hadPlan" }, - "hadRole": { "@type": "@id", "@id": "prov:hadRole" }, - "wasAttributedTo": { "@type": "@id", "@id": "prov:wasAttributedTo" }, - "association": { "@type": "@id", "@id": "prov:qualifiedAssociation" }, - "usage": { "@type": "@id", "@id": "prov:qualifiedUsage" }, - "generation": { "@type": "@id", "@id": "prov:qualifiedGeneration" }, - "startedAtTime": { "@type": "xsd:dateTime", "@id": "prov:startedAtTime" }, - "endedAtTime": { "@type": "xsd:dateTime", "@id": "prov:endedAtTime" }, - }) - - - #add namespaces from Constants.namespaces - for key,value in namespaces.items(): - #context['@context'][key] = value + context.update( + { + "xsd": {"@type": "@id", "@id": "http://www.w3.org/2001/XMLSchema#"}, + "prov": {"@type": "@id", "@id": "http://www.w3.org/ns/prov#"}, + "agent": {"@type": "@id", "@id": "prov:agent"}, + "entity": {"@type": "@id", "@id": "prov:entity"}, + "activity": {"@type": "@id", "@id": 
"prov:activity"}, + "hadPlan": {"@type": "@id", "@id": "prov:hadPlan"}, + "hadRole": {"@type": "@id", "@id": "prov:hadRole"}, + "wasAttributedTo": {"@type": "@id", "@id": "prov:wasAttributedTo"}, + "association": {"@type": "@id", "@id": "prov:qualifiedAssociation"}, + "usage": {"@type": "@id", "@id": "prov:qualifiedUsage"}, + "generation": {"@type": "@id", "@id": "prov:qualifiedGeneration"}, + "startedAtTime": {"@type": "xsd:dateTime", "@id": "prov:startedAtTime"}, + "endedAtTime": {"@type": "xsd:dateTime", "@id": "prov:endedAtTime"}, + } + ) + + # add namespaces from Constants.namespaces + for key, value in namespaces.items(): + # context['@context'][key] = value context[key] = value - #add terms from Constants.nidm_experiment_terms + # add terms from Constants.nidm_experiment_terms for term in Constants.nidm_experiment_terms: - #context['@context'][term.localpart] = term.uri + # context['@context'][term.localpart] = term.uri context[term.localpart] = term.uri + # add prefix's from current document...this accounts for new terms + context.update(self.prefix_to_context()) - #add prefix's from current document...this accounts for new terms - context.update ( self.prefix_to_context() ) - - #WIP - #cycle through OWL graph and add terms + # WIP + # cycle through OWL graph and add terms # For anything that has a label - #for s, o in sorted(term_graph.subject_objects(Constants.RDFS['label'])): + # for s, o in sorted(term_graph.subject_objects(Constants.RDFS['label'])): # json_key = str(o) # if '_' in json_key: # json_key = str(o).split('_')[1] @@ -452,29 +501,27 @@ def createDefaultJSONLDcontext(self): return context - def save_DotGraph(self,filename,format=None): + def save_DotGraph(self, filename, format=None): dot = prov_to_dot(self.graph) ISPARTOF = { - 'label': 'isPartOf', - 'fontsize': '10.0', - 'color': 'darkgreen', - 'fontcolor' : 'darkgreen' + "label": "isPartOf", + "fontsize": "10.0", + "color": "darkgreen", + "fontcolor": "darkgreen", } style = ISPARTOF - - - - # query self.graph for Project uuids - #use RDFLib here for temporary graph making query easier + # use RDFLib here for temporary graph making query easier rdf_graph = Graph() - rdf_graph = rdf_graph.parse(source=StringIO(self.graph.serialize(None, format='rdf', rdf_format='ttl')),format='turtle') + rdf_graph = rdf_graph.parse( + source=StringIO(self.graph.serialize(None, format="rdf", rdf_format="ttl")), + format="turtle", + ) - - #SPARQL query to get project UUIDs - query = ''' + # SPARQL query to get project UUIDs + query = """ PREFIX nidm: PREFIX rdf: @@ -485,19 +532,21 @@ def save_DotGraph(self,filename,format=None): } } - ''' + """ qres = rdf_graph.query(query) for row in qres: - print("project uuid = %s" %row) + print("project uuid = %s" % row) # parse uuid from project URI - #project_uuid = str(row[0]).rsplit('/', 1)[-1] + # project_uuid = str(row[0]).rsplit('/', 1)[-1] project_uuid = str(row[0]) # for each Project uuid search dot structure for Project uuid project_node = None - for key,value in dot.obj_dict['nodes'].items(): + for key, value in dot.obj_dict["nodes"].items(): # get node number in DOT graph for Project - if 'URL' in dot.obj_dict['nodes'][key][0]['attributes']: - if project_uuid in str(dot.obj_dict['nodes'][key][0]['attributes']['URL']): + if "URL" in dot.obj_dict["nodes"][key][0]["attributes"]: + if project_uuid in str( + dot.obj_dict["nodes"][key][0]["attributes"]["URL"] + ): project_node = key break @@ -505,54 +554,60 @@ def save_DotGraph(self,filename,format=None): for session in self.sessions: 
print(session) - for key,value in dot.obj_dict['nodes'].items(): + for key, value in dot.obj_dict["nodes"].items(): # get node number in DOT graph for Project - if 'URL' in dot.obj_dict['nodes'][key][0]['attributes']: - if session.identifier.uri in str(dot.obj_dict['nodes'][key][0]['attributes']['URL']): - session_node= key - #print("session node = %s" %key) + if "URL" in dot.obj_dict["nodes"][key][0]["attributes"]: + if session.identifier.uri in str( + dot.obj_dict["nodes"][key][0]["attributes"]["URL"] + ): + session_node = key + # print("session node = %s" %key) # add to DOT structure edge between project_node and session_node dot.add_edge(Edge(session_node, project_node, **style)) - - - # for each Acquisition in Session class ._acquisitions list, find node numbers in DOT graph for acquisition in session.get_acquisitions(): # search through the nodes again to figure out node number for acquisition - for key,value in dot.obj_dict['nodes'].items(): + for key, value in dot.obj_dict["nodes"].items(): # get node number in DOT graph for Project - if 'URL' in dot.obj_dict['nodes'][key][0]['attributes']: - if acquisition.identifier.uri in str(dot.obj_dict['nodes'][key][0]['attributes']['URL']): + if "URL" in dot.obj_dict["nodes"][key][0]["attributes"]: + if acquisition.identifier.uri in str( + dot.obj_dict["nodes"][key][0]["attributes"][ + "URL" + ] + ): acquisition_node = key - #print("acquisition node = %s" %key) - - dot.add_edge(Edge(acquisition_node, session_node, **style)) + # print("acquisition node = %s" %key) + dot.add_edge( + Edge( + acquisition_node, session_node, **style + ) + ) - #add some logic to find nodes with dct:hasPart relation and add those edges to graph...prov_to_dot ignores these + # add some logic to find nodes with dct:hasPart relation and add those edges to graph...prov_to_dot ignores these if not (format == "None"): - dot.write(filename,format=format) + dot.write(filename, format=format) else: - dot.write(filename,format="pdf") + dot.write(filename, format="pdf") def prefix_to_context(self): - ''' + """ This function returns a context dictionary for JSONLD export from current NIDM-Exp document.... :return: Context dictionary for JSONLD - ''' + """ - #This sets up basic contexts from namespaces in documents - context=OrderedDict() - for key,value in self.graph._namespaces.items(): - #context[key] = {} - #context[key]['@type']='@id' - #context[key]['@id']= value.uri + # This sets up basic contexts from namespaces in documents + context = OrderedDict() + for key, value in self.graph._namespaces.items(): + # context[key] = {} + # context[key]['@type']='@id' + # context[key]['@id']= value.uri - #context[key]['@type']='@id' + # context[key]['@type']='@id' if type(value.uri) == str: - context[key]= value.uri + context[key] = value.uri # added for some weird namespaces where key is URIRef and value is Namespace # seems to only apply to PROV and NIDM qualified names. 
# has something to do with read_nidm function in Utils and add_metadata_for_subject @@ -560,11 +615,10 @@ def prefix_to_context(self): elif type(key) == URIRef: continue else: - context[key]= str(value.uri) - + context[key] = str(value.uri) - #This adds suffix part of namespaces as IDs to make things read easier in JSONLD - #for namespace in self.graph.namespaces: + # This adds suffix part of namespaces as IDs to make things read easier in JSONLD + # for namespace in self.graph.namespaces: # context[namespace.qname()]='@id' # context[namespace.qname()]=namespace.qname().localpart diff --git a/nidm/experiment/DataElement.py b/nidm/experiment/DataElement.py index 49f50f5d..bc4f15ab 100644 --- a/nidm/experiment/DataElement.py +++ b/nidm/experiment/DataElement.py @@ -1,12 +1,15 @@ -import os, sys -#sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +import os +import sys +import prov.model as pm + +# sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) import rdflib as rdf from ..core import Constants from ..experiment import Core from ..experiment.Core import getUUID -import prov.model as pm -class DataElement(pm.ProvEntity,Core): + +class DataElement(pm.ProvEntity, Core): """Class for NIDM-Experiment DataElement Objects. Default constructor uses empty graph with namespaces added from NIDM/Scripts/Constants.py. @@ -17,7 +20,8 @@ class DataElement(pm.ProvEntity,Core): @copyright: University of California, Irvine 2019 """ - #constructor + + # constructor def __init__(self, project, attributes=None, uuid=None, add_default_type=True): """ Default constructor, creates an acquisition object and links to acquisition activity object @@ -30,10 +34,16 @@ def __init__(self, project, attributes=None, uuid=None, add_default_type=True): """ if uuid is None: - #execute default parent class constructor - super(DataElement,self).__init__(project.graph, pm.QualifiedName(pm.Namespace("niiri",Constants.NIIRI),getUUID()),attributes) + # execute default parent class constructor + super(DataElement, self).__init__( + project.graph, + pm.QualifiedName(pm.Namespace("niiri", Constants.NIIRI), getUUID()), + attributes, + ) else: - super(DataElement,self).__init__(project.graph,pm.Identifier(uuid),attributes) + super(DataElement, self).__init__( + project.graph, pm.Identifier(uuid), attributes + ) project.graph._add_record(self) @@ -42,10 +52,9 @@ def __init__(self, project, attributes=None, uuid=None, add_default_type=True): project.add_dataelements(self) self.graph = project.graph - #list to store acquisition objects associated with this activity - self._derivative_objects=[] - #if constructor is called with a session object then add this acquisition to the session - + # list to store acquisition objects associated with this activity + self._derivative_objects = [] + # if constructor is called with a session object then add this acquisition to the session def __str__(self): return "NIDM-Experiment DataElement Class" diff --git a/nidm/experiment/DemographicsObject.py b/nidm/experiment/DemographicsObject.py index befdaa5d..20581fdc 100644 --- a/nidm/experiment/DemographicsObject.py +++ b/nidm/experiment/DemographicsObject.py @@ -1,9 +1,12 @@ -import os, sys -#sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +import os +import sys +import prov.model as pm + +# sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) import rdflib as rdf from ..core import Constants from ..experiment import 
AcquisitionObject -import prov.model as pm + class DemographicsObject(AcquisitionObject): """Class for NIDM-Experimenent MRAcquisitionObject-Level Objects. @@ -16,8 +19,9 @@ class DemographicsObject(AcquisitionObject): @copyright: University of California, Irvine 2017 """ - #constructor - def __init__(self, acquisition,attributes=None, uuid=None, add_default_type=True): + + # constructor + def __init__(self, acquisition, attributes=None, uuid=None, add_default_type=True): """ Default constructor, creates an acquisition object and links to acquisition activity object @@ -27,19 +31,21 @@ def __init__(self, acquisition,attributes=None, uuid=None, add_default_type=True :return: none """ - #execute default parent class constructor - #execute default parent class constructor - super(DemographicsObject,self).__init__(acquisition,attributes, uuid) - + # execute default parent class constructor + # execute default parent class constructor + super(DemographicsObject, self).__init__(acquisition, attributes, uuid) if add_default_type: self.add_attributes({pm.PROV_TYPE: Constants.NIDM_ASSESSMENT_ENTITY}) self.add_attributes({pm.PROV_TYPE: Constants.NIDM_ACQUISITION_ENTITY}) - self.add_attributes({Constants.NIDM_ASSESSMENT_USAGE_TYPE: Constants.NIDM_DEMOGRAPHICS_ENTITY}) + self.add_attributes( + { + Constants.NIDM_ASSESSMENT_USAGE_TYPE: Constants.NIDM_DEMOGRAPHICS_ENTITY + } + ) - #carry graph object around + # carry graph object around self.graph = acquisition.graph - def __str__(self): return "NIDM-Experiment Demographics Object Class" diff --git a/nidm/experiment/Derivative.py b/nidm/experiment/Derivative.py index 9a64ca07..b6b3c419 100644 --- a/nidm/experiment/Derivative.py +++ b/nidm/experiment/Derivative.py @@ -1,13 +1,15 @@ +import os +import sys +import prov.model as pm import rdflib as rdf -import os, sys -#sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +# sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) from ..core import Constants from ..experiment import Core from ..experiment.Core import getUUID -import prov.model as pm -class Derivative(pm.ProvActivity,Core): + +class Derivative(pm.ProvActivity, Core): """ Class for NIDM-Experimenent Derivative Objects. 
@@ -19,7 +21,8 @@ class Derivative(pm.ProvActivity,Core): @copyright: University of California, Irvine 2017 """ - #constructor + + # constructor def __init__(self, project, attributes=None, uuid=None): """ Default constructor, creates a derivative activity @@ -31,43 +34,52 @@ def __init__(self, project, attributes=None, uuid=None): if uuid is None: self._uuid = getUUID() - #execute default parent class constructor - super(Derivative,self).__init__(project.graph, pm.QualifiedName(pm.Namespace("niiri",Constants.NIIRI),self.get_uuid()),attributes) + # execute default parent class constructor + super(Derivative, self).__init__( + project.graph, + pm.QualifiedName( + pm.Namespace("niiri", Constants.NIIRI), self.get_uuid() + ), + attributes, + ) else: self._uuid = uuid - super(Derivative,self).__init__(project.graph, pm.Identifier(uuid),attributes) + super(Derivative, self).__init__( + project.graph, pm.Identifier(uuid), attributes + ) project.graph._add_record(self) - #list to store acquisition objects associated with this activity - self._derivative_objects=[] - #if constructor is called with a session object then add this acquisition to the session + # list to store acquisition objects associated with this activity + self._derivative_objects = [] + # if constructor is called with a session object then add this acquisition to the session - #carry graph object around + # carry graph object around self.graph = project.graph project.add_derivatives(self) - - def add_derivative_object(self,derivative_object): + def add_derivative_object(self, derivative_object): """ Adds derivative objects to derivative activity, creating links and adding reference to derivatives list :param derivative_object: object of type "DerivativeObject" from nidm API """ - #add derivative object to self._derivatives list + # add derivative object to self._derivatives list self._derivative_objects.extend([derivative_object]) - #create links in graph - self.graph.wasGeneratedBy(derivative_object,self) + # create links in graph + self.graph.wasGeneratedBy(derivative_object, self) + def get_derivative_objects(self): return self._derivative_objects - def derivative_object_exists(self,uuid): - ''' + + def derivative_object_exists(self, uuid): + """ Checks whether uuid is a registered derivative object :param uuid: full uuid of derivative object :return: True if exists, False otherwise - ''' + """ if uuid in self._derivative_objects: return True else: diff --git a/nidm/experiment/DerivativeObject.py b/nidm/experiment/DerivativeObject.py index bcf29732..6d327171 100644 --- a/nidm/experiment/DerivativeObject.py +++ b/nidm/experiment/DerivativeObject.py @@ -1,12 +1,15 @@ -import os, sys -#sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +import os +import sys +import prov.model as pm + +# sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) import rdflib as rdf from ..core import Constants from ..experiment import Core from ..experiment.Core import getUUID -import prov.model as pm -class DerivativeObject(pm.ProvEntity,Core): + +class DerivativeObject(pm.ProvEntity, Core): """Class for NIDM-Experimenent DerivativeObject-Level Objects. Default constructor uses empty graph with namespaces added from NIDM/Scripts/Constants.py. 
@@ -17,8 +20,9 @@ class DerivativeObject(pm.ProvEntity,Core): @copyright: University of California, Irvine 2019 """ - #constructor - def __init__(self, derivative,attributes=None, uuid=None): + + # constructor + def __init__(self, derivative, attributes=None, uuid=None): """ Default constructor, creates an derivative object and links to derivative activity object @@ -30,19 +34,23 @@ def __init__(self, derivative,attributes=None, uuid=None): """ if uuid is None: - #execute default parent class constructor - super(DerivativeObject,self).__init__(derivative.graph, pm.QualifiedName(pm.Namespace("niiri",Constants.NIIRI),getUUID()),attributes) + # execute default parent class constructor + super(DerivativeObject, self).__init__( + derivative.graph, + pm.QualifiedName(pm.Namespace("niiri", Constants.NIIRI), getUUID()), + attributes, + ) else: - super(DerivativeObject,self).__init__(derivative.graph, pm.Identifier(uuid),attributes) + super(DerivativeObject, self).__init__( + derivative.graph, pm.Identifier(uuid), attributes + ) derivative.graph._add_record(self) - #carry graph object around + # carry graph object around self.graph = derivative.graph - #create link to acquisition activity + # create link to acquisition activity derivative.add_derivative_object(self) def __str__(self): return "NIDM-Experiment DerivativeObject Class" - - diff --git a/nidm/experiment/MRAcquisition.py b/nidm/experiment/MRAcquisition.py index fb0832b4..66333feb 100644 --- a/nidm/experiment/MRAcquisition.py +++ b/nidm/experiment/MRAcquisition.py @@ -1,20 +1,23 @@ -import os, sys -#sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +import os +import sys +import prov.model as pm + +# sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) import rdflib as rdf -from ..experiment import Acquisition from ..core import Constants -import prov.model as pm +from ..experiment import Acquisition + class MRAcquisition(Acquisition): """ - Default constructor, creates a session activity and links to project object + Default constructor, creates a session activity and links to project object - :param session: a session object + :param session: a session object """ - #constructor - def __init__(self, session,attributes=None, uuid=None, add_default_type=True): + # constructor + def __init__(self, session, attributes=None, uuid=None, add_default_type=True): """ Default constructor, creates an acquisition object and links to acquisition activity object @@ -24,17 +27,16 @@ def __init__(self, session,attributes=None, uuid=None, add_default_type=True): :return: none """ - #execute default parent class constructor - #execute default parent class constructor - super(MRAcquisition,self).__init__(session,attributes,uuid) - #acquisition.graph._add_record(self) + # execute default parent class constructor + # execute default parent class constructor + super(MRAcquisition, self).__init__(session, attributes, uuid) + # acquisition.graph._add_record(self) if add_default_type: self.add_attributes({pm.PROV_TYPE: Constants.NIDM_ACQUISITION_ACTIVITY}) - #carry graph object around + # carry graph object around self.graph = session.graph - def __str__(self): return "NIDM-Experiment MRI Acquisition Class" diff --git a/nidm/experiment/MRObject.py b/nidm/experiment/MRObject.py index 1299e628..6838322d 100644 --- a/nidm/experiment/MRObject.py +++ b/nidm/experiment/MRObject.py @@ -1,9 +1,12 @@ -import os, sys -#sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +import os 
+import sys +import prov.model as pm + +# sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) import rdflib as rdf from ..core import Constants from ..experiment import AcquisitionObject -import prov.model as pm + class MRObject(AcquisitionObject): """Class for NIDM-Experimenent MRAcquisitionObject-Level Objects. @@ -16,8 +19,9 @@ class MRObject(AcquisitionObject): @copyright: University of California, Irvine 2017 """ - #constructor - def __init__(self, acquisition,attributes=None,uuid=None,add_default_type=True): + + # constructor + def __init__(self, acquisition, attributes=None, uuid=None, add_default_type=True): """ Default constructor, creates an acquisition object and links to acquisition activity object @@ -27,17 +31,17 @@ def __init__(self, acquisition,attributes=None,uuid=None,add_default_type=True): :return: none """ - #execute default parent class constructor - super(MRObject,self).__init__(acquisition,attributes,uuid) + # execute default parent class constructor + super(MRObject, self).__init__(acquisition, attributes, uuid) if add_default_type: self.add_attributes({pm.PROV_TYPE: Constants.NIDM_ACQUISITION_ENTITY}) - self.add_attributes({Constants.NIDM_ACQUISITION_MODALITY: Constants.NIDM_MRI}) + self.add_attributes( + {Constants.NIDM_ACQUISITION_MODALITY: Constants.NIDM_MRI} + ) - #carry graph object around + # carry graph object around self.graph = acquisition.graph def __str__(self): return "NIDM-Experiment MRI Object Class" - - diff --git a/nidm/experiment/Navigate.py b/nidm/experiment/Navigate.py index 3b655fd8..e682df10 100644 --- a/nidm/experiment/Navigate.py +++ b/nidm/experiment/Navigate.py @@ -1,48 +1,121 @@ -from nidm.core import Constants -from nidm.experiment.Query import OpenGraph, URITail, trimWellKnownURIPrefix, getDataTypeInfo, ACQUISITION_MODALITY, \ - IMAGE_CONTRAST_TYPE, IMAGE_USAGE_TYPE, TASK, expandUUID, matchPrefix -from rdflib import Graph, RDF, URIRef, util, term, Literal -import functools import collections +import functools +from nidm.core import Constants import nidm.experiment.CDE +from nidm.experiment.Query import ( + ACQUISITION_MODALITY, + IMAGE_CONTRAST_TYPE, + IMAGE_USAGE_TYPE, + TASK, + OpenGraph, + URITail, + expandUUID, + getDataTypeInfo, + matchPrefix, + trimWellKnownURIPrefix, +) from nidm.experiment.Utils import validate_uuid +from rdflib import RDF, Graph, Literal, URIRef, term, util + +isa = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type") +isPartOf = Constants.DCT["isPartOf"] +ValueType = collections.namedtuple( + "ValueType", + [ + "value", + "label", + "datumType", + "hasUnit", + "isAbout", + "measureOf", + "hasLaterality", + "dataElement", + "description", + "subject", + "project", + "sourceVariable", + ], +) +ActivityData = collections.namedtuple("ActivityData", ["category", "uuid", "data"]) +QUERY_CACHE_SIZE = 0 +BIG_CACHE_SIZE = 0 + + +def makeValueType( + value=None, + label=None, + datumType=None, + hasUnit=None, + isAbout=None, + measureOf=None, + hasLaterality=None, + dataElement=None, + description=None, + subject=None, + project=None, + source_variable=None, +): + return ValueType( + str(value), + str(label), + str(datumType), + str(hasUnit), + str(isAbout), + str(measureOf), + str(hasLaterality), + str(dataElement), + str(description), + str(subject), + str(project), + str(source_variable), + ) -isa = URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type') -isPartOf = Constants.DCT['isPartOf'] -ValueType = collections.namedtuple('ValueType', - ['value', 'label', 'datumType', 
'hasUnit', 'isAbout', 'measureOf', 'hasLaterality', 'dataElement', 'description', 'subject', 'project', 'sourceVariable']) -ActivityData = collections.namedtuple('ActivityData', ['category', 'uuid', 'data']) -QUERY_CACHE_SIZE=0 -BIG_CACHE_SIZE=0 - -def makeValueType(value=None, label=None, datumType=None, hasUnit=None, isAbout=None, measureOf=None, hasLaterality=None, dataElement=None, description=None, subject=None, project=None, source_variable=None): - return ValueType(str(value), str(label), str(datumType), str(hasUnit), str(isAbout), str(measureOf), str(hasLaterality), str(dataElement), str(description), str(subject), str(project), str(source_variable)) - def makeValueTypeFromDataTypeInfo(value, data_type_info_tuple): - if not data_type_info_tuple: data_type_info_tuple = {} - for key in ['label', 'datumType', 'hasUnit', 'isAbout', 'measureOf', 'hasLaterality', 'dataElement', 'description', 'subject', 'project', 'source_variable']: + for key in [ + "label", + "datumType", + "hasUnit", + "isAbout", + "measureOf", + "hasLaterality", + "dataElement", + "description", + "subject", + "project", + "source_variable", + ]: if not key in data_type_info_tuple: data_type_info_tuple[key] = None + return ValueType( + str(value), + str(data_type_info_tuple["label"]), + str(data_type_info_tuple["datumType"]), + str(data_type_info_tuple["hasUnit"]), + str(data_type_info_tuple["isAbout"]), + str(data_type_info_tuple["measureOf"]), + str(data_type_info_tuple["hasLaterality"]), + str(data_type_info_tuple["dataElement"]), + str(data_type_info_tuple["description"]), + str(data_type_info_tuple["subject"]), + str(data_type_info_tuple["project"]), + str(data_type_info_tuple["source_variable"]), + ) - return ValueType(str(value), str(data_type_info_tuple['label']), str(data_type_info_tuple['datumType']), - str(data_type_info_tuple['hasUnit']), str(data_type_info_tuple['isAbout']), str(data_type_info_tuple['measureOf']), - str(data_type_info_tuple['hasLaterality']), str(data_type_info_tuple['dataElement']), - str(data_type_info_tuple['description']), str(data_type_info_tuple['subject']), str(data_type_info_tuple['project']), str(data_type_info_tuple['source_variable'])) def expandID(id, namespace): - ''' + """ If the ID isn't a full URI already, make it one in the given namespace :param id: :param namespace: :return: full URI - ''' - if id.find('http') < 0: + """ + if id.find("http") < 0: return namespace[id] # it has a http, but isn't a URIRef so convert it if type(id) == str: @@ -53,44 +126,46 @@ def expandID(id, namespace): @functools.lru_cache(maxsize=BIG_CACHE_SIZE) def simplifyURIWithPrefix(nidm_file_tuples, uri): - ''' + """ Takes a URI and finds if there is a simple prefix for it in the graph :param rdf_graph: :param uri: :return: simple prefix or the original uri string - ''' + """ @functools.lru_cache(maxsize=QUERY_CACHE_SIZE) def getNamespaceLookup(nidm_file_tuples): names = {} for f in nidm_file_tuples: rdf_graph = OpenGraph(f) - for (prefix, uri) in rdf_graph.namespace_manager.namespaces(): + for prefix, uri in rdf_graph.namespace_manager.namespaces(): if not str(uri) in names: names[str(uri)] = prefix return names names = getNamespaceLookup(tuple(nidm_file_tuples)) # strip off the bit of URI after the last / - trimed_uri = str(uri).split('/')[0:-1] - trimed_uri = '/'.join(trimed_uri) + '/' + trimed_uri = str(uri).split("/")[0:-1] + trimed_uri = "/".join(trimed_uri) + "/" if trimed_uri in names: return names[trimed_uri] else: return uri + @functools.lru_cache(maxsize=QUERY_CACHE_SIZE) def 
getProjects(nidm_file_tuples): projects = [] for file in nidm_file_tuples: rdf_graph = OpenGraph(file) - #find all the sessions - for (project, p, o) in rdf_graph.triples((None, isa, Constants.NIDM['Project'])): + # find all the sessions + for project, p, o in rdf_graph.triples((None, isa, Constants.NIDM["Project"])): projects.append(project) return projects + @functools.lru_cache(maxsize=QUERY_CACHE_SIZE) def getSessions(nidm_file_tuples, project_id): project_uri = expandID(project_id, Constants.NIIRI) @@ -98,14 +173,15 @@ def getSessions(nidm_file_tuples, project_id): for file in nidm_file_tuples: rdf_graph = OpenGraph(file) - #find all the sessions - for (session, p, o) in rdf_graph.triples((None, isa, Constants.NIDM['Session'])): - #check if it is part of our project + # find all the sessions + for session, p, o in rdf_graph.triples((None, isa, Constants.NIDM["Session"])): + # check if it is part of our project if (session, isPartOf, project_uri) in rdf_graph: sessions.append(session) return sessions + @functools.lru_cache(maxsize=QUERY_CACHE_SIZE) def getAcquisitions(nidm_file_tuples, session_id): session_uri = expandID(session_id, Constants.NIIRI) @@ -113,14 +189,15 @@ def getAcquisitions(nidm_file_tuples, session_id): for file in nidm_file_tuples: rdf_graph = OpenGraph(file) - #find all the sessions - for (acq, p, o) in rdf_graph.triples((None, isPartOf, session_uri)): - #check if it is a acquisition - if (acq, isa, Constants.NIDM['Acquisition']) in rdf_graph: + # find all the sessions + for acq, p, o in rdf_graph.triples((None, isPartOf, session_uri)): + # check if it is a acquisition + if (acq, isa, Constants.NIDM["Acquisition"]) in rdf_graph: acquisitions.append(acq) return acquisitions + @functools.lru_cache(maxsize=QUERY_CACHE_SIZE) def getSubject(nidm_file_tuples, acquisition_id): acquisition_uri = expandID(acquisition_id, Constants.NIIRI) @@ -128,13 +205,22 @@ def getSubject(nidm_file_tuples, acquisition_id): for file in nidm_file_tuples: rdf_graph = OpenGraph(file) - #find all the sessions - for (acq, p, blank) in rdf_graph.triples((acquisition_uri, Constants.PROV['qualifiedAssociation'], None)): - for (s2, p2, sub) in rdf_graph.triples((blank, Constants.PROV['agent'], None)): - if (blank, Constants.PROV['hadRole'], Constants.SIO['Subject']) in rdf_graph: + # find all the sessions + for acq, p, blank in rdf_graph.triples( + (acquisition_uri, Constants.PROV["qualifiedAssociation"], None) + ): + for s2, p2, sub in rdf_graph.triples( + (blank, Constants.PROV["agent"], None) + ): + if ( + blank, + Constants.PROV["hadRole"], + Constants.SIO["Subject"], + ) in rdf_graph: return sub return None + @functools.lru_cache(maxsize=QUERY_CACHE_SIZE) def getSubjects(nidm_file_tuples, project_id): subjects = set([]) @@ -148,34 +234,40 @@ def getSubjects(nidm_file_tuples, project_id): subjects.add(sub) return subjects + @functools.lru_cache(maxsize=QUERY_CACHE_SIZE) def getSubjectUUIDsfromID(nidm_file_tuples, sub_id): uuids = [] for file in nidm_file_tuples: rdf_graph = OpenGraph(file) - result = rdf_graph.triples((None, Constants.NDAR['src_subject_id'], None)) - for (s,p,o) in result: + result = rdf_graph.triples((None, Constants.NDAR["src_subject_id"], None)) + for s, p, o in result: if str(o) == str(sub_id): - uuids.append( URITail(s) ) + uuids.append(URITail(s)) return uuids + @functools.lru_cache(maxsize=QUERY_CACHE_SIZE) def getSubjectIDfromUUID(nidm_file_tuples, subject_uuid): for file in nidm_file_tuples: rdf_graph = OpenGraph(file) - id_generator = 
rdf_graph.objects(subject=subject_uuid, predicate=Constants.NDAR['src_subject_id']) + id_generator = rdf_graph.objects( + subject=subject_uuid, predicate=Constants.NDAR["src_subject_id"] + ) for id in id_generator: return id return None + @functools.lru_cache(maxsize=QUERY_CACHE_SIZE) def normalizeSingleSubjectToUUID(nidm_file_tuples, id): if len(getSubjectUUIDsfromID(nidm_file_tuples, id)) > 0: return getSubjectUUIDsfromID(nidm_file_tuples, id)[0] return id + @functools.lru_cache(maxsize=QUERY_CACHE_SIZE) def getActivities(nidm_file_tuples, subject_id): activities = set([]) @@ -190,22 +282,30 @@ def getActivities(nidm_file_tuples, subject_id): rdf_graph = OpenGraph(file) for subject_uri in sub_uris: subject_uri = expandID(subject_uri, Constants.NIIRI) - for blank_node in rdf_graph.subjects(predicate=Constants.PROV['agent'], object=subject_uri): - for activity in rdf_graph.subjects(predicate=Constants.PROV['qualifiedAssociation'], object=blank_node): - if (activity, isa, Constants.PROV['Activity']) in rdf_graph: + for blank_node in rdf_graph.subjects( + predicate=Constants.PROV["agent"], object=subject_uri + ): + for activity in rdf_graph.subjects( + predicate=Constants.PROV["qualifiedAssociation"], object=blank_node + ): + if (activity, isa, Constants.PROV["Activity"]) in rdf_graph: activities.add(activity) return activities + @functools.lru_cache(maxsize=QUERY_CACHE_SIZE) def isAStatCollection(nidm_file_tuples, uri): for file in nidm_file_tuples: rdf_graph = OpenGraph(file) - if ((uri, isa, Constants.NIDM['FSStatsCollection']) in rdf_graph ) or \ - ((uri, isa, Constants.NIDM['FSLStatsCollection']) in rdf_graph) or \ - ((uri, isa, Constants.NIDM['ANTSStatsCollection']) in rdf_graph) : + if ( + ((uri, isa, Constants.NIDM["FSStatsCollection"]) in rdf_graph) + or ((uri, isa, Constants.NIDM["FSLStatsCollection"]) in rdf_graph) + or ((uri, isa, Constants.NIDM["ANTSStatsCollection"]) in rdf_graph) + ): return True return False + # def getDataElementInfo(nidm_file_list, id): # # uuid = expandID(id, Constants.NIIRI) @@ -220,6 +320,7 @@ def isAStatCollection(nidm_file_tuples, uri): # # return False + @functools.lru_cache(maxsize=QUERY_CACHE_SIZE) def getActivityData(nidm_file_tuples, acquisition_id): acquisition_uri = expandID(acquisition_id, Constants.NIIRI) @@ -229,59 +330,85 @@ def getActivityData(nidm_file_tuples, acquisition_id): for file in nidm_file_tuples: rdf_graph = OpenGraph(file) # find everything generated by the acquisition - for (data_object, p1, o1) in rdf_graph.triples((None, Constants.PROV['wasGeneratedBy'], acquisition_uri)): + for data_object, p1, o1 in rdf_graph.triples( + (None, Constants.PROV["wasGeneratedBy"], acquisition_uri) + ): # make sure this is an acquisition object - if (data_object, isa, Constants.NIDM['AcquisitionObject']) in rdf_graph: - category = 'instrument' + if (data_object, isa, Constants.NIDM["AcquisitionObject"]) in rdf_graph: + category = "instrument" # iterate over all the items in the acquisition object - for (s, p, o) in rdf_graph.triples((data_object, None, None)): - + for s, p, o in rdf_graph.triples((data_object, None, None)): dti = getDataTypeInfo(rdf_graph, p) - if (dti): + if dti: # there is a DataElement describing this predicate - value_type = makeValueTypeFromDataTypeInfo(value=trimWellKnownURIPrefix(o), data_type_info_tuple=dti) - result.append( value_type ) + value_type = makeValueTypeFromDataTypeInfo( + value=trimWellKnownURIPrefix(o), data_type_info_tuple=dti + ) + result.append(value_type) else: - #Don't know exactly what this is so just 
set a label and be done. - if (data_object, isa, Constants.ONLI['assessment-instrument']) in rdf_graph: - result.append(makeValueType(value=trimWellKnownURIPrefix(o), label=simplifyURIWithPrefix(nidm_file_tuples, str(p)))) - #result[ simplifyURIWithPrefix(nidm_file_list, str(p)) ] = trimWellKnownURIPrefix(o) + # Don't know exactly what this is so just set a label and be done. + if ( + data_object, + isa, + Constants.ONLI["assessment-instrument"], + ) in rdf_graph: + result.append( + makeValueType( + value=trimWellKnownURIPrefix(o), + label=simplifyURIWithPrefix( + nidm_file_tuples, str(p) + ), + ) + ) + # result[ simplifyURIWithPrefix(nidm_file_list, str(p)) ] = trimWellKnownURIPrefix(o) else: - result.append(makeValueType(value=trimWellKnownURIPrefix(o), label=URITail(str(p)))) + result.append( + makeValueType( + value=trimWellKnownURIPrefix(o), + label=URITail(str(p)), + ) + ) # result[ URITail(str(p))] = trimWellKnownURIPrefix(o) # or maybe it's a stats collection - elif isAStatCollection (nidm_file_tuples, data_object): - category = 'derivative' - for (s, p, o) in rdf_graph.triples((data_object, None, None)): - cde = getDataTypeInfo(rdf_graph,p ) - result.append( - makeValueTypeFromDataTypeInfo(value=str(o), data_type_info_tuple=cde) + elif isAStatCollection(nidm_file_tuples, data_object): + category = "derivative" + for s, p, o in rdf_graph.triples((data_object, None, None)): + cde = getDataTypeInfo(rdf_graph, p) + result.append( + makeValueTypeFromDataTypeInfo( + value=str(o), data_type_info_tuple=cde ) - # result[ URITail(str(p)) ] = str(o) + ) + # result[ URITail(str(p)) ] = str(o) + + return ActivityData( + category=category, uuid=trimWellKnownURIPrefix(acquisition_uri), data=result + ) - return ActivityData(category=category, uuid=trimWellKnownURIPrefix(acquisition_uri), data=result) @functools.lru_cache(maxsize=QUERY_CACHE_SIZE) def GetProjectAttributes(nidm_files_tuple, project_id): result = { ACQUISITION_MODALITY: set([]), IMAGE_CONTRAST_TYPE: set([]), - IMAGE_USAGE_TYPE : set([]), - TASK : set([]) + IMAGE_USAGE_TYPE: set([]), + TASK: set([]), } project_uuid = expandUUID(project_id) for file in nidm_files_tuple: rdf_graph = OpenGraph(file) - #find all the projects - for (project,pred,o) in rdf_graph.triples((None, None, Constants.NIDM['Project'])): - #check if it is our project + # find all the projects + for project, pred, o in rdf_graph.triples( + (None, None, Constants.NIDM["Project"]) + ): + # check if it is our project if str(project) == str(project_uuid): # get all the basic data from the project - for (proj, predicate, object) in rdf_graph.triples((project, None, None)): - result[ matchPrefix(str(predicate)) ] = str(object) + for proj, predicate, object in rdf_graph.triples((project, None, None)): + result[matchPrefix(str(predicate))] = str(object) # now drill into the acquisition objects to get some specific # elements: AcquisitionModality, ImageContrastType, ImageUsageType, Task @@ -291,13 +418,13 @@ def GetProjectAttributes(nidm_files_tuple, project_id): for a in acquistions: acq_obj = getActivityData(nidm_files_tuple, a) for de in acq_obj.data: - if de.label == 'hadAcquisitionModality': + if de.label == "hadAcquisitionModality": result[ACQUISITION_MODALITY].add(de.value) - if de.label == 'hadImageContrastType': + if de.label == "hadImageContrastType": result[IMAGE_CONTRAST_TYPE].add(de.value) - if de.label == 'hadImageUsageType': + if de.label == "hadImageUsageType": result[IMAGE_USAGE_TYPE].add(de.value) - if de.label == 'Task': + if de.label == "Task": 
result[TASK].add(de.value) # de-set-ify items so they will play nice with JSON later @@ -308,6 +435,7 @@ def GetProjectAttributes(nidm_files_tuple, project_id): return result + @functools.lru_cache(maxsize=BIG_CACHE_SIZE) def GetAllPredicates(nidm_files_tuple): pred_set = set() @@ -326,41 +454,46 @@ def GetDataelements(nidm_files_tuple): for file in nidm_files_tuple: rdf_graph = OpenGraph(file) - #find all the datatypes - for de_uri in rdf_graph.subjects(predicate=isa, object=Constants.NIDM['DataElement']): + # find all the datatypes + for de_uri in rdf_graph.subjects( + predicate=isa, object=Constants.NIDM["DataElement"] + ): if de_uri not in found_uris: # don't add duplicates dti = getDataTypeInfo(rdf_graph, de_uri) - result['data_elements']['uuid'].append(str(dti['dataElementURI'])) - result['data_elements']['label'].append(str(dti['label'])) - result['data_elements']['data_type_info'].append( dti ) + result["data_elements"]["uuid"].append(str(dti["dataElementURI"])) + result["data_elements"]["label"].append(str(dti["label"])) + result["data_elements"]["data_type_info"].append(dti) found_uris.add(de_uri) # find all the datatypes - for de_uri in rdf_graph.subjects(predicate=isa, object=Constants.NIDM['PersonalDataElement']): + for de_uri in rdf_graph.subjects( + predicate=isa, object=Constants.NIDM["PersonalDataElement"] + ): if de_uri not in found_uris: # don't add duplicates dti = getDataTypeInfo(rdf_graph, de_uri) - result['data_elements']['uuid'].append(str(dti['dataElementURI'])) - result['data_elements']['label'].append(str(dti['label'])) - result['data_elements']['data_type_info'].append(dti) + result["data_elements"]["uuid"].append(str(dti["dataElementURI"])) + result["data_elements"]["label"].append(str(dti["label"])) + result["data_elements"]["data_type_info"].append(dti) found_uris.add(de_uri) # now look for any of the CDEs all_predicates = GetAllPredicates(nidm_files_tuple) cde_graph = nidm.experiment.CDE.getCDEs() - cde_types = cde_graph.subjects(predicate=Constants.RDFS['subClassOf'], object=Constants.NIDM['DataElement']) - cde_type_set = set() # i.e. fs:DataElement - known_cde_types = set() # i.e. fs_003579 + cde_types = cde_graph.subjects( + predicate=Constants.RDFS["subClassOf"], object=Constants.NIDM["DataElement"] + ) + cde_type_set = set() # i.e. fs:DataElement + known_cde_types = set() # i.e. 
fs_003579 for t in cde_types: cde_type_set.add(t) for s in cde_graph.subjects(predicate=isa, object=t): known_cde_types.add(s) - for predicate in all_predicates: if predicate in known_cde_types: dti = getDataTypeInfo(cde_graph, predicate) - result['data_elements']['uuid'].append(str(dti['dataElementURI'])) - result['data_elements']['label'].append(str(dti['label'])) - result['data_elements']['data_type_info'].append(dti) + result["data_elements"]["uuid"].append(str(dti["dataElementURI"])) + result["data_elements"]["label"].append(str(dti["label"])) + result["data_elements"]["data_type_info"].append(dti) return result @@ -370,33 +503,49 @@ def GetDataelementDetails(nidm_files_tuple, dataelement): for file in nidm_files_tuple: rdf_graph = OpenGraph(file) - for de_uri in rdf_graph.subjects(predicate=isa, object=Constants.NIDM['DataElement']): + for de_uri in rdf_graph.subjects( + predicate=isa, object=Constants.NIDM["DataElement"] + ): dti = getDataTypeInfo(rdf_graph, de_uri) # check if this is the correct one - if not (dataelement in [ str(dti['label']), str(dti['dataElement']), str(dti['dataElementURI']) ] ): + if not ( + dataelement + in [ + str(dti["label"]), + str(dti["dataElement"]), + str(dti["dataElementURI"]), + ] + ): continue for key in dti.keys(): result[key] = dti[key] - result['inProjects'] = set() + result["inProjects"] = set() # figure out what project the dataelement was used in uri = dti["dataElementURI"] a_list = rdf_graph.subjects(predicate=uri) - for a in a_list: # a is an assessment / AcquisitionObject - b_list = rdf_graph.objects(subject=a, predicate=Constants.PROV['wasGeneratedBy']) - for b in b_list: # b is an Acquisition / Activity - c_list = rdf_graph.objects(subject=b, predicate=Constants.DCT['isPartOf']) - for c in c_list: # c is a session - d_list = rdf_graph.objects(subject=c, predicate=Constants.DCT['isPartOf']) - for d in d_list: # d is most likely a project - if d in rdf_graph.subjects(predicate=isa, object=Constants.NIDM['Project']): - result['inProjects'].add("{} ({})".format(str(d), file)) - - return result # found it, we are done + for a in a_list: # a is an assessment / AcquisitionObject + b_list = rdf_graph.objects( + subject=a, predicate=Constants.PROV["wasGeneratedBy"] + ) + for b in b_list: # b is an Acquisition / Activity + c_list = rdf_graph.objects( + subject=b, predicate=Constants.DCT["isPartOf"] + ) + for c in c_list: # c is a session + d_list = rdf_graph.objects( + subject=c, predicate=Constants.DCT["isPartOf"] + ) + for d in d_list: # d is most likely a project + if d in rdf_graph.subjects( + predicate=isa, object=Constants.NIDM["Project"] + ): + result["inProjects"].add("{} ({})".format(str(d), file)) + return result # found it, we are done if result == {}: # didn't find it yet, check the CDEs cde_graph = nidm.experiment.CDE.getCDEs() @@ -404,20 +553,26 @@ def GetDataelementDetails(nidm_files_tuple, dataelement): dti = getDataTypeInfo(cde_graph, de_uri) # check if this is the correct one - if not (dataelement in [str(dti['label']), str(dti['dataElement']), str(dti['dataElementURI'])]): + if not ( + dataelement + in [ + str(dti["label"]), + str(dti["dataElement"]), + str(dti["dataElementURI"]), + ] + ): continue for key in dti.keys(): result[key] = dti[key] - result['inProjects'] = set() - result['inProjects'].add("Common Data Element") + result["inProjects"] = set() + result["inProjects"].add("Common Data Element") for file in nidm_files_tuple: rdf_graph = OpenGraph(file) - if result['dataElementURI'] in rdf_graph.predicates(): - 
result['inProjects'].add(file) - + if result["dataElementURI"] in rdf_graph.predicates(): + result["inProjects"].add(file) - return result # found it, we are done + return result # found it, we are done - return result \ No newline at end of file + return result diff --git a/nidm/experiment/PETAcquisition.py b/nidm/experiment/PETAcquisition.py index ef48a57f..160cedbe 100644 --- a/nidm/experiment/PETAcquisition.py +++ b/nidm/experiment/PETAcquisition.py @@ -1,20 +1,23 @@ -import os, sys -#sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +import os +import sys +import prov.model as pm + +# sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) import rdflib as rdf -from ..experiment import Acquisition from ..core import Constants -import prov.model as pm +from ..experiment import Acquisition + class PETAcquisition(Acquisition): """ - Default constructor, creates a session activity and links to project object + Default constructor, creates a session activity and links to project object - :param session: a session object + :param session: a session object """ - #constructor - def __init__(self, session,attributes=None, uuid=None, add_default_type=True): + # constructor + def __init__(self, session, attributes=None, uuid=None, add_default_type=True): """ Default constructor, creates an acquisition object and links to acquisition activity object @@ -24,17 +27,16 @@ def __init__(self, session,attributes=None, uuid=None, add_default_type=True): :return: none """ - #execute default parent class constructor - #execute default parent class constructor - super(PETAcquisition,self).__init__(session,attributes,uuid) - #acquisition.graph._add_record(self) + # execute default parent class constructor + # execute default parent class constructor + super(PETAcquisition, self).__init__(session, attributes, uuid) + # acquisition.graph._add_record(self) if add_default_type: self.add_attributes({pm.PROV_TYPE: Constants.NIDM_ACQUISITION_ACTIVITY}) - #carry graph object around + # carry graph object around self.graph = session.graph - def __str__(self): return "NIDM-Experiment PET Acquisition Class" diff --git a/nidm/experiment/PETObject.py b/nidm/experiment/PETObject.py index ce22bc4e..f1edadc4 100644 --- a/nidm/experiment/PETObject.py +++ b/nidm/experiment/PETObject.py @@ -1,9 +1,12 @@ -import os, sys -#sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +import os +import sys +import prov.model as pm + +# sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) import rdflib as rdf from ..core import Constants from ..experiment import AcquisitionObject -import prov.model as pm + class PETObject(AcquisitionObject): """Class for NIDM-Experimenent MRAcquisitionObject-Level Objects. 
@@ -16,8 +19,9 @@ class PETObject(AcquisitionObject): @copyright: University of California, Irvine 2017 """ - #constructor - def __init__(self, acquisition,attributes=None,uuid=None,add_default_type=True): + + # constructor + def __init__(self, acquisition, attributes=None, uuid=None, add_default_type=True): """ Default constructor, creates an acquisition object and links to acquisition activity object @@ -27,17 +31,17 @@ def __init__(self, acquisition,attributes=None,uuid=None,add_default_type=True): :return: none """ - #execute default parent class constructor - super(PETObject,self).__init__(acquisition,attributes,uuid) + # execute default parent class constructor + super(PETObject, self).__init__(acquisition, attributes, uuid) if add_default_type: self.add_attributes({pm.PROV_TYPE: Constants.NIDM_ACQUISITION_ENTITY}) - self.add_attributes({Constants.NIDM_ACQUISITION_MODALITY: Constants.NIDM_PET}) + self.add_attributes( + {Constants.NIDM_ACQUISITION_MODALITY: Constants.NIDM_PET} + ) - #carry graph object around + # carry graph object around self.graph = acquisition.graph def __str__(self): return "NIDM-Experiment PET Object Class" - - diff --git a/nidm/experiment/Project.py b/nidm/experiment/Project.py index 64b3a77e..040e0ea3 100644 --- a/nidm/experiment/Project.py +++ b/nidm/experiment/Project.py @@ -1,20 +1,20 @@ -import rdflib as rdf -import os, sys -import prov.model as pm import json -from rdflib import Graph, RDF, URIRef, util, term +import os +import sys +import prov.model as pm +import rdflib as rdf +from rdflib import RDF, Graph, URIRef, term, util from rdflib.namespace import split_uri import validators - -#sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +# sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) from ..core import Constants -#import NIDMExperimentCore -from ..experiment.Core import Core -from ..experiment.Core import getUUID +# import NIDMExperimentCore +from ..experiment.Core import Core, getUUID -class Project(pm.ProvActivity,Core): + +class Project(pm.ProvActivity, Core): """Class for NIDM-Experiment Project-Level Objects. Default constructor uses empty graph with namespaces added from NIDM/Scripts/Constants.py. 
@@ -25,8 +25,11 @@ class Project(pm.ProvActivity,Core): @copyright: University of California, Irvine 2017 """ - #constructor, adds project - def __init__(self,attributes=None, empty_graph=False, uuid=None,add_default_type=True): + + # constructor, adds project + def __init__( + self, attributes=None, empty_graph=False, uuid=None, add_default_type=True + ): """ Default constructor, creates document and adds Project activity to graph with optional attributes @@ -36,7 +39,7 @@ def __init__(self,attributes=None, empty_graph=False, uuid=None,add_default_type """ - if (empty_graph): + if empty_graph: self.graph = Constants.NIDMDocument(namespaces=None) else: self.graph = Constants.NIDMDocument(namespaces=Constants.namespaces) @@ -44,26 +47,37 @@ def __init__(self,attributes=None, empty_graph=False, uuid=None,add_default_type if uuid is None: self._uuid = getUUID() - #execute default parent class constructor - super(Project,self).__init__(self.graph, pm.QualifiedName(pm.Namespace("niiri",Constants.NIIRI),self.get_uuid()),attributes) + # execute default parent class constructor + super(Project, self).__init__( + self.graph, + pm.QualifiedName( + pm.Namespace("niiri", Constants.NIIRI), self.get_uuid() + ), + attributes, + ) else: self._uuid = uuid - #execute default parent class constructor - super(Project,self).__init__(self.graph, pm.QualifiedName(pm.Namespace("niiri",Constants.NIIRI),self.get_uuid()),attributes) - - #add record to graph + # execute default parent class constructor + super(Project, self).__init__( + self.graph, + pm.QualifiedName( + pm.Namespace("niiri", Constants.NIIRI), self.get_uuid() + ), + attributes, + ) + + # add record to graph self.graph._add_record(self) - #create empty sessions list - self._sessions=[] - #create empty derivatives list - self._derivatives=[] + # create empty sessions list + self._sessions = [] + # create empty derivatives list + self._derivatives = [] # create empty data elements list - self._dataelements=[] + self._dataelements = [] if add_default_type: self.add_attributes({pm.PROV_TYPE: Constants.NIDM_PROJECT}) - @property def sessions(self): return self._sessions @@ -77,8 +91,7 @@ def derivatives(self): def dataelements(self): return self._dataelements - - def add_sessions(self,session): + def add_sessions(self, session): """ Adds session to project, creating links and adding reference to sessions list @@ -89,12 +102,15 @@ def add_sessions(self,session): if session in self._sessions: return False else: - #add session to self.sessions list + # add session to self.sessions list self._sessions.extend([session]) - #create links in graph - #session.add_attributes({str("dct:isPartOf"):self}) - session.add_attributes({pm.QualifiedName(pm.Namespace("dct",Constants.DCT),'isPartOf'):self}) + # create links in graph + # session.add_attributes({str("dct:isPartOf"):self}) + session.add_attributes( + {pm.QualifiedName(pm.Namespace("dct", Constants.DCT), "isPartOf"): self} + ) return True + def get_sessions(self): return self._sessions @@ -117,7 +133,9 @@ def add_derivatives(self, derivative): self._derivatives.extend([derivative]) # create links in graph # session.add_attributes({str("dct:isPartOf"):self}) - derivative.add_attributes({pm.QualifiedName(pm.Namespace("dct", Constants.DCT), 'isPartOf'): self}) + derivative.add_attributes( + {pm.QualifiedName(pm.Namespace("dct", Constants.DCT), "isPartOf"): self} + ) return True def add_dataelements(self, dataelement): @@ -133,15 +151,12 @@ def add_dataelements(self, dataelement): 
self._dataelements.extend([dataelement]) # create links in graph # session.add_attributes({str("dct:isPartOf"):self}) - #dataelement.add_attributes({pm.QualifiedName(pm.Namespace("dct", Constants.DCT), 'isPartOf'): self}) + # dataelement.add_attributes({pm.QualifiedName(pm.Namespace("dct", Constants.DCT), 'isPartOf'): self}) return True def __str__(self): return "NIDM-Experiment Project Class" - - - - sessions = property(get_sessions,add_sessions) - derivatives = property(get_derivatives,add_derivatives) - dataelements = property(get_dataelements,add_dataelements) + sessions = property(get_sessions, add_sessions) + derivatives = property(get_derivatives, add_derivatives) + dataelements = property(get_dataelements, add_dataelements) diff --git a/nidm/experiment/Query.py b/nidm/experiment/Query.py index 1581accd..d3b959b2 100644 --- a/nidm/experiment/Query.py +++ b/nidm/experiment/Query.py @@ -1,165 +1,164 @@ -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** # nidm_query.py # License: GPL -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** # Date: 8-1-18 Coded by: David Keator (dbkeator@gmail.com) # Filename: nidm_query.py # # Program description: This program provides query functionality for NIDM-Experiment files # # -#************************************************************************************** +# ************************************************************************************** # Development environment: Python - PyCharm IDE # -#************************************************************************************** +# ************************************************************************************** # System requirements: Python 3.X # Libraries: os, sys, rdflib, pandas, argparse, logging -#************************************************************************************** +# ************************************************************************************** # Start date: 8-1-18 # Update history: # DATE MODIFICATION Who # # -#************************************************************************************** +# ************************************************************************************** # Programmer comments: # # -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** +import functools +import hashlib +import json +import logging import os +from os import environ, path +import pickle +import re import sys +import tempfile from urllib.request import urlretrieve - -import rdflib -from rdflib import Graph, URIRef, util -import pandas as pd -import logging +from joblib import Memory from nidm.core import Constants import nidm.experiment.CDE -import re -import 
tempfile -from os import path, environ -import functools -import hashlib -import pickle +import pandas as pd +import rdflib +from rdflib import Graph, URIRef, util import requests -import json +memory = Memory(tempfile.gettempdir(), verbose=0) -from joblib import Memory -memory = Memory(tempfile.gettempdir(), verbose=0 ) +QUERY_CACHE_SIZE = 64 +BIG_CACHE_SIZE = 256 +LARGEST_CACHE_SIZE = 4096 +ACQUISITION_MODALITY = "AcquisitionModality" +IMAGE_CONTRAST_TYPE = "ImageContrastType" +IMAGE_USAGE_TYPE = "ImageUsageType" +TASK = "Task" -QUERY_CACHE_SIZE=64 -BIG_CACHE_SIZE=256 -LARGEST_CACHE_SIZE=4096 -ACQUISITION_MODALITY = 'AcquisitionModality' -IMAGE_CONTRAST_TYPE = 'ImageContrastType' -IMAGE_USAGE_TYPE = 'ImageUsageType' -TASK = 'Task' -def sparql_query_nidm(nidm_file_list,query, output_file=None, return_graph=False): - ''' +def sparql_query_nidm(nidm_file_list, query, output_file=None, return_graph=False): + """ :param nidm_file_list: List of NIDM.ttl files to execute query on :param query: SPARQL query string :param output_file: Optional output file to write results :param return_graph: WIP - not working right now but for some queries we prefer to return a graph instead of a dataframe :return: dataframe | graph depending on return_graph parameter - ''' - - + """ - if 'BLAZEGRAPH_URL' in environ.keys(): + if "BLAZEGRAPH_URL" in environ.keys(): try: # first make sure all files are loaded into blazegraph for nidm_file in nidm_file_list: OpenGraph(nidm_file) - logging.debug("Sending sparql to blazegraph: %s", query ) - r2 = requests.post(url=environ['BLAZEGRAPH_URL'], params={'query': query}, headers={'Accept': 'application/sparql-results+json'}) - content = json.loads( r2.content ) + logging.debug("Sending sparql to blazegraph: %s", query) + r2 = requests.post( + url=environ["BLAZEGRAPH_URL"], + params={"query": query}, + headers={"Accept": "application/sparql-results+json"}, + ) + content = json.loads(r2.content) columns = {} - for key in content["head"]['vars']: - columns[key] = [x[key]['value'] for x in content['results']['bindings']] + for key in content["head"]["vars"]: + columns[key] = [x[key]["value"] for x in content["results"]["bindings"]] df = pd.DataFrame(data=columns) - if (output_file is not None): + if output_file is not None: df.to_csv(output_file) return df except Exception as e: - print("Exception while communicating with blazegraph at {}: {}".format(environ['BLAZEGRAPH_URL'],e)) + print( + "Exception while communicating with blazegraph at {}: {}".format( + environ["BLAZEGRAPH_URL"], e + ) + ) - - #query result list + # query result list results = [] + logging.info("Query: %s", query) - logging.info("Query: %s" , query) - - first_file=True - #cycle through NIDM files, adding query result to list + first_file = True + # cycle through NIDM files, adding query result to list for nidm_file in nidm_file_list: - # project=read_nidm(nidm_file) - #read RDF file into temporary graph + # read RDF file into temporary graph # rdf_graph = Graph() # rdf_graph_parse = rdf_graph.parse(nidm_file,format=util.guess_format(nidm_file)) rdf_graph_parse = OpenGraph(nidm_file) - if not return_graph: - #execute query + # execute query qres = rdf_graph_parse.query(query) - #if this is the first file then grab the SPARQL bound variable names from query result for column headings of query result + # if this is the first file then grab the SPARQL bound variable names from query result for column headings of query result if first_file: - #format query result as dataframe and return - #for dicts in 
qres._get_bindings(): + # format query result as dataframe and return + # for dicts in qres._get_bindings(): columns = [str(var) for var in qres.vars] - first_file=False + first_file = False # break - #append result as row to result list + # append result as row to result list for row in qres: results.append(list(row)) else: - #execute query + # execute query qres = rdf_graph_parse.query(query) if first_file: - #create graph - #WIP: qres_graph = Graph().parse(data=qres.serialize(format='turtle')) - qres_graph = qres.serialize(format='turtle') - first_file=False + # create graph + # WIP: qres_graph = Graph().parse(data=qres.serialize(format='turtle')) + qres_graph = qres.serialize(format="turtle") + first_file = False else: - #WIP qres_graph = qres_graph + Graph().parse(data=qres.serialize(format='turtle')) - qres_graph = qres_graph + qres.serialize(format='turtle') - - + # WIP qres_graph = qres_graph + Graph().parse(data=qres.serialize(format='turtle')) + qres_graph = qres_graph + qres.serialize(format="turtle") if not return_graph: - #convert results list to Pandas DataFrame and return - df = pd.DataFrame(results,columns=columns) + # convert results list to Pandas DataFrame and return + df = pd.DataFrame(results, columns=columns) - #if output file parameter specified - if (output_file is not None): + # if output file parameter specified + if output_file is not None: df.to_csv(output_file) return df else: return qres_graph -def GetProjectsUUID(nidm_file_list,output_file=None): - ''' +def GetProjectsUUID(nidm_file_list, output_file=None): + """ :param nidm_file_list: List of one or more NIDM files to query across for list of Projects :return: list of Project UUIDs - ''' + """ - #SPARQL query to get project UUIDs - query = ''' + # SPARQL query to get project UUIDs + query = """ PREFIX nidm: PREFIX rdf: @@ -170,22 +169,23 @@ def GetProjectsUUID(nidm_file_list,output_file=None): } } - ''' + """ df = sparql_query_nidm(nidm_file_list, query, output_file=output_file) - return df['uuid'] if type(df['uuid']) == list else df['uuid'].tolist() + return df["uuid"] if type(df["uuid"]) == list else df["uuid"].tolist() + def GetProjectLocation(nidm_file_list, project_uuid, output_file=None): - ''' + """ This query will return the prov:Location value for project_uuid :param nidm_file_list: List of one or more NIDM files to query across for list of Projects :param output_file: Optional output file :return: list of Project prov:Locations - ''' + """ # SPARQL query to get project UUIDs - query = ''' + query = """ PREFIX nidm: PREFIX rdf: prefix prov: @@ -198,16 +198,16 @@ def GetProjectLocation(nidm_file_list, project_uuid, output_file=None): } } - ''' + """ df = sparql_query_nidm(nidm_file_list, query, output_file=output_file) - return df['location'].tolist() + return df["location"].tolist() -def testprojectmeta(nidm_file_list): +def testprojectmeta(nidm_file_list): import json - query = ''' + query = """ prefix nidm: prefix rdf: @@ -219,24 +219,25 @@ def testprojectmeta(nidm_file_list): } - ''' + """ - df =sparql_query_nidm(nidm_file_list,query, output_file=None) + df = sparql_query_nidm(nidm_file_list, query, output_file=None) output_json = {} - for index,row in df.iterrows(): - if row['uuid'] not in output_json: - output_json[row['uuid']] = {} + for index, row in df.iterrows(): + if row["uuid"] not in output_json: + output_json[row["uuid"]] = {} - output_json[row['uuid']][row['p']] = row['o'] + output_json[row["uuid"]][row["p"]] = row["o"] return json.dumps(output_json) -def 
GetProjectSessionsMetadata(nidm_file_list, project_uuid): +def GetProjectSessionsMetadata(nidm_file_list, project_uuid): import json - query = ''' + query = ( + """ prefix nidm: prefix rdf: @@ -249,24 +250,27 @@ def GetProjectSessionsMetadata(nidm_file_list, project_uuid): ?p ?o . } - ''' % project_uuid + """ + % project_uuid + ) - df =sparql_query_nidm(nidm_file_list,query, output_file=None) + df = sparql_query_nidm(nidm_file_list, query, output_file=None) - #outermost dictionary + # outermost dictionary output_json = {} - for index,row in df.iterrows(): + for index, row in df.iterrows(): if project_uuid not in output_json: - #creates dictionary for project UUID + # creates dictionary for project UUID output_json[project_uuid] = {} - if row['session_uuid'] not in output_json[project_uuid]: - #creates a dictionary under project_uuid dictionary for session - output_json[project_uuid][row['session_uuid']] = {} + if row["session_uuid"] not in output_json[project_uuid]: + # creates a dictionary under project_uuid dictionary for session + output_json[project_uuid][row["session_uuid"]] = {} - output_json[project_uuid][row['session_uuid']][row['p']] = row['o'] + output_json[project_uuid][row["session_uuid"]][row["p"]] = row["o"] return json.dumps(output_json) + def GetDataElementProperties(nidm_file_list): """ This function will return a dictionary of data element properties for data_element_uuid @@ -275,7 +279,7 @@ def GetDataElementProperties(nidm_file_list): :return: """ - query=''' + query = """ select distinct ?uuid ?DataElements ?property ?value where { @@ -283,11 +287,12 @@ def GetDataElementProperties(nidm_file_list): ?uuid a/rdfs:subClassOf* nidm:DataElement ; ?property ?value . - }''' + }""" - df = sparql_query_nidm(nidm_file_list.split(','), query, output_file=None) + df = sparql_query_nidm(nidm_file_list.split(","), query, output_file=None) return df + def GetProjectInstruments(nidm_file_list, project_id): """ Returns a list of unique instrument types. 
For NIDM files this is rdf:type onli:assessment-instrument @@ -296,7 +301,8 @@ def GetProjectInstruments(nidm_file_list, project_id): :param project_id: identifier of project you'd like to search for unique instruments :return: Dataframe of instruments and project titles """ - query = ''' + query = ( + """ PREFIX prov: PREFIX sio: PREFIX dct: @@ -315,23 +321,26 @@ def GetProjectInstruments(nidm_file_list, project_id): FILTER( (!regex(str(?assessment_type), "http://www.w3.org/ns/prov#Entity")) && (!regex(str(?assessment_type), "http://purl.org/nidash/nidm#AcquisitionObject")) && (regex(str(?project), "%s")) ) } - ''' % project_id - logging.info('Query: %s', query) + """ + % project_id + ) + logging.info("Query: %s", query) df = sparql_query_nidm(nidm_file_list, query, output_file=None) results = df.to_dict() logging.info(results) - return df + def GetInstrumentVariables(nidm_file_list, project_id): - ''' + """ This function will return a comprehensive list of variables as part of any project instrument :param nidm_file_list: List of one or more NIDM files to query across for list of Projects :param project_id: identifier of project you'd like to search for unique instruments :return: Dataframe of instruments, project titles, and variables - ''' - query = ''' + """ + query = ( + """ PREFIX prov: PREFIX sio: PREFIX dct: @@ -351,23 +360,25 @@ def GetInstrumentVariables(nidm_file_list, project_id): FILTER( (!regex(str(?assessment_type), "http://www.w3.org/ns/prov#Entity")) && (!regex(str(?assessment_type), "http://purl.org/nidash/nidm#AcquisitionObject")) && (regex(str(?project), "%s")) ) } - ''' % project_id - logging.info('Query: %s', query) + """ + % project_id + ) + logging.info("Query: %s", query) df = sparql_query_nidm(nidm_file_list, query, output_file=None) results = df.to_dict() logging.info(results) - return df -def GetParticipantIDs(nidm_file_list,output_file=None): - ''' + +def GetParticipantIDs(nidm_file_list, output_file=None): + """ This query will return a list of all prov:agent entity UUIDs that prov:hadRole sio:Subject or Constants.NIDM_PARTICIPANT :param nidm_file_list: List of one or more NIDM files to query across for list of Projects :return: list of Constants.NIDM_PARTICIPANT UUIDs and Constants.NIDM_SUBJECTID - ''' + """ - query = ''' + query = """ PREFIX prov: PREFIX sio: @@ -386,14 +397,18 @@ def GetParticipantIDs(nidm_file_list,output_file=None): ?uuid %s ?ID . } - ''' %(Constants.NIDM_PARTICIPANT,Constants.NIDM_SUBJECTID) + """ % ( + Constants.NIDM_PARTICIPANT, + Constants.NIDM_SUBJECTID, + ) - df = sparql_query_nidm(nidm_file_list,query, output_file=output_file) + df = sparql_query_nidm(nidm_file_list, query, output_file=output_file) return df -def GetParticipantIDFromAcquisition(nidm_file_list,acquisition, output_file=None): - ''' + +def GetParticipantIDFromAcquisition(nidm_file_list, acquisition, output_file=None): + """ This function will return the participant ID of the participant with a qualified association of prov:hadRole sio:Subject. @@ -401,9 +416,9 @@ def GetParticipantIDFromAcquisition(nidm_file_list,acquisition, output_file=None :param acquisition: nidm acquisition UUID to search for qualified association :param output_file: optional output filename :return: a dataframe subject ID and prov:Agent UUID of participant with qualified association - ''' + """ - query = ''' + query = """ PREFIX prov: PREFIX sio: @@ -423,21 +438,25 @@ def GetParticipantIDFromAcquisition(nidm_file_list,acquisition, output_file=None ?uuid %s ?ID . 
} - ''' % (acquisition, Constants.NIDM_PARTICIPANT, Constants.NIDM_SUBJECTID) + """ % ( + acquisition, + Constants.NIDM_PARTICIPANT, + Constants.NIDM_SUBJECTID, + ) df = sparql_query_nidm(nidm_file_list, query, output_file=output_file) return df -def GetParticipantDetails(nidm_file_list,project_id, participant_id, output_file=None): - ''' +def GetParticipantDetails(nidm_file_list, project_id, participant_id, output_file=None): + """ This query will return a list of all prov:agent entity UUIDs that prov:hadRole Constants.NIDM_PARTICIPANT :param nidm_file_list: List of one or more NIDM files to query across for list of Projects :return: list of Constants.NIDM_PARTICIPANT UUIDs and Constants.NIDM_SUBJECTID - ''' + """ - query = ''' + query = """ PREFIX prov: PREFIX sio: @@ -465,9 +484,13 @@ def GetParticipantDetails(nidm_file_list,project_id, participant_id, output_file FILTER(regex(str(?uuid), "%s")). } - ''' %(Constants.NIDM_PARTICIPANT,Constants.NIDM_SUBJECTID, participant_id) + """ % ( + Constants.NIDM_PARTICIPANT, + Constants.NIDM_SUBJECTID, + participant_id, + ) - df = sparql_query_nidm(nidm_file_list,query, output_file=output_file) + df = sparql_query_nidm(nidm_file_list, query, output_file=output_file) data = df.values uuid = "" @@ -476,54 +499,66 @@ def GetParticipantDetails(nidm_file_list,project_id, participant_id, output_file uuid = data[0][0] id = data[0][1] - result = { 'uuid' : str(uuid).replace(Constants.NIIRI, ""), - 'id' : str(id), - 'activity': [] } + result = { + "uuid": str(uuid).replace(Constants.NIIRI, ""), + "id": str(id), + "activity": [], + } for row in data: act = (str(row[2])).replace(str(Constants.NIIRI), "") - (result['activity']).append( act ) + (result["activity"]).append(act) - result["instruments"] = GetParticipantInstrumentData(nidm_file_list, project_id, participant_id) + result["instruments"] = GetParticipantInstrumentData( + nidm_file_list, project_id, participant_id + ) - result["derivatives"] = GetDerivativesDataForSubject(nidm_file_list, None, participant_id) + result["derivatives"] = GetDerivativesDataForSubject( + nidm_file_list, None, participant_id + ) return result + def GetMergedGraph(nidm_file_list): rdf_graph = Graph() for f in nidm_file_list: rdf_graph.parse(f, format=util.guess_format(f)) return rdf_graph + def GetNameForDataElement(graph, uri): label = isAbout = source_variable = None - - for data_element, predicate, value in graph.triples( (uri, None, None) ): - if predicate == Constants.NIDM['source_variable']: + for data_element, predicate, value in graph.triples((uri, None, None)): + if predicate == Constants.NIDM["source_variable"]: source_variable = str(value) - if predicate == Constants.NIDM['isAbout']: + if predicate == Constants.NIDM["isAbout"]: isAbout = str(value) - if predicate == Constants.RDFS['label']: + if predicate == Constants.RDFS["label"]: label = str(value) return source_variable or label or isAbout or URITail(uri) -def GetParticipantInstrumentData(nidm_file_list ,project_id, participant_id): - return GetParticipantInstrumentDataCached(tuple(nidm_file_list) ,project_id, participant_id) +def GetParticipantInstrumentData(nidm_file_list, project_id, participant_id): + return GetParticipantInstrumentDataCached( + tuple(nidm_file_list), project_id, participant_id + ) + @functools.lru_cache(maxsize=QUERY_CACHE_SIZE) -def GetParticipantInstrumentDataCached(nidm_file_list: tuple ,project_id, participant_id): - ''' +def GetParticipantInstrumentDataCached( + nidm_file_list: tuple, project_id, participant_id +): + """ This query 
will return a list of all instrument data for prov:agent entity UUIDs that has prov:hadRole sio:Subject or Constants.NIDM_PARTICIPANT :param nidm_file_list: List of one or more NIDM files to query across for list of Projects :return: list of Constants.NIDM_PARTICIPANT UUIDs and Constants.NIDM_SUBJECTID - ''' + """ - if participant_id.find('http') != 0: + if participant_id.find("http") != 0: participant_id = Constants.NIIRI[participant_id] result = {} @@ -534,21 +569,27 @@ def GetParticipantInstrumentDataCached(nidm_file_list: tuple ,project_id, partic if not n in names: names.append(n) - isa = URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type') + isa = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type") for f in nidm_file_list: rdf_graph = OpenGraph(f) # find all the instrument based assessments - for acquisition in rdf_graph.subjects(isa, Constants.NIDM['Acquisition']): + for acquisition in rdf_graph.subjects(isa, Constants.NIDM["Acquisition"]): # verify that the assessment is linked to a subject through a blank node - for blanknode in rdf_graph.objects(subject=acquisition,predicate=Constants.PROV['qualifiedAssociation']): + for blanknode in rdf_graph.objects( + subject=acquisition, predicate=Constants.PROV["qualifiedAssociation"] + ): # check to see if this assessment is about our participant - if ((blanknode, Constants.PROV['agent'], participant_id) in rdf_graph) : + if (blanknode, Constants.PROV["agent"], participant_id) in rdf_graph: # now we know that the assessment is one we want, find the actual assessment data - for instrument in rdf_graph.subjects(predicate=Constants.PROV['wasGeneratedBy'], object=acquisition): - #load up all the assement data into the result - instrument_key = str(instrument).split('/')[-1] + for instrument in rdf_graph.subjects( + predicate=Constants.PROV["wasGeneratedBy"], object=acquisition + ): + # load up all the assement data into the result + instrument_key = str(instrument).split("/")[-1] result[instrument_key] = {} - for s,data_element,o in rdf_graph.triples((instrument, None, None)): + for s, data_element, o in rdf_graph.triples( + (instrument, None, None) + ): # convert the random looking URIs to the prefix used in the ttl file, if any matches = [n[0] for n in names if n[1] == data_element] if len(matches) > 0: @@ -556,67 +597,93 @@ def GetParticipantInstrumentDataCached(nidm_file_list: tuple ,project_id, partic else: # idx = str(data_element) idx = GetNameForDataElement(rdf_graph, data_element) - result[instrument_key][ idx ] = str(str(o)) - + result[instrument_key][idx] = str(str(o)) return result -def GetParticipantUUIDsForProject(nidm_file_list: tuple, project_id, filter=None, output_file=None): - return GetParticipantUUIDsForProjectCached(tuple(nidm_file_list), project_id, filter, output_file) + +def GetParticipantUUIDsForProject( + nidm_file_list: tuple, project_id, filter=None, output_file=None +): + return GetParticipantUUIDsForProjectCached( + tuple(nidm_file_list), project_id, filter, output_file + ) + @functools.lru_cache(maxsize=QUERY_CACHE_SIZE) -def GetParticipantUUIDsForProjectCached(nidm_file_list:tuple, project_id, filter=None, output_file=None): - ''' +def GetParticipantUUIDsForProjectCached( + nidm_file_list: tuple, project_id, filter=None, output_file=None +): + """ This query will return a list of all prov:agent entity UUIDs within a single project that prov:hadRole sio:Subject or Constants.NIDM_PARTICIPANT :param filter: :param nidm_file_list: List of one or more NIDM files to query across for list of Projects 
:return: list of Constants.NIDM_PARTICIPANT UUIDs and Constants.NIDM_SUBJECTID - ''' + """ # if this isn't already a URI, make it one. # calls from the REST api don't include the URI project = project_id - if project_id.find('http') < 0: + if project_id.find("http") < 0: project = Constants.NIIRI[project_id] ### added by DBK changed to dictionary to support subject ids along with uuids - #participants = [] + # participants = [] participants = {} participants["uuid"] = [] participants["subject id"] = [] - for file in nidm_file_list: rdf_graph = OpenGraph(file) - #find all the sessions - for (session, p, o) in rdf_graph.triples((None, None, Constants.NIDM['Session'])): #rdf_graph.subjects(object=isa, predicate=Constants.NIDM['Session']): - #check if it is part of our project - if (session, Constants.DCT['isPartOf'], project) in rdf_graph: - #find all the activities/acquisitions/etc that are part of this session - for activity in rdf_graph.subjects(predicate=Constants.DCT['isPartOf'], object=session): + # find all the sessions + for session, p, o in rdf_graph.triples( + (None, None, Constants.NIDM["Session"]) + ): # rdf_graph.subjects(object=isa, predicate=Constants.NIDM['Session']): + # check if it is part of our project + if (session, Constants.DCT["isPartOf"], project) in rdf_graph: + # find all the activities/acquisitions/etc that are part of this session + for activity in rdf_graph.subjects( + predicate=Constants.DCT["isPartOf"], object=session + ): # look to see if the activity is linked to a subject via blank node - for blank in rdf_graph.objects(subject=activity, predicate=Constants.PROV['qualifiedAssociation']): - if (blank, Constants.PROV['hadRole'], Constants.SIO['Subject']): - for participant in rdf_graph.objects(subject=blank, predicate=Constants.PROV['agent']): - uuid = (str(participant)).split('/')[-1] # strip off the http://whatever/whatever/ - if (not uuid in participants) and \ - ( (not filter) or CheckSubjectMatchesFilter( tuple([file]) , project, participant, filter) ): + for blank in rdf_graph.objects( + subject=activity, + predicate=Constants.PROV["qualifiedAssociation"], + ): + if (blank, Constants.PROV["hadRole"], Constants.SIO["Subject"]): + for participant in rdf_graph.objects( + subject=blank, predicate=Constants.PROV["agent"] + ): + uuid = (str(participant)).split("/")[ + -1 + ] # strip off the http://whatever/whatever/ + if (not uuid in participants) and ( + (not filter) + or CheckSubjectMatchesFilter( + tuple([file]), project, participant, filter + ) + ): ### added by DBK for subject IDs as well ### - for id in rdf_graph.objects(subject=participant,predicate=URIRef(Constants.NIDM_SUBJECTID.uri)): - subid = (str(id)).split('/')[-1] # strip off the http://whatever/whatever/ + for id in rdf_graph.objects( + subject=participant, + predicate=URIRef(Constants.NIDM_SUBJECTID.uri), + ): + subid = (str(id)).split("/")[ + -1 + ] # strip off the http://whatever/whatever/ ### added by DBK for subject IDs as well ### - #participants.append(uuid) - if ( not uuid in participants['uuid'] ): + # participants.append(uuid) + if not uuid in participants["uuid"]: try: - participants['uuid'].append(uuid) - participants['subject id'].append(subid) + participants["uuid"].append(uuid) + participants["subject id"].append(subid) # just in case there's no subject id in the file... 
except: - #participants.append(uuid) - participants['uuid'].append(uuid) - participants['subject id'].append('') + # participants.append(uuid) + participants["uuid"].append(uuid) + participants["subject id"].append("") return participants @@ -624,38 +691,47 @@ def GetParticipantUUIDsForProjectCached(nidm_file_list:tuple, project_id, filter # if this isn't already a URI, make it one. # calls from the REST api don't include the URI def expandUUID(partial_uuid): - ''' + """ Expands a uuid (which is the local part of a qname) to the proper full URI :param partial_uuid: UUID without the initial URI :return: full URI of UUID - ''' + """ uuid = partial_uuid - if partial_uuid.find('http') < 0: + if partial_uuid.find("http") < 0: uuid = Constants.NIIRI[partial_uuid] return uuid def getProjectAcquisitionObjects(nidm_file_list, project_id): acq_objects = [] - isa = URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type') + isa = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type") project_uuid = expandUUID(project_id) for file in nidm_file_list: rdf_graph = OpenGraph(file) - #find all the projects - for (project,pred,o) in rdf_graph.triples((None, None, Constants.NIDM['Project'])): - #check if it is our project + # find all the projects + for project, pred, o in rdf_graph.triples( + (None, None, Constants.NIDM["Project"]) + ): + # check if it is our project if str(project) == project_uuid: - for (session,p2,o2) in rdf_graph.triples((None,isa, Constants.NIDM['Session'])): - for (acquisition,p3,o3) in rdf_graph.triples((None, Constants.DCT['isPartOf'], session)): - for (acq_obj, p4, o4) in rdf_graph.triples((None, Constants.PROV['wasGeneratedBy'], acquisition)): - if (acq_obj, isa, Constants.NIDM['AcquisitionObject']): + for session, p2, o2 in rdf_graph.triples( + (None, isa, Constants.NIDM["Session"]) + ): + for acquisition, p3, o3 in rdf_graph.triples( + (None, Constants.DCT["isPartOf"], session) + ): + for acq_obj, p4, o4 in rdf_graph.triples( + (None, Constants.PROV["wasGeneratedBy"], acquisition) + ): + if (acq_obj, isa, Constants.NIDM["AcquisitionObject"]): acq_objects.append(acq_obj) return acq_objects + @functools.lru_cache(maxsize=LARGEST_CACHE_SIZE) def GetDatatypeSynonyms(nidm_file_list, project_id, datatype): - ''' + """ Try to match a datatype string with any of the known info about a data element Returns all the possible synonyms for that datatype For example, if AGE_AT_SCAN is a data element prefix, return the label, datumType, measureOf URI, prefix, etc. 
@@ -664,72 +740,115 @@ def GetDatatypeSynonyms(nidm_file_list, project_id, datatype): :param project_id: :param datatype: :return: - ''' + """ if datatype.startswith("instruments."): datatype = datatype[12:] if datatype.startswith("derivatives."): datatype = datatype[12:] project_data_elements = GetProjectDataElements(nidm_file_list, project_id) all_synonyms = set([datatype]) - for dti in project_data_elements['data_type_info']: - #modified by DBK 7/25/2022 + for dti in project_data_elements["data_type_info"]: + # modified by DBK 7/25/2022 # if str(datatype) in [ str(x) for x in [dti['source_variable'], dti['label'], dti['datumType'], dti['measureOf'], URITail(dti['measureOf']), str(dti['isAbout']), URITail(dti['isAbout']), dti['dataElement'], dti['dataElementURI'], dti['prefix']] ]: - if (any(str(datatype) in str(x) for x in - [dti['source_variable'], dti['label'], dti['datumType'], dti['measureOf'], URITail(dti['measureOf']), - str(dti['isAbout']), URITail(dti['isAbout']), dti['dataElement'], dti['dataElementURI'], dti['prefix']])): - all_synonyms = all_synonyms.union(set([str(dti['source_variable']), str(dti['label']), str(dti['datumType']), str(dti['measureOf']), URITail(dti['measureOf']), str(dti['isAbout']), str(dti['dataElement']), str(dti['dataElementURI'])] )) + if any( + str(datatype) in str(x) + for x in [ + dti["source_variable"], + dti["label"], + dti["datumType"], + dti["measureOf"], + URITail(dti["measureOf"]), + str(dti["isAbout"]), + URITail(dti["isAbout"]), + dti["dataElement"], + dti["dataElementURI"], + dti["prefix"], + ] + ): + all_synonyms = all_synonyms.union( + set( + [ + str(dti["source_variable"]), + str(dti["label"]), + str(dti["datumType"]), + str(dti["measureOf"]), + URITail(dti["measureOf"]), + str(dti["isAbout"]), + str(dti["dataElement"]), + str(dti["dataElementURI"]), + ] + ) + ) all_synonyms.remove("") # remove the empty string in case that is in there return all_synonyms + def GetProjectDataElements(nidm_file_list, project_id): ### added by DBK...changing to dictionary to support labels along with uuids - #result = [] + # result = [] result = {} result["uuid"] = [] - result['label']= [] - result['data_type_info'] = [] - isa = URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type') + result["label"] = [] + result["data_type_info"] = [] + isa = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type") # if this isn't already a URI, make it one. 
# calls from the REST api don't include the URI project = project_id - if project_id.find('http') < 0: + if project_id.find("http") < 0: project = Constants.NIIRI[project_id] for file in nidm_file_list: rdf_graph = OpenGraph(file) - #find all the sessions - for (session, cde_tuple, o) in rdf_graph.triples((None, None, Constants.NIDM['Session'])): #rdf_graph.subjects(object=isa, predicate=Constants.NIDM['Session']): - #check if it is part of our project - if (session, Constants.DCT['isPartOf'], project) in rdf_graph: + # find all the sessions + for session, cde_tuple, o in rdf_graph.triples( + (None, None, Constants.NIDM["Session"]) + ): # rdf_graph.subjects(object=isa, predicate=Constants.NIDM['Session']): + # check if it is part of our project + if (session, Constants.DCT["isPartOf"], project) in rdf_graph: # we know we have the right file, so just grab all the data elements from here - for de in rdf_graph.subjects(isa, Constants.NIDM['DataElement']): + for de in rdf_graph.subjects(isa, Constants.NIDM["DataElement"]): ### added by DBK to return label as well as UUID - #result.append(rdf_graph.namespace_manager.compute_qname(str(de))[2]) - for label in rdf_graph.objects(subject=de, predicate=Constants.RDFS['label']): - #result.append(rdf_graph.namespace_manager.compute_qname(str(de))[2] + "=" + label) - result["uuid"].append(rdf_graph.namespace_manager.compute_qname(str(de))[2]) + # result.append(rdf_graph.namespace_manager.compute_qname(str(de))[2]) + for label in rdf_graph.objects( + subject=de, predicate=Constants.RDFS["label"] + ): + # result.append(rdf_graph.namespace_manager.compute_qname(str(de))[2] + "=" + label) + result["uuid"].append( + rdf_graph.namespace_manager.compute_qname(str(de))[2] + ) result["label"].append(label) result["data_type_info"].append(getDataTypeInfo(rdf_graph, de)) ### added by DBK...we should also look for data elements that are sub-classes of Constants.NIDM['DataElement'] ### to include any freesurfer, fsl, or ants data elements - for subclass in rdf_graph.subjects(predicate=Constants.RDFS["subClassOf"],object=Constants.NIDM['DataElement']): + for subclass in rdf_graph.subjects( + predicate=Constants.RDFS["subClassOf"], + object=Constants.NIDM["DataElement"], + ): for de in rdf_graph.subjects(isa, subclass): # and let's return the labels as well to make things more readable. 
- for label in rdf_graph.objects(subject=de, predicate=Constants.RDFS['label']): - #result.append(rdf_graph.namespace_manager.compute_qname(str(de))[2] + "=" + label) - result["uuid"].append(rdf_graph.namespace_manager.compute_qname(str(de))[2]) + for label in rdf_graph.objects( + subject=de, predicate=Constants.RDFS["label"] + ): + # result.append(rdf_graph.namespace_manager.compute_qname(str(de))[2] + "=" + label) + result["uuid"].append( + rdf_graph.namespace_manager.compute_qname(str(de))[2] + ) result["label"].append(label) - result["data_type_info"].append(getDataTypeInfo(rdf_graph, de)) + result["data_type_info"].append( + getDataTypeInfo(rdf_graph, de) + ) # Since common data elements won't have entries in the main graph, try to find them also cde_set = set() - for stat_collection in rdf_graph.subjects(isa, Constants.NIDM['FSStatsCollection']): + for stat_collection in rdf_graph.subjects( + isa, Constants.NIDM["FSStatsCollection"] + ): for predicate in rdf_graph.predicates(subject=stat_collection): dti = getDataTypeInfo(None, predicate) if dti: - cde_tuple = (predicate, dti["label"]) - cde_set.add( cde_tuple ) + cde_tuple = (predicate, dti["label"]) + cde_set.add(cde_tuple) for cde in cde_set: result["uuid"].append(cde[0]) @@ -743,31 +862,38 @@ def GetProjectDataElements(nidm_file_list, project_id): # in case someone passes in a filter subject with a full http or https URI, strip it back to just the bit after the namespace def splitSubject(subject): if subject.find("http") > -1: - matches = re.match(r'.*(https?://[^/]+[^\. ]+)', subject) + matches = re.match(r".*(https?://[^/]+[^\. ]+)", subject) URI = matches.group(1) subject = str(subject).replace(URI, URITail(URI)) return subject.split(".") + def URITail(URI): - ''' + """ Returns the last bit of a URI. Useful for pulling out datatype from long namespaces , e.g. http://purl.org/nidash/fsl#fsl_000032 :param URI: string :return: string - ''' - tail = URI.split('/')[-1] - tail = tail.split('#')[-1] - return tail + """ + tail = URI.split("/")[-1] + tail = tail.split("#")[-1] + return tail + def trimWellKnownURIPrefix(uri): trimmed = uri - for p in ['http://purl.org/nidash/nidm#', 'http://www.w3.org/ns/prov#', 'http://iri.nidash.org/']: - trimmed = str(trimmed).replace(p, '') + for p in [ + "http://purl.org/nidash/nidm#", + "http://www.w3.org/ns/prov#", + "http://iri.nidash.org/", + ]: + trimmed = str(trimmed).replace(p, "") return trimmed + def CheckSubjectMatchesFilter(nidm_file_list, project_uuid, subject_uuid, filter): - ''' + """ filter should look something like: instruments.AGE gt 12 and instruments.SITE_ID eq CMU @@ -776,37 +902,34 @@ def CheckSubjectMatchesFilter(nidm_file_list, project_uuid, subject_uuid, filter :param subject_uuid: :param filter: :return: - ''' - + """ if filter == None: return True # filter can have multiple and clauses, break them up and test each one - tests = filter.split('and') - - + tests = filter.split("and") for test in tests: found_match = False - split_array = test.split(' ') + split_array = test.split(" ") # TODO: I need to fix this here. When there is a space inside the value the splitter gets more than 3 values # ex: 'projects.subjects.instruments.WISC_IV_VOCAB_SCALED eq \'not a match\'' # in this case we must have spaces in identifier: 'projects.subjects.instruments.age at scan eq 21 # not guaranteed to always be an 'eq' separator. # TODO: Make more robust! 
- #if len(split_array) > 3: + # if len(split_array) > 3: # split_array = test.split('eq') # compound_sub = split_array[0] # op = 'eq' # value = ' '.join(split_array[1:]) - #else: + # else: compound_sub = split_array[0] op = split_array[1] - value = ' '.join(split_array[2:]) + value = " ".join(split_array[2:]) - #if the value is a string, it will have quotes around it. Strip them out now - for quote in ["'", "\"", "`"]: + # if the value is a string, it will have quotes around it. Strip them out now + for quote in ["'", '"', "`"]: if value[0] == quote and value[-1] == quote: value = value[1:-1] @@ -818,28 +941,46 @@ def CheckSubjectMatchesFilter(nidm_file_list, project_uuid, subject_uuid, filter # no instruments or derivatives prefix was entered, so test in both term = sub_pieces[0] - if (len(sub_pieces) == 2 and sub_pieces[0] == 'instruments') or len(sub_pieces) == 1: + if (len(sub_pieces) == 2 and sub_pieces[0] == "instruments") or len( + sub_pieces + ) == 1: if len(sub_pieces) == 2: - term = sub_pieces[1] # 'AGE_AT_SCAN' for example + term = sub_pieces[1] # 'AGE_AT_SCAN' for example synonyms = GetDatatypeSynonyms(tuple(nidm_file_list), project_uuid, term) - instrument_details = GetParticipantInstrumentData(nidm_file_list, project_uuid, subject_uuid) + instrument_details = GetParticipantInstrumentData( + nidm_file_list, project_uuid, subject_uuid + ) for instrument_uuid in instrument_details: for instrument_term in instrument_details[instrument_uuid]: if instrument_term in synonyms: - found_match = filterCompare(instrument_details[instrument_uuid][instrument_term], op, value) + found_match = filterCompare( + instrument_details[instrument_uuid][instrument_term], + op, + value, + ) if found_match: break - if (len(sub_pieces) == 2 and sub_pieces[0] == 'derivatives') or len(sub_pieces) == 1: + if (len(sub_pieces) == 2 and sub_pieces[0] == "derivatives") or len( + sub_pieces + ) == 1: if len(sub_pieces) == 2: - term = sub_pieces[1] # 'ilx:0102597' for example - derivatives_details = GetDerivativesDataForSubject(nidm_file_list, project_uuid, subject_uuid) + term = sub_pieces[1] # 'ilx:0102597' for example + derivatives_details = GetDerivativesDataForSubject( + nidm_file_list, project_uuid, subject_uuid + ) for key in derivatives_details: - derivatives = derivatives_details[key]['values'] - for vkey in derivatives: # values will be in the form { http://example.com/a/b/c#fs_00001 : { datumType: '', label: '', value: '', units:'' }, ... } + derivatives = derivatives_details[key]["values"] + for ( + vkey + ) in ( + derivatives + ): # values will be in the form { http://example.com/a/b/c#fs_00001 : { datumType: '', label: '', value: '', units:'' }, ... 
} short_key = URITail(vkey) if short_key == term: - found_match = filterCompare(derivatives[vkey]['value'], op, value) + found_match = filterCompare( + derivatives[vkey]["value"], op, value + ) if found_match: break @@ -849,31 +990,33 @@ def CheckSubjectMatchesFilter(nidm_file_list, project_uuid, subject_uuid, filter return True + def filterCompare(left, op, right): try: - if op == 'eq': - return (left == right) - elif op == 'lt': - return (float(left) < float(right)) - elif op == 'gt': - return (float(left) > float(right)) + if op == "eq": + return left == right + elif op == "lt": + return float(left) < float(right) + elif op == "gt": + return float(left) > float(right) except: pass return None + def GetProjectsMetadata(nidm_file_list): - ''' + """ :param nidm_file_list: List of one or more NIDM files to query for project meta data :return: dataframe with two columns: "project_uuid" and "project_dentifier" - ''' + """ - query = ''' + query = """ PREFIX sio: PREFIX rdf: PREFIX nidm: SELECT DISTINCT ?property ?o ?s WHERE {{ ?s a nidm:Project . ?s ?property ?o }} - ''' + """ df = sparql_query_nidm(nidm_file_list, query, output_file=None) projects = {} @@ -888,7 +1031,7 @@ def GetProjectsMetadata(nidm_file_list): # if field in field_whitelist: projects[str(project)][field] = value - return {'projects': compressForJSONResponse(projects)} + return {"projects": compressForJSONResponse(projects)} # def GetProjectsComputedMetadata(nidm_file_list): @@ -902,9 +1045,9 @@ def GetProjectsMetadata(nidm_file_list): # # return compressForJSONResponse(meta_data) -def GetDataElements(nidm_file_list): - query=''' +def GetDataElements(nidm_file_list): + query = """ select distinct ?uuid ?DataElements where { @@ -912,12 +1055,14 @@ def GetDataElements(nidm_file_list): filter( regex(str(?DataElements), "DataElement" )) - }''' + }""" - df = sparql_query_nidm(nidm_file_list.split(','), query, output_file=None) + df = sparql_query_nidm(nidm_file_list.split(","), query, output_file=None) return df + + def GetBrainVolumeDataElements(nidm_file_list): - query=''' + query = """ prefix rdf: prefix rdfs: prefix prov: @@ -957,17 +1102,18 @@ def GetBrainVolumeDataElements(nidm_file_list): } OPTIONAL {?element_id nidm:isAbout ?federatedLabel }. OPTIONAL {?element_id nidm:hasLaterality ?laterality }. - }''' + }""" - df = sparql_query_nidm(nidm_file_list.split(','), query, output_file=None) + df = sparql_query_nidm(nidm_file_list.split(","), query, output_file=None) # now let's strip off the for index, row in df.iterrows(): - tmp = row['element_id'] - row['element_id'] = re.search(r'(.*)/(.*)',tmp).group(2) + tmp = row["element_id"] + row["element_id"] = re.search(r"(.*)/(.*)", tmp).group(2) return df + def GetBrainVolumes(nidm_file_list): - query=''' + query = """ # This query simply returns the brain volume data without dependencies on other demographics/assessment measures. prefix rdf: @@ -1002,39 +1148,39 @@ def GetBrainVolumes(nidm_file_list): OPTIONAL {?measure nidm:hasLaterality ?laterality }. 
} - ''' + """ - df = sparql_query_nidm(nidm_file_list.split(','), query, output_file=None) + df = sparql_query_nidm(nidm_file_list.split(","), query, output_file=None) return df - def expandNIDMAbbreviation(shortKey) -> str: - ''' - Takes a shorthand identifier such as dct:description and returns the + """ + Takes a shorthand identifier such as dct:description and returns the full URI http://purl.org/dc/terms/description :param shortKey: :type shortKey: str :return: - ''' + """ newkey = skey = str(shortKey) match = re.search(r"^([^:]+):([^:]+)$", skey) if match: newkey = Constants.namespaces[match.group(1)] + match.group(2) return newkey + def compressForJSONResponse(data) -> dict: - ''' + """ Takes a Dictionary and shortens any key by replacing a full URI with the NIDM prefix :param data: Data to search for long URIs that can be replaced with prefixes :return: Dictionary - ''' + """ new_dict = {} - if isinstance(data,dict): + if isinstance(data, dict): for key, value in data.items(): new_dict[matchPrefix(key)] = compressForJSONResponse(value) else: @@ -1042,15 +1188,16 @@ def compressForJSONResponse(data) -> dict: return new_dict + def matchPrefix(possible_URI, short=False) -> str: - ''' + """ If the possible_URI is found in Constants.namespaces it will be replaced with the prefix :param possible_URI: URI string to look at :type possible_URI: str :return: Returns a - ''' + """ for k, n in Constants.namespaces.items(): if possible_URI.startswith(n): if short: @@ -1060,145 +1207,184 @@ def matchPrefix(possible_URI, short=False) -> str: # also check the prov prefix if possible_URI.startswith("http://www.w3.org/ns/prov#"): - return "{}:{}".format("prov", possible_URI.replace("http://www.w3.org/ns/prov#", "")) + return "{}:{}".format( + "prov", possible_URI.replace("http://www.w3.org/ns/prov#", "") + ) return possible_URI + # check if this activity is linked by a blank node to one of the sw agents def activityIsSWAgent(rdf_graph, activity, sw_agents): - ''' + """ Returns True if the given activity is associated with a software agent from the sw_agents array :param rdf_graph: Graph :param activity: activity URI :param sw_agents: array of software agent URIs :return: Boolean - ''' + """ if activity in sw_agents: return True return False + @functools.lru_cache(maxsize=QUERY_CACHE_SIZE) -def getDerivativesNodesForSubject (rdf_graph, subject): - ''' +def getDerivativesNodesForSubject(rdf_graph, subject): + """ Finds all the URIs that were generated by software agents and linked to the subject :param rdf_graph: :param subject: :return: Array of StatsCollections URIs - ''' + """ - qualified_association = URIRef('http://www.w3.org/ns/prov#qualifiedAssociation') - was_associated_with = URIRef('http://www.w3.org/ns/prov#wasAssociatedWith') + qualified_association = URIRef("http://www.w3.org/ns/prov#qualifiedAssociation") + was_associated_with = URIRef("http://www.w3.org/ns/prov#wasAssociatedWith") sw_agents = getSoftwareAgents(rdf_graph) derivatives_uris = [] - for blank, p, o in rdf_graph.triples( (None, Constants.PROV['agent'], subject)): # get the blank nodes associated with the subject + for blank, p, o in rdf_graph.triples( + (None, Constants.PROV["agent"], subject) + ): # get the blank nodes associated with the subject # verify this blank node points to the subject somewhere it has the role subject - if (blank, Constants.PROV['hadRole'], Constants.SIO['Subject']) in rdf_graph: + if (blank, Constants.PROV["hadRole"], Constants.SIO["Subject"]) in rdf_graph: # get the activity that's the parent of 
the blank node - for activity in rdf_graph.subjects(predicate=Constants.PROV['qualifiedAssociation'], object=blank): + for activity in rdf_graph.subjects( + predicate=Constants.PROV["qualifiedAssociation"], object=blank + ): # try to find if this activity has a qualified association with a software agent (through a blank node) - for software_blank in rdf_graph.objects(subject=activity, predicate=Constants.PROV['qualifiedAssociation']): - for software_agent in rdf_graph.objects(subject=software_blank, predicate=Constants.PROV['agent']): + for software_blank in rdf_graph.objects( + subject=activity, predicate=Constants.PROV["qualifiedAssociation"] + ): + for software_agent in rdf_graph.objects( + subject=software_blank, predicate=Constants.PROV["agent"] + ): if activityIsSWAgent(rdf_graph, software_agent, sw_agents): # now we know our activity generated a stats collection, so go find it (the stats_colleciton will be generated by the activity) - for stats_collection in rdf_graph.subjects(predicate=Constants.PROV['wasGeneratedBy'], object=activity): + for stats_collection in rdf_graph.subjects( + predicate=Constants.PROV["wasGeneratedBy"], + object=activity, + ): derivatives_uris.append(stats_collection) return derivatives_uris + @functools.lru_cache(maxsize=LARGEST_CACHE_SIZE) def getDataTypeInfo(source_graph, datatype): - ''' + """ Scans all the triples with subject of datatype (isa DataElement in the graph) and looks for entries with specific predicates necessary to define it's type :param rdf_graph: :param dt: URI of the DataElement :return: { 'label': label, 'hasUnit': hasUnit, 'typeURI': typeURI} - ''' - isa = URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type') - + """ + isa = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type") expanded_datatype = datatype - if expanded_datatype.find('http') < 0: + if expanded_datatype.find("http") < 0: expanded_datatype = Constants.NIIRI[expanded_datatype] - # check to see if the datatype is in the main graph. 
If not, look in the CDE graph - if source_graph and (expanded_datatype, isa, Constants.NIDM['DataElement']) in source_graph: + if ( + source_graph + and (expanded_datatype, isa, Constants.NIDM["DataElement"]) in source_graph + ): rdf_graph = source_graph # check if datatype is a personal data element - elif source_graph and (expanded_datatype, isa, Constants.NIDM['PersonalDataElement']) in source_graph: + elif ( + source_graph + and (expanded_datatype, isa, Constants.NIDM["PersonalDataElement"]) + in source_graph + ): rdf_graph = source_graph else: rdf_graph = nidm.experiment.CDE.getCDEs() - typeURI = '' - hasUnit = '' - label = '' - description = '' - measureOf = '' - isAbout = '' - structure = '' - prefix = '' - source_variable = '' + typeURI = "" + hasUnit = "" + label = "" + description = "" + measureOf = "" + isAbout = "" + structure = "" + prefix = "" + source_variable = "" found = False - # have to scan all tripples because the label can be in any namespace for s, p, o in rdf_graph.triples((expanded_datatype, None, None)): found = True - if (re.search(r'label$', str(p)) != None): + if re.search(r"label$", str(p)) != None: label = o - if (re.search(r'source_variable$', str(p)) != None): + if re.search(r"source_variable$", str(p)) != None: source_variable = o - elif (re.search(r'sourceVariable$', str(p)) != None): + elif re.search(r"sourceVariable$", str(p)) != None: source_variable = o - if (re.search(r'description$', str(p)) != None): + if re.search(r"description$", str(p)) != None: description = o - if (re.search(r'hasUnit$', str(p), flags=re.IGNORECASE) != None): + if re.search(r"hasUnit$", str(p), flags=re.IGNORECASE) != None: hasUnit = o - if (re.search(r'datumType$', str(p)) != None): - typeURI = str(o).split('/')[-1] - if (re.search(r'measureOf$', str(p)) != None): + if re.search(r"datumType$", str(p)) != None: + typeURI = str(o).split("/")[-1] + if re.search(r"measureOf$", str(p)) != None: measureOf = o - if (re.search(r'isAbout$', str(p), flags=re.IGNORECASE) != None): + if re.search(r"isAbout$", str(p), flags=re.IGNORECASE) != None: isAbout = o - possible_prefix = [x for x in rdf_graph.namespaces() if expanded_datatype.startswith(x[1])] - if (len(possible_prefix) > 0): + possible_prefix = [ + x for x in rdf_graph.namespaces() if expanded_datatype.startswith(x[1]) + ] + if len(possible_prefix) > 0: prefix = possible_prefix[0][0] - if not found: return False else: - return {'label': label, 'hasUnit': hasUnit, 'datumType': typeURI, 'measureOf': measureOf, 'isAbout': isAbout, - 'dataElement': str(URITail(s)), 'dataElementURI': s, 'description': description, 'prefix': prefix, - 'source_variable': source_variable} + return { + "label": label, + "hasUnit": hasUnit, + "datumType": typeURI, + "measureOf": measureOf, + "isAbout": isAbout, + "dataElement": str(URITail(s)), + "dataElementURI": s, + "description": description, + "prefix": prefix, + "source_variable": source_variable, + } -def getStatsCollectionForNode (rdf_graph, derivatives_node): - isa = URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type') - data = {'URI': derivatives_node, 'values': {}} +def getStatsCollectionForNode(rdf_graph, derivatives_node): + isa = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type") + data = {"URI": derivatives_node, "values": {}} for s, datatype, value in rdf_graph.triples((derivatives_node, None, None)): - if datatype == isa and str(value).find('http://purl.org/nidash/nidm#') == 0: - data['StatCollectionType'] = str(value)[28:] + if datatype == isa and 
str(value).find("http://purl.org/nidash/nidm#") == 0: + data["StatCollectionType"] = str(value)[28:] else: - dti = getDataTypeInfo(rdf_graph, datatype ) - if dti: # if we can't find a datatype then this is non-data info so don't record it - data['values'][str(datatype)] = {'datumType': str(dti['datumType']), 'label': str(dti['label']), 'value': str(value), 'units': str(dti['hasUnit']), 'isAbout': str(dti['isAbout'])} + dti = getDataTypeInfo(rdf_graph, datatype) + if ( + dti + ): # if we can't find a datatype then this is non-data info so don't record it + data["values"][str(datatype)] = { + "datumType": str(dti["datumType"]), + "label": str(dti["label"]), + "value": str(value), + "units": str(dti["hasUnit"]), + "isAbout": str(dti["isAbout"]), + } return data + @functools.lru_cache(maxsize=QUERY_CACHE_SIZE) def OpenGraph(file): - ''' + """ Returns a parsed RDFLib Graph object for the given file The file will be hashed and if a pickled copy is found in the TMP dir, that will be used Otherwise the graph will be computed and then saved in the TMP dir as a pickle file @@ -1206,52 +1392,56 @@ def OpenGraph(file): :param file: filename :return: Graph - ''' + """ # if someone passed me a RDF graph rather than a file, just send it back if isinstance(file, rdflib.graph.Graph): return file - # If we have a Blazegraph instance, load the data then do the rest - if 'BLAZEGRAPH_URL' in environ.keys(): + if "BLAZEGRAPH_URL" in environ.keys(): try: f = open(file) data = f.read() logging.debug("Sending {} to blazegraph".format(file)) - r = requests.post(url=environ['BLAZEGRAPH_URL'], data=data, headers={'Content-type': 'application/x-turtle'}) + r = requests.post( + url=environ["BLAZEGRAPH_URL"], + data=data, + headers={"Content-type": "application/x-turtle"}, + ) except Exception as e: logging.error("Exception {} loading {} into Blazegraph.".format(e, file)) - BLOCKSIZE = 65536 hasher = hashlib.md5() - with open(file, 'rb') as afile: + with open(file, "rb") as afile: buf = afile.read(BLOCKSIZE) while len(buf) > 0: hasher.update(buf) buf = afile.read(BLOCKSIZE) hash = hasher.hexdigest() - pickle_file = '{}/rdf_graph.{}.pickle'.format( tempfile.gettempdir(), hash) + pickle_file = "{}/rdf_graph.{}.pickle".format(tempfile.gettempdir(), hash) if path.isfile(pickle_file): return pickle.load(open(pickle_file, "rb")) rdf_graph = Graph() rdf_graph.parse(file, format=util.guess_format(file)) - pickle.dump(rdf_graph, open(pickle_file, 'wb')) + pickle.dump(rdf_graph, open(pickle_file, "wb")) # new graph, so to be safe clear out all cached entries memory.clear(warn=False) return rdf_graph + def GetDerivativesDataForSubject(files, project, subject): - return GetDerivativesDataForSubjectCache (tuple(files), project, subject) + return GetDerivativesDataForSubjectCache(tuple(files), project, subject) + @functools.lru_cache(maxsize=QUERY_CACHE_SIZE) def GetDerivativesDataForSubjectCache(files, project, subject): - ''' + """ Searches for the subject in the supplied RDF .ttl files and returns an array of all the data generated by software agents about that subject @@ -1259,11 +1449,11 @@ def GetDerivativesDataForSubjectCache(files, project, subject): :param files: Array of RDF .ttl files :param subject: The URI (or just the bit after the NIIRI prefix) of a subject :return: Array of stat collections for the subject - ''' + """ # if this isn't already a URI, make it one. 
# calls from the REST api don't include the URI - if subject.find('http') < 0: + if subject.find("http") < 0: subject = Constants.NIIRI[subject] data = {} @@ -1272,43 +1462,45 @@ def GetDerivativesDataForSubjectCache(files, project, subject): rdf_graph = OpenGraph(nidm_file) for node in getDerivativesNodesForSubject(rdf_graph, subject): collection = getStatsCollectionForNode(rdf_graph, node) - key = str(collection['URI']).split('/')[-1] + key = str(collection["URI"]).split("/")[-1] data[key] = collection return data + def getSoftwareAgents(rdf_graph): - ''' + """ Scans the supplied graph and returns any software agenyt URIs found there :param rdf_graph: a parsed RDF Graph :return: array of agent URIs - ''' + """ - isa = URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type') - software_agent = URIRef('http://www.w3.org/ns/prov#SoftwareAgent') + isa = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type") + software_agent = URIRef("http://www.w3.org/ns/prov#SoftwareAgent") agents = [] - for s,o,p in rdf_graph.triples( (None, isa, software_agent) ): + for s, o, p in rdf_graph.triples((None, isa, software_agent)): agents.append(s) return agents + def download_cde_files(): - cde_dir = tempfile.gettempdir() + cde_dir = tempfile.gettempdir() - for url in Constants.CDE_FILE_LOCATIONS: - urlretrieve(url, "{}/{}".format(cde_dir, url.split('/')[-1])) + for url in Constants.CDE_FILE_LOCATIONS: + urlretrieve(url, "{}/{}".format(cde_dir, url.split("/")[-1])) - return cde_dir + return cde_dir -def getCDEs(file_list=None): +def getCDEs(file_list=None): if getCDEs.cache: return getCDEs.cache hasher = hashlib.md5() - hasher.update(str(file_list).encode('utf-8')) + hasher.update(str(file_list).encode("utf-8")) h = hasher.hexdigest() cache_file_name = tempfile.gettempdir() + "/cde_graph.{}.pickle".format(h) @@ -1321,38 +1513,36 @@ def getCDEs(file_list=None): rdf_graph = Graph() if not file_list: - - cde_dir = '' + cde_dir = "" if "CDE_DIR" in os.environ: - cde_dir = os.environ['CDE_DIR'] + cde_dir = os.environ["CDE_DIR"] - if (not cde_dir) and (os.path.isfile( '/opt/project/nidm/core/cde_dir/ants_cde.ttl' )): - cde_dir = '/opt/project/nidm/core/cde_dir' + if (not cde_dir) and ( + os.path.isfile("/opt/project/nidm/core/cde_dir/ants_cde.ttl") + ): + cde_dir = "/opt/project/nidm/core/cde_dir" - if (not cde_dir): + if not cde_dir: cde_dir = download_cde_files() # TODO: the list of file names should be it's own constant or derived from CDE_FILE_LOCATIONS - file_list = [ ] - for f in ['ants_cde.ttl', 'fs_cde.ttl', 'fsl_cde.ttl']: - fname = '{}/{}'.format(cde_dir, f) - if os.path.isfile( fname ): - file_list.append( fname ) - - + file_list = [] + for f in ["ants_cde.ttl", "fs_cde.ttl", "fsl_cde.ttl"]: + fname = "{}/{}".format(cde_dir, f) + if os.path.isfile(fname): + file_list.append(fname) for fname in file_list: if os.path.isfile(fname): cde_graph = OpenGraph(fname) rdf_graph = rdf_graph + cde_graph - - - - cache_file = open(cache_file_name , 'wb') + cache_file = open(cache_file_name, "wb") pickle.dump(rdf_graph, cache_file) cache_file.close() getCDEs.cache = rdf_graph return rdf_graph -getCDEs.cache = None \ No newline at end of file + + +getCDEs.cache = None diff --git a/nidm/experiment/README.md b/nidm/experiment/README.md index 08e53d8e..f289c1a1 100644 --- a/nidm/experiment/README.md +++ b/nidm/experiment/README.md @@ -1,3 +1,2 @@ # NIDM-Experiment Python API Python API to create, query, read, and write NIDM-Experiment documents. 
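To make the API touched by this patch a bit more concrete, here is a minimal usage sketch. It is not part of the patch: it assumes the reformatted functions keep the signatures shown in this diff, that they are importable from `nidm.experiment.Utils` and `nidm.experiment.Query` as in the installed package layout, and that `nidm.ttl` is a hypothetical local NIDM-Experiment Turtle file.

```python
# Minimal sketch (assumptions noted above): exercises read_nidm() and
# GetProjectsUUID() as they appear in this diff. "nidm.ttl" is a
# hypothetical local NIDM-Experiment document.
from nidm.experiment.Utils import read_nidm
from nidm.experiment.Query import GetProjectsUUID

# Load the document into Project/Session/Acquisition objects
project = read_nidm("nidm.ttl")
print(project.get_uuid())

# Query one or more NIDM files for their Project UUIDs
print(GetProjectsUUID(["nidm.ttl"]))
```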
- diff --git a/nidm/experiment/Session.py b/nidm/experiment/Session.py index a65d008d..7287e5b6 100644 --- a/nidm/experiment/Session.py +++ b/nidm/experiment/Session.py @@ -1,14 +1,15 @@ +import os +import sys +import prov.model as pm import rdflib as rdf -import os, sys -#sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +# sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) from ..core import Constants from ..experiment import Core from ..experiment.Core import getUUID -import prov.model as pm -class Session(pm.ProvActivity,Core): +class Session(pm.ProvActivity, Core): """Class for NIDM-Experimenent Session-Level Objects. Default constructor uses empty graph with namespaces added from NIDM/Scripts/Constants.py. @@ -19,8 +20,9 @@ class Session(pm.ProvActivity,Core): @copyright: University of California, Irvine 2017 """ - #constructor - def __init__(self, project,uuid=None,attributes=None,add_default_type=True): + + # constructor + def __init__(self, project, uuid=None, attributes=None, add_default_type=True): """ Default constructor, creates a session activity and links to project object @@ -30,12 +32,24 @@ def __init__(self, project,uuid=None,attributes=None,add_default_type=True): """ if uuid is None: self._uuid = getUUID() - #execute default parent class constructor - super(Session,self).__init__(project.graph, pm.QualifiedName(pm.Namespace("niiri",Constants.NIIRI),self.get_uuid()),attributes) + # execute default parent class constructor + super(Session, self).__init__( + project.graph, + pm.QualifiedName( + pm.Namespace("niiri", Constants.NIIRI), self.get_uuid() + ), + attributes, + ) else: self._uuid = uuid - #execute default parent class constructor - super(Session,self).__init__(project.graph, pm.QualifiedName(pm.Namespace("niiri",Constants.NIIRI),self.get_uuid()),attributes) + # execute default parent class constructor + super(Session, self).__init__( + project.graph, + pm.QualifiedName( + pm.Namespace("niiri", Constants.NIIRI), self.get_uuid() + ), + attributes, + ) project.graph._add_record(self) @@ -45,27 +59,32 @@ def __init__(self, project,uuid=None,attributes=None,add_default_type=True): self.graph = project.graph project.add_sessions(self) - #list of acquisitions associated with this session - self._acquisitions=[] - def add_acquisition(self,acquisition): + # list of acquisitions associated with this session + self._acquisitions = [] + + def add_acquisition(self, acquisition): self._acquisitions.extend([acquisition]) - #create links in graph - acquisition.add_attributes({pm.QualifiedName(pm.Namespace("dct",Constants.DCT),'isPartOf'):self}) + # create links in graph + acquisition.add_attributes( + {pm.QualifiedName(pm.Namespace("dct", Constants.DCT), "isPartOf"): self} + ) + def get_acquisitions(self): return self._acquisitions - def acquisition_exist(self,uuid): - ''' + + def acquisition_exist(self, uuid): + """ Checks whether uuid is a registered acquisition :param uuid: full uuid of acquisition :return: True if exists, False otherwise - ''' - #print("Query uuid: %s" %uuid) + """ + # print("Query uuid: %s" %uuid) for acquisitions in self._acquisitions: - #print(acquisitions._identifier._localpart) + # print(acquisitions._identifier._localpart) if str(uuid) == acquisitions._identifier._localpart: return True return False + def __str__(self): return "NIDM-Experiment Session Class" - diff --git a/nidm/experiment/Utils.py b/nidm/experiment/Utils.py index 81c9f9de..7e6f2fc5 100644 --- a/nidm/experiment/Utils.py +++ 
b/nidm/experiment/Utils.py @@ -1,307 +1,438 @@ -import os,sys - - -from rdflib import Namespace, Literal,RDFS -from rdflib.namespace import XSD -from rdflib.resource import Resource -from rdflib.util import from_n3 -from urllib.parse import urlparse, urlsplit -from rdflib import Graph, RDF, URIRef, util -from rdflib.namespace import split_uri -import validators -import prov.model as pm -from prov.model import QualifiedName, Identifier -from prov.model import Namespace as provNamespace -import requests -from rapidfuzz import fuzz +from binascii import crc32 +import getpass import json +import logging +import os +import sys +from urllib.parse import urlparse, urlsplit +from urllib.request import urlopen +from uuid import UUID from github import Github, GithubException -import getpass from numpy import base_repr -from binascii import crc32 import pandas as pd -from uuid import UUID -from urllib.request import urlopen - -#NIDM imports -from ..core import Constants -from ..core.Constants import DD - - -from .Core import getUUID -from .Project import Project -from .Session import Session +import prov.model as pm +from prov.model import Identifier +from prov.model import Namespace as provNamespace +from prov.model import QualifiedName +from rapidfuzz import fuzz +from rdflib import RDF, RDFS, Graph, Literal, Namespace, URIRef, util +from rdflib.namespace import XSD, split_uri +from rdflib.resource import Resource +from rdflib.util import from_n3 +import requests +import validators from .Acquisition import Acquisition -from .MRAcquisition import MRAcquisition -from .PETAcquisition import PETAcquisition from .AcquisitionObject import AcquisitionObject from .AssessmentAcquisition import AssessmentAcquisition from .AssessmentObject import AssessmentObject -from .DerivativeObject import DerivativeObject -from .Derivative import Derivative +from .Core import Core, getUUID from .DataElement import DataElement +from .Derivative import Derivative +from .DerivativeObject import DerivativeObject +from .MRAcquisition import MRAcquisition from .MRObject import MRObject +from .PETAcquisition import PETAcquisition from .PETObject import PETObject -from .Core import Core -import logging +from .Project import Project +from .Session import Session + +# NIDM imports +from ..core import Constants +from ..core.Constants import DD + logger = logging.getLogger(__name__) +import random import re import string -import random -#Interlex stuff +# cognitive atlas +from cognitiveatlas.api import get_concept, get_disorder +from datalad.support.annexrepo import AnnexRepo + +# Interlex stuff import ontquery as oq # datalad / git-annex sources -from datalad.support.annexrepo import AnnexRepo - -# cognitive atlas -from cognitiveatlas.api import get_concept, get_disorder # set if we're running in production or testing mode -#INTERLEX_MODE = 'test' -INTERLEX_MODE = 'production' -if INTERLEX_MODE == 'test': - INTERLEX_PREFIX = 'tmp_' - #INTERLEX_ENDPOINT = "https://beta.scicrunch.org/api/1/" +# INTERLEX_MODE = 'test' +INTERLEX_MODE = "production" +if INTERLEX_MODE == "test": + INTERLEX_PREFIX = "tmp_" + # INTERLEX_ENDPOINT = "https://beta.scicrunch.org/api/1/" INTERLEX_ENDPOINT = "https://test3.scicrunch.org/api/1/" -elif INTERLEX_MODE == 'production': - INTERLEX_PREFIX = 'ilx_' +elif INTERLEX_MODE == "production": + INTERLEX_PREFIX = "ilx_" INTERLEX_ENDPOINT = "https://scicrunch.org/api/1/" else: print("ERROR: Interlex mode can only be 'test' or 'production'") exit(1) - def safe_string(string): - return string.strip().replace(" 
","_").replace("-", "_").replace(",", "_").replace("(", "_").replace(")","_")\ - .replace("'","_").replace("/", "_").replace("#","num") + return ( + string.strip() + .replace(" ", "_") + .replace("-", "_") + .replace(",", "_") + .replace("(", "_") + .replace(")", "_") + .replace("'", "_") + .replace("/", "_") + .replace("#", "num") + ) + def read_nidm(nidmDoc): """ - Loads nidmDoc file into NIDM-Experiment structures and returns objects + Loads nidmDoc file into NIDM-Experiment structures and returns objects - :nidmDoc: a valid RDF NIDM-experiment document (deserialization formats supported by RDFLib) + :nidmDoc: a valid RDF NIDM-experiment document (deserialization formats supported by RDFLib) - :return: NIDM Project + :return: NIDM Project """ from ..experiment.Project import Project from ..experiment.Session import Session - # read RDF file into temporary graph rdf_graph = Graph() - rdf_graph_parse = rdf_graph.parse(nidmDoc,format=util.guess_format(nidmDoc)) - + rdf_graph_parse = rdf_graph.parse(nidmDoc, format=util.guess_format(nidmDoc)) # Query graph for project metadata and create project level objects # Get subject URI for project - proj_id=None - for s in rdf_graph_parse.subjects(predicate=RDF.type,object=URIRef(Constants.NIDM_PROJECT.uri)): - #print(s) - proj_id=s + proj_id = None + for s in rdf_graph_parse.subjects( + predicate=RDF.type, object=URIRef(Constants.NIDM_PROJECT.uri) + ): + # print(s) + proj_id = s if proj_id is None: print("Error reading NIDM-Exp Document %s, Must have Project Object" % nidmDoc) print() create_obj = input("Should read_nidm create a Project object for you [yes]: ") - if (create_obj == 'yes' or create_obj == ''): - project = Project(empty_graph=True,add_default_type=True) + if create_obj == "yes" or create_obj == "": + project = Project(empty_graph=True, add_default_type=True) # add namespaces to prov graph for name, namespace in rdf_graph_parse.namespaces(): # skip these default namespaces in prov Document - if (name != 'prov') and (name != 'xsd') and (name != 'nidm') and (name != 'niiri'): + if ( + (name != "prov") + and (name != "xsd") + and (name != "nidm") + and (name != "niiri") + ): project.graph.add_namespace(name, namespace) else: exit(1) else: - #Split subject URI into namespace, term - nm,project_uuid = split_uri(proj_id) + # Split subject URI into namespace, term + nm, project_uuid = split_uri(proj_id) - #create empty prov graph - project = Project(empty_graph=True,uuid=project_uuid,add_default_type=False) + # create empty prov graph + project = Project(empty_graph=True, uuid=project_uuid, add_default_type=False) - #add namespaces to prov graph + # add namespaces to prov graph for name, namespace in rdf_graph_parse.namespaces(): - #skip these default namespaces in prov Document - if (name != 'prov') and (name != 'xsd') and (name != 'nidm') and (name != 'niiri'): + # skip these default namespaces in prov Document + if ( + (name != "prov") + and (name != "xsd") + and (name != "nidm") + and (name != "niiri") + ): project.graph.add_namespace(name, namespace) - #Cycle through Project metadata adding to prov graph - add_metadata_for_subject (rdf_graph_parse,proj_id,project.graph.namespaces,project) + # Cycle through Project metadata adding to prov graph + add_metadata_for_subject( + rdf_graph_parse, proj_id, project.graph.namespaces, project + ) + # Query graph for sessions, instantiate session objects, and add to project._session list + # Get subject URI for sessions + for s in rdf_graph_parse.subjects( + predicate=RDF.type, 
object=URIRef(Constants.NIDM_SESSION.uri) + ): + # print("session: %s" % s) - #Query graph for sessions, instantiate session objects, and add to project._session list - #Get subject URI for sessions - for s in rdf_graph_parse.subjects(predicate=RDF.type,object=URIRef(Constants.NIDM_SESSION.uri)): - #print("session: %s" % s) + # Split subject URI for session into namespace, uuid + nm, session_uuid = split_uri(s) - #Split subject URI for session into namespace, uuid - nm,session_uuid = split_uri(s) + # print("session uuid= %s" %session_uuid) - #print("session uuid= %s" %session_uuid) + # instantiate session with this uuid + session = Session(project=project, uuid=session_uuid, add_default_type=False) - #instantiate session with this uuid - session = Session(project=project, uuid=session_uuid,add_default_type=False) - - #add session to project + # add session to project project.add_sessions(session) - #now get remaining metadata in session object and add to session - #Cycle through Session metadata adding to prov graph - add_metadata_for_subject (rdf_graph_parse,s,project.graph.namespaces,session) + # now get remaining metadata in session object and add to session + # Cycle through Session metadata adding to prov graph + add_metadata_for_subject(rdf_graph_parse, s, project.graph.namespaces, session) - #Query graph for acquistions dct:isPartOf the session - for acq in rdf_graph_parse.subjects(predicate=Constants.DCT['isPartOf'],object=s): - #Split subject URI for session into namespace, uuid - nm,acq_uuid = split_uri(acq) + # Query graph for acquistions dct:isPartOf the session + for acq in rdf_graph_parse.subjects( + predicate=Constants.DCT["isPartOf"], object=s + ): + # Split subject URI for session into namespace, uuid + nm, acq_uuid = split_uri(acq) # print("acquisition uuid: %s" %acq_uuid) - #query for whether this is an AssessmentAcquisition of other Acquisition, etc. - for rdf_type in rdf_graph_parse.objects(subject=acq, predicate=RDF.type): - #if this is an acquisition activity, which kind? + # query for whether this is an AssessmentAcquisition of other Acquisition, etc. + for rdf_type in rdf_graph_parse.objects(subject=acq, predicate=RDF.type): + # if this is an acquisition activity, which kind? 
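As an aside to the reader refactor above, a minimal usage sketch (illustrative only, not part of this patch; "nidm.ttl" is an assumed example path and the file is assumed to already contain a nidm:Project, otherwise read_nidm prompts interactively):

```python
# Illustrative sketch, not part of the patch: round-trip an existing
# NIDM-Experiment Turtle file through read_nidm() and inspect the PROV records.
from nidm.experiment.Utils import read_nidm

project = read_nidm("nidm.ttl")  # assumed example path
# project.graph is the underlying prov document; get_records() is the same
# accessor the qualified-association handling later in this file relies on.
for record in project.graph.get_records():
    print(record)
```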
if str(rdf_type) == Constants.NIDM_ACQUISITION_ACTIVITY.uri: - #first find the entity generated by this acquisition activity - for acq_obj in rdf_graph_parse.subjects(predicate=Constants.PROV["wasGeneratedBy"],object=acq): - #Split subject URI for acquisition object (entity) into namespace, uuid - nm,acq_obj_uuid = split_uri(acq_obj) - #print("acquisition object uuid: %s" %acq_obj_uuid) - - #query for whether this is an MRI acquisition by way of looking at the generated entity and determining - #if it has the tuple [uuid Constants.NIDM_ACQUISITION_MODALITY Constants.NIDM_MRI] - if (acq_obj,URIRef(Constants.NIDM_ACQUISITION_MODALITY._uri),URIRef(Constants.NIDM_MRI._uri)) in rdf_graph: - - #check whether this acquisition activity has already been instantiated (maybe if there are multiple acquisition - #entities prov:wasGeneratedBy the acquisition + # first find the entity generated by this acquisition activity + for acq_obj in rdf_graph_parse.subjects( + predicate=Constants.PROV["wasGeneratedBy"], object=acq + ): + # Split subject URI for acquisition object (entity) into namespace, uuid + nm, acq_obj_uuid = split_uri(acq_obj) + # print("acquisition object uuid: %s" %acq_obj_uuid) + + # query for whether this is an MRI acquisition by way of looking at the generated entity and determining + # if it has the tuple [uuid Constants.NIDM_ACQUISITION_MODALITY Constants.NIDM_MRI] + if ( + acq_obj, + URIRef(Constants.NIDM_ACQUISITION_MODALITY._uri), + URIRef(Constants.NIDM_MRI._uri), + ) in rdf_graph: + # check whether this acquisition activity has already been instantiated (maybe if there are multiple acquisition + # entities prov:wasGeneratedBy the acquisition if not session.acquisition_exist(acq_uuid): - acquisition=MRAcquisition(session=session,uuid=acq_uuid,add_default_type=False) + acquisition = MRAcquisition( + session=session, + uuid=acq_uuid, + add_default_type=False, + ) session.add_acquisition(acquisition) - #Cycle through remaining metadata for acquisition activity and add attributes - add_metadata_for_subject (rdf_graph_parse,acq,project.graph.namespaces,acquisition) - + # Cycle through remaining metadata for acquisition activity and add attributes + add_metadata_for_subject( + rdf_graph_parse, + acq, + project.graph.namespaces, + acquisition, + ) - #and add acquisition object - acquisition_obj=MRObject(acquisition=acquisition,uuid=acq_obj_uuid,add_default_type=False) + # and add acquisition object + acquisition_obj = MRObject( + acquisition=acquisition, + uuid=acq_obj_uuid, + add_default_type=False, + ) acquisition.add_acquisition_object(acquisition_obj) - #Cycle through remaining metadata for acquisition entity and add attributes - add_metadata_for_subject(rdf_graph_parse,acq_obj,project.graph.namespaces,acquisition_obj) - - #MRI acquisitions may have an associated stimulus file so let's see if there is an entity - #prov:wasAttributedTo this acquisition_obj - for assoc_acq in rdf_graph_parse.subjects(predicate=Constants.PROV["wasAttributedTo"],object=acq_obj): - #get rdf:type of this entity and check if it's a nidm:StimulusResponseFile or not - #if rdf_graph_parse.triples((assoc_acq, RDF.type, URIRef("http://purl.org/nidash/nidm#StimulusResponseFile"))): - if (assoc_acq,RDF.type,URIRef(Constants.NIDM_MRI_BOLD_EVENTS._uri)) in rdf_graph: - #Split subject URI for associated acquisition entity for nidm:StimulusResponseFile into namespace, uuid - nm,assoc_acq_uuid = split_uri(assoc_acq) - #print("associated acquisition object (stimulus file) uuid: %s" % assoc_acq_uuid) - #if so then add 
this entity and associate it with acquisition activity and MRI entity - events_obj = AcquisitionObject(acquisition=acquisition,uuid=assoc_acq_uuid) - #link it to appropriate MR acquisition entity + # Cycle through remaining metadata for acquisition entity and add attributes + add_metadata_for_subject( + rdf_graph_parse, + acq_obj, + project.graph.namespaces, + acquisition_obj, + ) + + # MRI acquisitions may have an associated stimulus file so let's see if there is an entity + # prov:wasAttributedTo this acquisition_obj + for assoc_acq in rdf_graph_parse.subjects( + predicate=Constants.PROV["wasAttributedTo"], + object=acq_obj, + ): + # get rdf:type of this entity and check if it's a nidm:StimulusResponseFile or not + # if rdf_graph_parse.triples((assoc_acq, RDF.type, URIRef("http://purl.org/nidash/nidm#StimulusResponseFile"))): + if ( + assoc_acq, + RDF.type, + URIRef(Constants.NIDM_MRI_BOLD_EVENTS._uri), + ) in rdf_graph: + # Split subject URI for associated acquisition entity for nidm:StimulusResponseFile into namespace, uuid + nm, assoc_acq_uuid = split_uri(assoc_acq) + # print("associated acquisition object (stimulus file) uuid: %s" % assoc_acq_uuid) + # if so then add this entity and associate it with acquisition activity and MRI entity + events_obj = AcquisitionObject( + acquisition=acquisition, uuid=assoc_acq_uuid + ) + # link it to appropriate MR acquisition entity events_obj.wasAttributedTo(acquisition_obj) - #cycle through rest of metadata - add_metadata_for_subject(rdf_graph_parse,assoc_acq,project.graph.namespaces,events_obj) - - - - elif (acq_obj, RDF.type, URIRef(Constants.NIDM_MRI_BOLD_EVENTS._uri)) in rdf_graph: - #If this is a stimulus response file - #elif str(acq_modality) == Constants.NIDM_MRI_BOLD_EVENTS: - acquisition=Acquisition(session=session,uuid=acq_uuid) + # cycle through rest of metadata + add_metadata_for_subject( + rdf_graph_parse, + assoc_acq, + project.graph.namespaces, + events_obj, + ) + + elif ( + acq_obj, + RDF.type, + URIRef(Constants.NIDM_MRI_BOLD_EVENTS._uri), + ) in rdf_graph: + # If this is a stimulus response file + # elif str(acq_modality) == Constants.NIDM_MRI_BOLD_EVENTS: + acquisition = Acquisition(session=session, uuid=acq_uuid) if not session.acquisition_exist(acq_uuid): session.add_acquisition(acquisition) - #Cycle through remaining metadata for acquisition activity and add attributes - add_metadata_for_subject (rdf_graph_parse,acq,project.graph.namespaces,acquisition) + # Cycle through remaining metadata for acquisition activity and add attributes + add_metadata_for_subject( + rdf_graph_parse, + acq, + project.graph.namespaces, + acquisition, + ) - #and add acquisition object - acquisition_obj=AcquisitionObject(acquisition=acquisition,uuid=acq_obj_uuid) + # and add acquisition object + acquisition_obj = AcquisitionObject( + acquisition=acquisition, uuid=acq_obj_uuid + ) acquisition.add_acquisition_object(acquisition_obj) - #Cycle through remaining metadata for acquisition entity and add attributes - add_metadata_for_subject(rdf_graph_parse,acq_obj,project.graph.namespaces,acquisition_obj) + # Cycle through remaining metadata for acquisition entity and add attributes + add_metadata_for_subject( + rdf_graph_parse, + acq_obj, + project.graph.namespaces, + acquisition_obj, + ) # check if this is a PET acquisition object - elif (acq_obj, RDF.type,URIRef(Constants.NIDM_PET._uri)) in rdf_graph: + elif ( + acq_obj, + RDF.type, + URIRef(Constants.NIDM_PET._uri), + ) in rdf_graph: acquisition = PETAcquisition(session=session, uuid=acq_uuid) if not 
session.acquisition_exist(acq_uuid): session.add_acquisition(acquisition) # Cycle through remaining metadata for acquisition activity and add attributes - add_metadata_for_subject(rdf_graph_parse, acq, project.graph.namespaces, acquisition) + add_metadata_for_subject( + rdf_graph_parse, + acq, + project.graph.namespaces, + acquisition, + ) # and add acquisition object - acquisition_obj = PETObject(acquisition=acquisition, uuid=acq_obj_uuid,add_default_type=False) + acquisition_obj = PETObject( + acquisition=acquisition, + uuid=acq_obj_uuid, + add_default_type=False, + ) acquisition.add_acquisition_object(acquisition_obj) # Cycle through remaining metadata for acquisition entity and add attributes - add_metadata_for_subject(rdf_graph_parse, acq_obj, project.graph.namespaces, - acquisition_obj) - - #query whether this is an assessment acquisition by way of looking at the generated entity and determining - #if it has the rdf:type Constants.NIDM_ASSESSMENT_ENTITY - #for acq_modality in rdf_graph_parse.objects(subject=acq_obj,predicate=RDF.type): - elif (acq_obj, RDF.type, URIRef(Constants.NIDM_ASSESSMENT_ENTITY._uri)) in rdf_graph: + add_metadata_for_subject( + rdf_graph_parse, + acq_obj, + project.graph.namespaces, + acquisition_obj, + ) + + # query whether this is an assessment acquisition by way of looking at the generated entity and determining + # if it has the rdf:type Constants.NIDM_ASSESSMENT_ENTITY + # for acq_modality in rdf_graph_parse.objects(subject=acq_obj,predicate=RDF.type): + elif ( + acq_obj, + RDF.type, + URIRef(Constants.NIDM_ASSESSMENT_ENTITY._uri), + ) in rdf_graph: + # if str(acq_modality) == Constants.NIDM_ASSESSMENT_ENTITY._uri: + acquisition = AssessmentAcquisition( + session=session, uuid=acq_uuid, add_default_type=False + ) + # Cycle through remaining metadata for acquisition activity and add attributes + add_metadata_for_subject( + rdf_graph_parse, + acq, + project.graph.namespaces, + acquisition, + ) - #if str(acq_modality) == Constants.NIDM_ASSESSMENT_ENTITY._uri: - acquisition=AssessmentAcquisition(session=session,uuid=acq_uuid,add_default_type=False) - #Cycle through remaining metadata for acquisition activity and add attributes - add_metadata_for_subject (rdf_graph_parse,acq,project.graph.namespaces,acquisition) - - #and add acquisition object - acquisition_obj=AssessmentObject(acquisition=acquisition,uuid=acq_obj_uuid,add_default_type=False) + # and add acquisition object + acquisition_obj = AssessmentObject( + acquisition=acquisition, + uuid=acq_obj_uuid, + add_default_type=False, + ) acquisition.add_acquisition_object(acquisition_obj) - #Cycle through remaining metadata for acquisition entity and add attributes - add_metadata_for_subject(rdf_graph_parse,acq_obj,project.graph.namespaces,acquisition_obj) + # Cycle through remaining metadata for acquisition entity and add attributes + add_metadata_for_subject( + rdf_graph_parse, + acq_obj, + project.graph.namespaces, + acquisition_obj, + ) # if this is a DWI scan then we could have b-value and b-vector files associated - elif ((acq_obj, RDF.type, URIRef(Constants.NIDM_MRI_DWI_BVAL._uri)) in rdf_graph) or \ - ((acq_obj, RDF.type, URIRef(Constants.NIDM_MRI_DWI_BVEC._uri)) in rdf_graph): + elif ( + ( + acq_obj, + RDF.type, + URIRef(Constants.NIDM_MRI_DWI_BVAL._uri), + ) + in rdf_graph + ) or ( + ( + acq_obj, + RDF.type, + URIRef(Constants.NIDM_MRI_DWI_BVEC._uri), + ) + in rdf_graph + ): # If this is a b-values filev acquisition = Acquisition(session=session, uuid=acq_uuid) if not 
session.acquisition_exist(acq_uuid): session.add_acquisition(acquisition) # Cycle through remaining metadata for acquisition activity and add attributes - add_metadata_for_subject(rdf_graph_parse, acq, project.graph.namespaces, acquisition) + add_metadata_for_subject( + rdf_graph_parse, + acq, + project.graph.namespaces, + acquisition, + ) # and add acquisition object - acquisition_obj = AcquisitionObject(acquisition=acquisition, uuid=acq_obj_uuid) + acquisition_obj = AcquisitionObject( + acquisition=acquisition, uuid=acq_obj_uuid + ) acquisition.add_acquisition_object(acquisition_obj) # Cycle through remaining metadata for acquisition entity and add attributes - add_metadata_for_subject(rdf_graph_parse, acq_obj, project.graph.namespaces, - acquisition_obj) - - - - #This skips rdf_type PROV['Activity'] + add_metadata_for_subject( + rdf_graph_parse, + acq_obj, + project.graph.namespaces, + acquisition_obj, + ) + + # This skips rdf_type PROV['Activity'] else: continue # Query graph for nidm:DataElements and instantiate a nidm:DataElement class and add them to the project - query = ''' + query = """ prefix nidm: - prefix rdfs: + prefix rdfs: select distinct ?uuid where { ?uuid a/rdfs:subClassOf* nidm:DataElement . } - ''' + """ # add all nidm:DataElements in graph qres = rdf_graph_parse.query(query) for row in qres: print(row) # instantiate a data element class assigning it the existing uuid - de = DataElement(project=project, uuid=row['uuid'], add_default_type=False) + de = DataElement(project=project, uuid=row["uuid"], add_default_type=False) # get the rest of the attributes for this data element and store - add_metadata_for_subject(rdf_graph_parse, row['uuid'], project.graph.namespaces, de) + add_metadata_for_subject( + rdf_graph_parse, row["uuid"], project.graph.namespaces, de + ) # now we need to check if there are labels for data element isAbout entries, if so add them. - query2 = ''' + query2 = ( + """ prefix nidm: prefix rdfs: @@ -313,16 +444,18 @@ def read_nidm(nidmDoc): <%s> nidm:isAbout ?id . ?id rdf:type prov:Entity ; - rdfs:label ?label . + rdfs:label ?label . } - ''' % row['uuid'] + """ + % row["uuid"] + ) # print(query2) qres2 = rdf_graph_parse.query(query2) # add this tuple to graph for row2 in qres2: - project.graph.entity(row2[0], {'rdfs:label': row2[1]}) + project.graph.entity(row2[0], {"rdfs:label": row2[1]}) # check for Derivatives. 
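The data-element query above relies on the SPARQL property path `a/rdfs:subClassOf*`, which picks up instances of nidm:DataElement and of any of its subclasses. A self-contained sketch of that behaviour with rdflib (illustrative only, not part of this patch; the ex: namespace and class names are assumed examples):

```python
# Illustrative sketch, not part of the patch: the "a/rdfs:subClassOf*" property
# path matches instances of nidm:DataElement subclasses as well.
from rdflib import RDF, RDFS, Graph, Namespace

NIDM = Namespace("http://purl.org/nidash/nidm#")
EX = Namespace("http://example.org/")  # assumed example namespace

g = Graph()
g.add((EX.CustomDataElement, RDFS.subClassOf, NIDM.DataElement))
g.add((EX.age_item, RDF.type, EX.CustomDataElement))

query = """
    prefix nidm: <http://purl.org/nidash/nidm#>
    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    select distinct ?uuid where { ?uuid a/rdfs:subClassOf* nidm:DataElement . }
"""
for row in g.query(query):
    print(row["uuid"])  # -> http://example.org/age_item
```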
# WIP: Currently FSL, Freesurfer, and ANTS tools add these derivatives as nidm:FSStatsCollection, @@ -330,9 +463,9 @@ def read_nidm(nidmDoc): # this should probably be explicitly indicated in the graphs but currently isn't # Query graph for any of the above Derivatives - query = ''' - prefix nidm: - prefix prov: + query = """ + prefix nidm: + prefix prov: select distinct ?uuid ?parent_act where { {?uuid a nidm:Derivative ; @@ -347,57 +480,63 @@ def read_nidm(nidmDoc): {?uuid a nidm:ANTSStatsCollection ; prov:wasGeneratedBy ?parent_act .} } - - ''' + + """ qres = rdf_graph_parse.query(query) for row in qres: # put this here so the following makes more sense - derivobj_uuid = row['uuid'] + derivobj_uuid = row["uuid"] # if the parent activity of the derivative object (entity) doesn't exist in the graph then create it - if row['parent_act'] not in project.derivatives: - deriv_act = Derivative(project=project, uuid=row['parent_act']) + if row["parent_act"] not in project.derivatives: + deriv_act = Derivative(project=project, uuid=row["parent_act"]) # add additional tripes - add_metadata_for_subject(rdf_graph_parse, row['parent_act'], project.graph.namespaces, deriv_act) + add_metadata_for_subject( + rdf_graph_parse, row["parent_act"], project.graph.namespaces, deriv_act + ) else: for d in project.get_derivatives: - if row['parent_act'] == d.get_uuid(): + if row["parent_act"] == d.get_uuid(): deriv_act = d - #check if derivative object already created and if not create it - #if derivobj_uuid not in deriv_act.get_derivative_objects(): + # check if derivative object already created and if not create it + # if derivobj_uuid not in deriv_act.get_derivative_objects(): # now instantiate the derivative object and add all triples - derivobj = DerivativeObject(derivative=deriv_act,uuid=derivobj_uuid) - add_metadata_for_subject(rdf_graph_parse, row['uuid'], project.graph.namespaces, derivobj) - - + derivobj = DerivativeObject(derivative=deriv_act, uuid=derivobj_uuid) + add_metadata_for_subject( + rdf_graph_parse, row["uuid"], project.graph.namespaces, derivobj + ) - return(project) + return project def get_RDFliteral_type(rdf_literal): - if (rdf_literal.datatype == XSD["integer"]): - #return (int(rdf_literal)) - return(pm.Literal(rdf_literal,datatype=pm.XSD["integer"])) - elif ((rdf_literal.datatype == XSD["float"]) or (rdf_literal.datatype == XSD["double"])): - #return(float(rdf_literal)) - return(pm.Literal(rdf_literal,datatype=pm.XSD["float"])) + if rdf_literal.datatype == XSD["integer"]: + # return (int(rdf_literal)) + return pm.Literal(rdf_literal, datatype=pm.XSD["integer"]) + elif (rdf_literal.datatype == XSD["float"]) or ( + rdf_literal.datatype == XSD["double"] + ): + # return(float(rdf_literal)) + return pm.Literal(rdf_literal, datatype=pm.XSD["float"]) else: - #return (str(rdf_literal)) - return(pm.Literal(rdf_literal,datatype=pm.XSD["string"])) + # return (str(rdf_literal)) + return pm.Literal(rdf_literal, datatype=pm.XSD["string"]) + def find_in_namespaces(search_uri, namespaces): - ''' + """ Looks through namespaces for search_uri :return: URI if found else False - ''' + """ for uris in namespaces: if uris.uri == search_uri: return uris - + return False -def add_metadata_for_subject (rdf_graph,subject_uri,namespaces,nidm_obj): + +def add_metadata_for_subject(rdf_graph, subject_uri, namespaces, nidm_obj): """ Cycles through triples for a particular subject and adds them to the nidm_obj @@ -408,76 +547,109 @@ def add_metadata_for_subject (rdf_graph,subject_uri,namespaces,nidm_obj): :return: 
None """ - #Cycle through remaining metadata and add attributes + # Cycle through remaining metadata and add attributes for predicate, objects in rdf_graph.predicate_objects(subject=subject_uri): # if this isn't a qualified association, add triples - if predicate != URIRef(Constants.PROV['qualifiedAssociation']): + if predicate != URIRef(Constants.PROV["qualifiedAssociation"]): # make predicate a qualified name obj_nm, obj_term = split_uri(predicate) - found_uri = find_in_namespaces(search_uri=URIRef(obj_nm), namespaces=namespaces) + found_uri = find_in_namespaces( + search_uri=URIRef(obj_nm), namespaces=namespaces + ) # if obj_nm is not in namespaces then it must just be part of some URI in the triple # so just add it as a prov.Identifier - if (not found_uri) and (obj_nm != Constants.PROV) and (obj_nm != Constants.XSD): - predicate = pm.QualifiedName(namespace=Namespace(str(predicate)), localpart="") + if ( + (not found_uri) + and (obj_nm != Constants.PROV) + and (obj_nm != Constants.XSD) + ): + predicate = pm.QualifiedName( + namespace=Namespace(str(predicate)), localpart="" + ) # else add as explicit prov.QualifiedName because it's easier to read - #else: + # else: # predicate = Identifier(predicate) - if (validators.url(objects)) and (predicate != Constants.PROV['Location']): + if (validators.url(objects)) and (predicate != Constants.PROV["Location"]): # try to split the URI to namespace and local parts, if fails just use the entire URI. try: - #create qualified names for objects - obj_nm,obj_term = split_uri(objects) + # create qualified names for objects + obj_nm, obj_term = split_uri(objects) # added because PyNIDM agent, activity, and entity classes already add the type - if ((objects == Constants.PROV['Activity']) or (objects == Constants.PROV['Agent']) or - (objects == Constants.PROV['Entity'])): + if ( + (objects == Constants.PROV["Activity"]) + or (objects == Constants.PROV["Agent"]) + or (objects == Constants.PROV["Entity"]) + ): continue # special case if obj_nm is prov, xsd, or nidm namespaces. 
These are added # automatically by provDocument so they aren't accessible via the namespaces list # so we check explicitly here - if ((obj_nm == str(Constants.PROV))): - nidm_obj.add_attributes({predicate: QualifiedName(Constants.PROV[obj_term])}) - elif ((obj_nm == str(Constants.NIDM))): - nidm_obj.add_attributes({predicate: QualifiedName(Constants.NIDM[obj_term])}) + if obj_nm == str(Constants.PROV): + nidm_obj.add_attributes( + {predicate: QualifiedName(Constants.PROV[obj_term])} + ) + elif obj_nm == str(Constants.NIDM): + nidm_obj.add_attributes( + {predicate: QualifiedName(Constants.NIDM[obj_term])} + ) else: - found_uri = find_in_namespaces(search_uri=URIRef(obj_nm),namespaces=namespaces) + found_uri = find_in_namespaces( + search_uri=URIRef(obj_nm), namespaces=namespaces + ) # if obj_nm is not in namespaces then it must just be part of some URI in the triple # so just add it as a prov.Identifier if not found_uri: nidm_obj.add_attributes({predicate: Identifier(objects)}) # else add as explicit prov.QualifiedName because it's easier to read else: - nidm_obj.add_attributes({predicate: pm.QualifiedName(found_uri, obj_term)}) + nidm_obj.add_attributes( + {predicate: pm.QualifiedName(found_uri, obj_term)} + ) except: - nidm_obj.add_attributes({predicate: pm.QualifiedName(namespace=Namespace(str(objects)),localpart="")}) + nidm_obj.add_attributes( + { + predicate: pm.QualifiedName( + namespace=Namespace(str(objects)), localpart="" + ) + } + ) else: # check if this is a qname and if so expand it # added to handle when a value is a qname. this should expand it.... - if (":" in objects) and isinstance(objects,URIRef): + if (":" in objects) and isinstance(objects, URIRef): objects = from_n3(objects) # check if objects is a url and if so store it as a URIRef else a Literal - if (validators.url(objects)): + if validators.url(objects): obj_nm, obj_term = split_uri(objects) - nidm_obj.add_attributes({predicate : Identifier(objects)}) + nidm_obj.add_attributes({predicate: Identifier(objects)}) else: - nidm_obj.add_attributes({predicate : get_RDFliteral_type(objects)}) + nidm_obj.add_attributes({predicate: get_RDFliteral_type(objects)}) # now find qualified associations - for bnode in rdf_graph.objects(subject=subject_uri, predicate=Constants.PROV['qualifiedAssociation']): + for bnode in rdf_graph.objects( + subject=subject_uri, predicate=Constants.PROV["qualifiedAssociation"] + ): # create temporary resource for this bnode r = Resource(rdf_graph, bnode) # get the object for this bnode with predicate Constants.PROV['hadRole'] - for r_obj in r.objects(predicate=Constants.PROV['hadRole']): + for r_obj in r.objects(predicate=Constants.PROV["hadRole"]): # if this is a qualified association with a participant then create the prov:Person agent if r_obj.identifier == URIRef(Constants.NIDM_PARTICIPANT.uri): # get identifier for prov:agent part of the blank node - for agent_obj in r.objects(predicate=Constants.PROV['agent']): + for agent_obj in r.objects(predicate=Constants.PROV["agent"]): # check if person exists already in graph, if not create it if agent_obj.identifier not in nidm_obj.graph.get_records(): - person = nidm_obj.add_person(uuid=agent_obj.identifier,add_default_type=False) + person = nidm_obj.add_person( + uuid=agent_obj.identifier, add_default_type=False + ) # add rest of metadata about person - add_metadata_for_subject(rdf_graph=rdf_graph, subject_uri=agent_obj.identifier, - namespaces=namespaces, nidm_obj=person) + add_metadata_for_subject( + rdf_graph=rdf_graph, + 
subject_uri=agent_obj.identifier, + namespaces=namespaces, + nidm_obj=person, + ) else: # we need the NIDM object here with uuid agent_obj.identifier and store it in person for obj in nidm_obj.graph.get_records(): @@ -485,108 +657,159 @@ def add_metadata_for_subject (rdf_graph,subject_uri,namespaces,nidm_obj): person = obj # create qualified names for objects obj_nm, obj_term = split_uri(r_obj.identifier) - found_uri = find_in_namespaces(search_uri=URIRef(obj_nm),namespaces=namespaces) + found_uri = find_in_namespaces( + search_uri=URIRef(obj_nm), namespaces=namespaces + ) # if obj_nm is not in namespaces then it must just be part of some URI in the triple # so just add it as a prov.Identifier if not found_uri: - #nidm_obj.add_qualified_association(person=person, role=pm.Identifier(r_obj.identifier)) - nidm_obj.add_qualified_association(person=person, role=pm.QualifiedName(Namespace(obj_nm),obj_term)) + # nidm_obj.add_qualified_association(person=person, role=pm.Identifier(r_obj.identifier)) + nidm_obj.add_qualified_association( + person=person, + role=pm.QualifiedName(Namespace(obj_nm), obj_term), + ) else: - nidm_obj.add_qualified_association(person=person, role=pm.QualifiedName(found_uri, obj_term)) + nidm_obj.add_qualified_association( + person=person, role=pm.QualifiedName(found_uri, obj_term) + ) # else it's an association with another agent which isn't a participant else: # get identifier for the prov:agent part of the blank node - for agent_obj in r.objects(predicate=Constants.PROV['agent']): + for agent_obj in r.objects(predicate=Constants.PROV["agent"]): # check if the agent exists in the graph else add it if agent_obj.identifier not in nidm_obj.graph.get_records(): - generic_agent = nidm_obj.graph.agent(identifier=agent_obj.identifier) + generic_agent = nidm_obj.graph.agent( + identifier=agent_obj.identifier + ) # add rest of metadata about the agent - add_metadata_for_subject(rdf_graph=rdf_graph, subject_uri=agent_obj.identifier, - namespaces=namespaces, nidm_obj=generic_agent) + add_metadata_for_subject( + rdf_graph=rdf_graph, + subject_uri=agent_obj.identifier, + namespaces=namespaces, + nidm_obj=generic_agent, + ) # try and split uri into namespace and local parts, if fails just use entire URI try: # create qualified names for objects obj_nm, obj_term = split_uri(r_obj.identifier) - found_uri = find_in_namespaces(search_uri=URIRef(obj_nm), namespaces=namespaces) + found_uri = find_in_namespaces( + search_uri=URIRef(obj_nm), namespaces=namespaces + ) # if obj_nm is not in namespaces then it must just be part of some URI in the triple # so just add it as a prov.Identifier if not found_uri: - - nidm_obj.add_qualified_association(person=generic_agent, - role=pm.QualifiedName(Namespace(obj_nm),obj_term)) + nidm_obj.add_qualified_association( + person=generic_agent, + role=pm.QualifiedName(Namespace(obj_nm), obj_term), + ) else: - nidm_obj.add_qualified_association(person=generic_agent, - role=pm.QualifiedName(found_uri, obj_term)) + nidm_obj.add_qualified_association( + person=generic_agent, + role=pm.QualifiedName(found_uri, obj_term), + ) except: - nidm_obj.add_qualified_association(person=generic_agent, role=pm.QualifiedName(Namespace(r_obj.identifier),"")) + nidm_obj.add_qualified_association( + person=generic_agent, + role=pm.QualifiedName(Namespace(r_obj.identifier), ""), + ) -def QuerySciCrunchElasticSearch(query_string,type='cde', anscestors=True): - ''' +def QuerySciCrunchElasticSearch(query_string, type="cde", anscestors=True): + """ This function will perform an 
elastic search in SciCrunch on the [query_string] using API [key] and return the json package. :param key: API key from sci crunch :param query_string: arbitrary string to search for terms :param type: default is 'CDE'. Acceptable values are 'cde' or 'pde'. :return: json document of results form elastic search - ''' + """ - #Note, once Jeff Grethe, et al. give us the query to get the ReproNim "tagged" ancestors query we'd do that query first and replace - #the "ancestors.ilx" parameter in the query data package below with new interlex IDs... - #this allows interlex developers to dynamicall change the ancestor terms that are part of the ReproNim term trove and have this - #query use that new information.... + # Note, once Jeff Grethe, et al. give us the query to get the ReproNim "tagged" ancestors query we'd do that query first and replace + # the "ancestors.ilx" parameter in the query data package below with new interlex IDs... + # this allows interlex developers to dynamicall change the ancestor terms that are part of the ReproNim term trove and have this + # query use that new information.... try: os.environ["INTERLEX_API_KEY"] except KeyError: print("Please set the environment variable INTERLEX_API_KEY") sys.exit(1) - #Add check for internet connection, if not then skip this query...return empty dictionary - + # Add check for internet connection, if not then skip this query...return empty dictionary headers = { - 'Content-Type': 'application/json', + "Content-Type": "application/json", } - params = ( - ('key', os.environ["INTERLEX_API_KEY"]), - ) - if type == 'cde': + params = (("key", os.environ["INTERLEX_API_KEY"]),) + if type == "cde": if anscestors: - data = '\n{\n "query": {\n "bool": {\n "must" : [\n { "term" : { "type" : "cde" } },\n { "terms" : { "ancestors.ilx" : ["ilx_0115066" , "ilx_0103210", "ilx_0115072", "ilx_0115070"] } },\n { "multi_match" : {\n "query": "%s", \n "fields": [ "label", "definition" ] \n } }\n]\n }\n }\n}\n' %query_string + data = ( + '\n{\n "query": {\n "bool": {\n "must" : [\n { "term" : { "type" : "cde" } },\n { "terms" : { "ancestors.ilx" : ["ilx_0115066" , "ilx_0103210", "ilx_0115072", "ilx_0115070"] } },\n { "multi_match" : {\n "query": "%s", \n "fields": [ "label", "definition" ] \n } }\n]\n }\n }\n}\n' + % query_string + ) else: - data = '\n{\n "query": {\n "bool": {\n "must" : [\n { "term" : { "type" : "cde" } },\n { "multi_match" : {\n "query": "%s", \n "fields": [ "label", "definition" ] \n } }\n]\n }\n }\n}\n' %query_string - elif type == 'pde': + data = ( + '\n{\n "query": {\n "bool": {\n "must" : [\n { "term" : { "type" : "cde" } },\n { "multi_match" : {\n "query": "%s", \n "fields": [ "label", "definition" ] \n } }\n]\n }\n }\n}\n' + % query_string + ) + elif type == "pde": if anscestors: - data = '\n{\n "query": {\n "bool": {\n "must" : [\n { "term" : { "type" : "pde" } },\n { "terms" : { "ancestors.ilx" : ["ilx_0115066" , "ilx_0103210", "ilx_0115072", "ilx_0115070"] } },\n { "multi_match" : {\n "query": "%s", \n "fields": [ "label", "definition" ] \n } }\n]\n }\n }\n}\n' %query_string + data = ( + '\n{\n "query": {\n "bool": {\n "must" : [\n { "term" : { "type" : "pde" } },\n { "terms" : { "ancestors.ilx" : ["ilx_0115066" , "ilx_0103210", "ilx_0115072", "ilx_0115070"] } },\n { "multi_match" : {\n "query": "%s", \n "fields": [ "label", "definition" ] \n } }\n]\n }\n }\n}\n' + % query_string + ) else: - data = '\n{\n "query": {\n "bool": {\n "must" : [\n { "term" : { "type" : "pde" } },\n { "multi_match" : {\n "query": "%s", \n "fields": [ 
"label", "definition" ] \n } }\n]\n }\n }\n}\n' %query_string - elif type == 'fde': + data = ( + '\n{\n "query": {\n "bool": {\n "must" : [\n { "term" : { "type" : "pde" } },\n { "multi_match" : {\n "query": "%s", \n "fields": [ "label", "definition" ] \n } }\n]\n }\n }\n}\n' + % query_string + ) + elif type == "fde": if anscestors: - data = '\n{\n "query": {\n "bool": {\n "must" : [\n { "term" : { "type" : "fde" } },\n { "terms" : { "ancestors.ilx" : ["ilx_0115066" , "ilx_0103210", "ilx_0115072", "ilx_0115070"] } },\n { "multi_match" : {\n "query": "%s", \n "fields": [ "label", "definition" ] \n } }\n]\n }\n }\n}\n' %query_string + data = ( + '\n{\n "query": {\n "bool": {\n "must" : [\n { "term" : { "type" : "fde" } },\n { "terms" : { "ancestors.ilx" : ["ilx_0115066" , "ilx_0103210", "ilx_0115072", "ilx_0115070"] } },\n { "multi_match" : {\n "query": "%s", \n "fields": [ "label", "definition" ] \n } }\n]\n }\n }\n}\n' + % query_string + ) else: - data = '\n{\n "query": {\n "bool": {\n "must" : [\n { "term" : { "type" : "fde" } },\n { "multi_match" : {\n "query": "%s", \n "fields": [ "label", "definition" ] \n } }\n]\n }\n }\n}\n' %query_string + data = ( + '\n{\n "query": {\n "bool": {\n "must" : [\n { "term" : { "type" : "fde" } },\n { "multi_match" : {\n "query": "%s", \n "fields": [ "label", "definition" ] \n } }\n]\n }\n }\n}\n' + % query_string + ) - elif type == 'term': + elif type == "term": if anscestors: - data = '\n{\n "query": {\n "bool": {\n "must" : [\n { "term" : { "type" : "term" } },\n { "terms" : { "ancestors.ilx" : ["ilx_0115066" , "ilx_0103210", "ilx_0115072", "ilx_0115070"] } },\n { "multi_match" : {\n "query": "%s", \n "fields": [ "label", "definition" ] \n } }\n]\n }\n }\n}\n' % query_string + data = ( + '\n{\n "query": {\n "bool": {\n "must" : [\n { "term" : { "type" : "term" } },\n { "terms" : { "ancestors.ilx" : ["ilx_0115066" , "ilx_0103210", "ilx_0115072", "ilx_0115070"] } },\n { "multi_match" : {\n "query": "%s", \n "fields": [ "label", "definition" ] \n } }\n]\n }\n }\n}\n' + % query_string + ) else: - data = '\n{\n "query": {\n "bool": {\n "must" : [\n { "term" : { "type" : "term" } },\n { "multi_match" : {\n "query": "%s", \n "fields": [ "label", "definition" ] \n } }\n]\n }\n }\n}\n' % query_string + data = ( + '\n{\n "query": {\n "bool": {\n "must" : [\n { "term" : { "type" : "term" } },\n { "multi_match" : {\n "query": "%s", \n "fields": [ "label", "definition" ] \n } }\n]\n }\n }\n}\n' + % query_string + ) else: - print("ERROR: Valid types for SciCrunch query are 'cde','pde', or 'fde'. You set type: %s " %type) + print( + "ERROR: Valid types for SciCrunch query are 'cde','pde', or 'fde'. 
You set type: %s " + % type + ) print("ERROR: in function Utils.py/QuerySciCrunchElasticSearch") exit(1) - response = requests.post('https://scicrunch.org/api/1/elastic-ilx/interlex/term/_search#', headers=headers, params=params, data=data) + response = requests.post( + "https://scicrunch.org/api/1/elastic-ilx/interlex/term/_search#", + headers=headers, + params=params, + data=data, + ) return json.loads(response.text) -def GetNIDMTermsFromSciCrunch(query_string,type='cde', ancestor=True): - ''' + +def GetNIDMTermsFromSciCrunch(query_string, type="cde", ancestor=True): + """ Helper function which issues elastic search query of SciCrunch using QuerySciCrunchElasticSearch function and returns terms list with label, definition, and preferred URLs in dictionary :param key: API key from sci crunch @@ -594,35 +817,38 @@ def GetNIDMTermsFromSciCrunch(query_string,type='cde', ancestor=True): :param type: should be 'cde' or 'pde' for the moment :param ancestor: Boolean flag to tell Interlex elastic search to use ancestors (i.e. tagged terms) or not :return: dictionary with keys 'ilx','label','definition','preferred_url' - ''' - - json_data = QuerySciCrunchElasticSearch(query_string,type,ancestor) - results={} - #check if query was successful - if json_data['timed_out'] != True: - #example printing term label, definition, and preferred URL - for term in json_data['hits']['hits']: - #find preferred URL - results[term['_source']['ilx']] = {} - for items in term['_source']['existing_ids']: - if items['preferred']=='1': - results[term['_source']['ilx']]['preferred_url']=items['iri'] - results[term['_source']['ilx']]['label'] = term['_source']['label'] - results[term['_source']['ilx']]['definition'] = term['_source']['definition'] + """ + + json_data = QuerySciCrunchElasticSearch(query_string, type, ancestor) + results = {} + # check if query was successful + if json_data["timed_out"] != True: + # example printing term label, definition, and preferred URL + for term in json_data["hits"]["hits"]: + # find preferred URL + results[term["_source"]["ilx"]] = {} + for items in term["_source"]["existing_ids"]: + if items["preferred"] == "1": + results[term["_source"]["ilx"]]["preferred_url"] = items["iri"] + results[term["_source"]["ilx"]]["label"] = term["_source"]["label"] + results[term["_source"]["ilx"]]["definition"] = term["_source"][ + "definition" + ] return results + def InitializeInterlexRemote(): - ''' + """ This function initializes a connection to Interlex for use in adding personal data elements. 
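A usage sketch for the SciCrunch helper above (illustrative only, not part of this patch; it performs a live network call, requires a real INTERLEX_API_KEY as described in the README, and "age" is an arbitrary example query):

```python
# Illustrative sketch, not part of the patch: look up candidate common data
# elements in SciCrunch/InterLex and print their labels and preferred URLs.
import os

from nidm.experiment.Utils import GetNIDMTermsFromSciCrunch

assert "INTERLEX_API_KEY" in os.environ, "set your SciCrunch API key first"
results = GetNIDMTermsFromSciCrunch("age", type="cde", ancestor=False)
for ilx_id, term in results.items():
    print(ilx_id, term["label"], term.get("preferred_url", ""))
```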
To use InterLex it requires you to set an environment variable INTERLEX_API_KEY with your api key :return: interlex object - ''' - #endpoint = "https://scicrunch.org/api/1/" + """ + # endpoint = "https://scicrunch.org/api/1/" # beta endpoint for testing # endpoint = "https://beta.scicrunch.org/api/1/" - InterLexRemote = oq.plugin.get('InterLex') + InterLexRemote = oq.plugin.get("InterLex") # changed per tgbugs changes to InterLexRemote no longer taking api_key as a parameter # set INTERLEX_API_KEY environment variable instead...ilx_cli = InterLexRemote(api_key=key, apiEndpoint=endpoint) ilx_cli = InterLexRemote(apiEndpoint=INTERLEX_ENDPOINT) @@ -631,102 +857,130 @@ def InitializeInterlexRemote(): except Exception as e: print("error initializing InterLex connection...") print("you will not be able to add new personal data elements.") - print("Did you put your scicrunch API key in an environment variable INTERLEX_API_KEY?") + print( + "Did you put your scicrunch API key in an environment variable INTERLEX_API_KEY?" + ) return ilx_cli -def AddPDEToInterlex(ilx_obj,label,definition,units, min, max, datatype, isabout=None, categorymappings=None): - ''' - This function will add the PDE (personal data elements) to Interlex using the Interlex ontquery API. - - :param interlex_obj: Object created using ontquery.plugin.get() function (see: https://github.com/tgbugs/ontquery) + +def AddPDEToInterlex( + ilx_obj, + label, + definition, + units, + min, + max, + datatype, + isabout=None, + categorymappings=None, +): + """ + This function will add the PDE (personal data elements) to Interlex using the Interlex ontquery API. + + :param interlex_obj: Object created using ontquery.plugin.get() function (see: https://github.com/tgbugs/ontquery) :param label: Label for term entity being created :param definition: Definition for term entity being created :param comment: Comments to help understand the object - :return: response from Interlex - ''' + :return: response from Interlex + """ # Interlex uris for predicates, tmp_ prefix dor beta endpoing, ilx_ for production - prefix=INTERLEX_PREFIX + prefix = INTERLEX_PREFIX # for beta testing # prefix = 'tmp' - uri_datatype = 'http://uri.interlex.org/base/' + prefix + '_0382131' - uri_units = 'http://uri.interlex.org/base/' + prefix + '_0382130' - uri_min = 'http://uri.interlex.org/base/' + prefix + '_0382133' - uri_max = 'http://uri.interlex.org/base/' + prefix + '_0382132' - uri_category = 'http://uri.interlex.org/base/' + prefix + '_0382129' - uri_isabout = 'http://uri.interlex.org/base/' + prefix + '_0381385' - + uri_datatype = "http://uri.interlex.org/base/" + prefix + "_0382131" + uri_units = "http://uri.interlex.org/base/" + prefix + "_0382130" + uri_min = "http://uri.interlex.org/base/" + prefix + "_0382133" + uri_max = "http://uri.interlex.org/base/" + prefix + "_0382132" + uri_category = "http://uri.interlex.org/base/" + prefix + "_0382129" + uri_isabout = "http://uri.interlex.org/base/" + prefix + "_0381385" # return ilx_obj.add_pde(label=label, definition=definition, comment=comment, type='pde') if categorymappings is not None: if isabout is not None: - tmp = ilx_obj.add_pde(label=label, definition=definition, predicates = { - uri_datatype : datatype, - uri_units : units, - uri_min : min, - uri_max : max, - uri_isabout : isabout, - uri_category : categorymappings - }) + tmp = ilx_obj.add_pde( + label=label, + definition=definition, + predicates={ + uri_datatype: datatype, + uri_units: units, + uri_min: min, + uri_max: max, + uri_isabout: isabout, + 
uri_category: categorymappings, + }, + ) else: - tmp = ilx_obj.add_pde(label=label, definition=definition, predicates = { - uri_datatype : datatype, - uri_units : units, - uri_min : min, - uri_max : max, - uri_category : categorymappings - }) + tmp = ilx_obj.add_pde( + label=label, + definition=definition, + predicates={ + uri_datatype: datatype, + uri_units: units, + uri_min: min, + uri_max: max, + uri_category: categorymappings, + }, + ) else: if isabout is not None: - tmp = ilx_obj.add_pde(label=label, definition=definition, predicates = { - - uri_datatype : datatype, - uri_units : units, - uri_min : min, - uri_max : max, - uri_isabout : isabout - }) + tmp = ilx_obj.add_pde( + label=label, + definition=definition, + predicates={ + uri_datatype: datatype, + uri_units: units, + uri_min: min, + uri_max: max, + uri_isabout: isabout, + }, + ) else: - tmp = ilx_obj.add_pde(label=label, definition=definition, predicates = { - - uri_datatype : datatype, - uri_units : units, - uri_min : min, - uri_max : max - }) + tmp = ilx_obj.add_pde( + label=label, + definition=definition, + predicates={ + uri_datatype: datatype, + uri_units: units, + uri_min: min, + uri_max: max, + }, + ) return tmp + def AddConceptToInterlex(ilx_obj, label, definition): - ''' - This function will add a concept to Interlex using the Interlex ontquery API. + """ + This function will add a concept to Interlex using the Interlex ontquery API. - :param ilx_obj: Object created using ontquery.plugin.get() function (see: https://github.com/tgbugs/ontquery) - :param label: Label for term entity being created - :param definition: Definition for term entity being created - :param comment: Comments to help understand the object - :return: response from Interlex - ''' + :param ilx_obj: Object created using ontquery.plugin.get() function (see: https://github.com/tgbugs/ontquery) + :param label: Label for term entity being created + :param definition: Definition for term entity being created + :param comment: Comments to help understand the object + :return: response from Interlex + """ # Interlex uris for predicates, tmp_ prefix dor beta endpoing, ilx_ for production - #prefix = 'ilx' + # prefix = 'ilx' # for beta testing prefix = INTERLEX_PREFIX tmp = ilx_obj.add_pde(label=label, definition=definition) return tmp + + def load_nidm_terms_concepts(): - ''' + """ This function will pull NIDM-Terms used concepts from the NIDM-Terms repo. These are concepts used in annotating other datasets and should be used prior to broadening the search to InterLex and CogAtlas concepts. By using these first, ones that have already been used to annotate datasets, we maximize our ability to find concept-based query matches across datasets :return: - ''' + """ concept_url = "https://raw.githubusercontent.com/NIDM-Terms/terms/master/terms/NIDM_Concepts.jsonld" - try: response = urlopen(concept_url) concept_graph = json.loads(response.read().decode("utf-8")) @@ -738,23 +992,22 @@ def load_nidm_terms_concepts(): def load_nidm_owl_files(): - ''' + """ This function loads the NIDM-experiment related OWL files and imports, creates a union graph and returns it. :return: graph of all OWL files and imports from PyNIDM experiment - ''' - #load nidm-experiment.owl file and all imports directly - #create empty graph + """ + # load nidm-experiment.owl file and all imports directly + # create empty graph union_graph = Graph() - ## COMMENTED OUT BY DBK (5/13/21). 
CHANGING TO GET OWL FILES DIRECTORY FROM NIDM-SPECS REPO # - #check if there is an internet connection, if so load directly from https://github.com/incf-nidash/nidm-specs/tree/master/nidm/nidm-experiment/terms and - #basepath=os.path.dirname(os.path.dirname(__file__)) - #terms_path = os.path.join(basepath,"terms") - #imports_path=os.path.join(basepath,"terms","imports") + # check if there is an internet connection, if so load directly from https://github.com/incf-nidash/nidm-specs/tree/master/nidm/nidm-experiment/terms and + # basepath=os.path.dirname(os.path.dirname(__file__)) + # terms_path = os.path.join(basepath,"terms") + # imports_path=os.path.join(basepath,"terms","imports") # - #imports=[ + # imports=[ # "crypto_import.ttl", # "dc_import.ttl", # "iao_import.ttl", @@ -767,10 +1020,10 @@ def load_nidm_owl_files(): # "qibo_import.ttl", # "sio_import.ttl", # "stato_import.ttl" - #] + # ] ##load each import - #for resource in imports: + # for resource in imports: # temp_graph = Graph() # try: # @@ -781,103 +1034,109 @@ def load_nidm_owl_files(): # logging.info("Error opening %s import file..continuing" %os.path.join(imports_path,resource)) # continue - owls=[ - "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/crypto_import.ttl", - "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/dc_import.ttl", - "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/dicom_import.ttl", - "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/iao_import.ttl", - "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/nfo_import.ttl", - "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/obi_import.ttl", - "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/ontoneurolog_instruments_import.ttl", - "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/pato_import.ttl", - "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/pato_import.ttl", - "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/prv_import.ttl", - "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/sio_import.ttl", - "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/terms/nidm-experiment.owl", - "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-results/terms/nidm-results.owl" + owls = [ + "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/crypto_import.ttl", + "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/dc_import.ttl", + "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/dicom_import.ttl", + "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/iao_import.ttl", + "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/nfo_import.ttl", + "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/obi_import.ttl", + "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/ontoneurolog_instruments_import.ttl", + 
"https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/pato_import.ttl", + "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/pato_import.ttl", + "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/prv_import.ttl", + "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/imports/sio_import.ttl", + "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-experiment/terms/nidm-experiment.owl", + "https://raw.githubusercontent.com/incf-nidash/nidm-specs/master/nidm/nidm-results/terms/nidm-results.owl", ] - #load each owl file + # load each owl file for resource in owls: temp_graph = Graph() try: temp_graph.parse(location=resource, format="turtle") - union_graph=union_graph+temp_graph + union_graph = union_graph + temp_graph except Exception: - logging.info("Error opening %s owl file..continuing" %resource) + logging.info("Error opening %s owl file..continuing" % resource) continue - return union_graph - -def fuzzy_match_terms_from_graph(graph,query_string): - ''' +def fuzzy_match_terms_from_graph(graph, query_string): + """ This function performs a fuzzy match of the constants in Constants.py list nidm_experiment_terms for term constants matching the query....i ideally this should really be searching the OWL file when it's ready :param query_string: string to query :return: dictionary whose key is the NIDM constant and value is the match score to the query - ''' - + """ - match_scores={} + match_scores = {} - #search for labels rdfs:label and obo:IAO_0000115 (description) for each rdf:type owl:Class + # search for labels rdfs:label and obo:IAO_0000115 (description) for each rdf:type owl:Class for term in graph.subjects(predicate=RDF.type, object=Constants.OWL["Class"]): - for label in graph.objects(subject=term, predicate=Constants.RDFS['label']): + for label in graph.objects(subject=term, predicate=Constants.RDFS["label"]): match_scores[term] = {} - match_scores[term]['score'] = fuzz.token_sort_ratio(query_string,label) - match_scores[term]['label'] = label - match_scores[term]['url'] = term - match_scores[term]['definition']=None - for description in graph.objects(subject=term,predicate=Constants.OBO["IAO_0000115"]): - match_scores[term]['definition'] =description - - #for term in owl_graph.classes(): + match_scores[term]["score"] = fuzz.token_sort_ratio(query_string, label) + match_scores[term]["label"] = label + match_scores[term]["url"] = term + match_scores[term]["definition"] = None + for description in graph.objects( + subject=term, predicate=Constants.OBO["IAO_0000115"] + ): + match_scores[term]["definition"] = description + + # for term in owl_graph.classes(): # print(term.get_properties()) return match_scores -def fuzzy_match_concepts_from_nidmterms_jsonld(json_struct,query_string): + +def fuzzy_match_concepts_from_nidmterms_jsonld(json_struct, query_string): match_scores = {} # search for labels rdfs:label and obo:IAO_0000115 (description) for each rdf:type owl:Class - for entry in json_struct['terms']: - match_scores[entry['label']] = {} - match_scores[entry['label']]['score'] = fuzz.token_sort_ratio(query_string, entry['label']) - match_scores[entry['label']]['label'] = entry['label'] + for entry in json_struct["terms"]: + match_scores[entry["label"]] = {} + match_scores[entry["label"]]["score"] = fuzz.token_sort_ratio( + query_string, entry["label"] + ) + match_scores[entry["label"]]["label"] = 
entry["label"] if "schema:url" in entry.keys(): - match_scores[entry['label']]['url'] = entry["schema:url"] + match_scores[entry["label"]]["url"] = entry["schema:url"] else: - match_scores[entry['label']]['url'] = "" - if 'description' in entry.keys(): - match_scores[entry['label']]['definition'] = entry['description'] + match_scores[entry["label"]]["url"] = "" + if "description" in entry.keys(): + match_scores[entry["label"]]["definition"] = entry["description"] else: - match_scores[entry['label']]['definition'] = "" + match_scores[entry["label"]]["definition"] = "" # for term in owl_graph.classes(): # print(term.get_properties()) return match_scores -def fuzzy_match_terms_from_cogatlas_json(json_struct,query_string): - match_scores={} +def fuzzy_match_terms_from_cogatlas_json(json_struct, query_string): + match_scores = {} - #search for labels rdfs:label and obo:IAO_0000115 (description) for each rdf:type owl:Class + # search for labels rdfs:label and obo:IAO_0000115 (description) for each rdf:type owl:Class for entry in json_struct: + match_scores[entry["name"]] = {} + match_scores[entry["name"]]["score"] = fuzz.token_sort_ratio( + query_string, entry["name"] + ) + match_scores[entry["name"]]["label"] = entry["name"] + match_scores[entry["name"]]["url"] = ( + "https://www.cognitiveatlas.org/concept/id/" + entry["id"] + ) + match_scores[entry["name"]]["definition"] = entry["definition_text"] - match_scores[entry['name']] = {} - match_scores[entry['name']]['score'] = fuzz.token_sort_ratio(query_string,entry['name']) - match_scores[entry['name']]['label'] = entry['name'] - match_scores[entry['name']]['url'] = "https://www.cognitiveatlas.org/concept/id/" + entry['id'] - match_scores[entry['name']]['definition']=entry['definition_text'] - - #for term in owl_graph.classes(): + # for term in owl_graph.classes(): # print(term.get_properties()) return match_scores -def authenticate_github(authed=None,credentials=None): - ''' + +def authenticate_github(authed=None, credentials=None): + """ This function will hangle GitHub authentication with or without a token. If the parameter authed is defined the function will check whether it's an active/valid authentication object. If not, and username/token is supplied then an authentication object will be created. 
If username + token is not supplied then the user will be prompted to input @@ -886,114 +1145,129 @@ def authenticate_github(authed=None,credentials=None): :param credentials: Optional GitHub credential list username,password or username,token :return: GitHub authentication object or None if unsuccessful - ''' + """ print("GitHub authentication...") - indx=1 - maxtry=5 + indx = 1 + maxtry = 5 while indx < maxtry: - if (len(credentials)>= 2): - #authenticate with token - g=Github(credentials[0],credentials[1]) - elif (len(credentials)==1): + if len(credentials) >= 2: + # authenticate with token + g = Github(credentials[0], credentials[1]) + elif len(credentials) == 1: pw = getpass.getpass("Please enter your GitHub password: ") - g=Github(credentials[0],pw) + g = Github(credentials[0], pw) else: username = input("Please enter your GitHub user name: ") pw = getpass.getpass("Please enter your GitHub password: ") - #try to logging into GitHub - g=Github(username,pw) + # try to logging into GitHub + g = Github(username, pw) - authed=g.get_user() + authed = g.get_user() try: - #check we're logged in by checking that we can access the public repos list - repo=authed.public_repos + # check we're logged in by checking that we can access the public repos list + repo = authed.public_repos logging.info("Github authentication successful") - new_term=False + new_term = False break except GithubException as e: logging.info("error logging into your github account, please try again...") - indx=indx+1 + indx = indx + 1 - if (indx == maxtry): - logging.critical("GitHub authentication failed. Check your username / password / token and try again") + if indx == maxtry: + logging.critical( + "GitHub authentication failed. Check your username / password / token and try again" + ) return None else: - return authed,g + return authed, g -def getSubjIDColumn(column_to_terms,df): - ''' + +def getSubjIDColumn(column_to_terms, df): + """ This function returns column number from CSV file that matches subjid. If it can't automatically detect it based on the Constants.NIDM_SUBJECTID term (i.e. if the user selected a different term to annotate subject ID then it asks the user. :param column_to_terms: json variable->term mapping dictionary made by nidm.experiment.Utils.map_variables_to_terms :param df: dataframe of CSV file with tabular data to convert to RDF. 
:return: subject ID column number in CSV dataframe - ''' + """ - #look at column_to_terms dictionary for NIDM URL for subject id (Constants.NIDM_SUBJECTID) - id_field=None + # look at column_to_terms dictionary for NIDM URL for subject id (Constants.NIDM_SUBJECTID) + id_field = None for key, value in column_to_terms.items(): - if Constants.NIDM_SUBJECTID._str == column_to_terms[key]['label']: - id_field=key + if Constants.NIDM_SUBJECTID._str == column_to_terms[key]["label"]: + id_field = key - #if we couldn't find a subject ID field in column_to_terms, ask user + # if we couldn't find a subject ID field in column_to_terms, ask user if id_field is None: - option=1 + option = 1 for column in df.columns: - print("%d: %s" %(option,column)) - option=option+1 - selection=input("Please select the subject ID field from the list above: ") - id_field=df.columns[int(selection)-1] + print("%d: %s" % (option, column)) + option = option + 1 + selection = input("Please select the subject ID field from the list above: ") + id_field = df.columns[int(selection) - 1] return id_field -def redcap_datadictionary_to_json(redcap_dd_file,assessment_name): - ''' + +def redcap_datadictionary_to_json(redcap_dd_file, assessment_name): + """ This function will convert a redcap data dictionary to our json data elements structure :param redcap_dd: RedCap data dictionary :return: json data element definitions - ''' + """ # load redcap data dictionary redcap_dd = pd.read_csv(redcap_dd_file) - json_map={} + json_map = {} # cycle through rows and store variable data elements - for index,row in redcap_dd.iterrows(): - current_tuple = str(DD(source=assessment_name, variable=row['Variable / Field Name'])) + for index, row in redcap_dd.iterrows(): + current_tuple = str( + DD(source=assessment_name, variable=row["Variable / Field Name"]) + ) json_map[current_tuple] = {} - json_map[current_tuple]['label'] = row['Variable / Field Name'] - json_map[current_tuple]['source_variable'] = row['Variable / Field Name'] - json_map[current_tuple]['description'] = row['Field Label'] - if not pd.isnull(row['Choices OR Calculations']): - if row['Field Type'] == 'calc': + json_map[current_tuple]["label"] = row["Variable / Field Name"] + json_map[current_tuple]["source_variable"] = row["Variable / Field Name"] + json_map[current_tuple]["description"] = row["Field Label"] + if not pd.isnull(row["Choices OR Calculations"]): + if row["Field Type"] == "calc": # this is a calculated field so it typically has a sum([var1],[var2],..,etc) so we'll just store # it has as a single level - json_map[current_tuple]['levels'] = [] - json_map[current_tuple]['levels'].append(str(row['Choices OR Calculations'])) + json_map[current_tuple]["levels"] = [] + json_map[current_tuple]["levels"].append( + str(row["Choices OR Calculations"]) + ) else: - split_choices = row['Choices OR Calculations'].split("|") + split_choices = row["Choices OR Calculations"].split("|") if len(split_choices) == 1: - json_map[current_tuple]['levels'] = [] - json_map[current_tuple]['valueType'] = URIRef(Constants.XSD["complexType"]) - split_choices = row['Choices OR Calculations'].split(",") + json_map[current_tuple]["levels"] = [] + json_map[current_tuple]["valueType"] = URIRef( + Constants.XSD["complexType"] + ) + split_choices = row["Choices OR Calculations"].split(",") for choices in split_choices: - json_map[current_tuple]['levels'].append(choices.strip()) + json_map[current_tuple]["levels"].append(choices.strip()) else: - json_map[current_tuple]['levels'] = {} - 
json_map[current_tuple]['valueType'] = URIRef(Constants.XSD["complexType"]) + json_map[current_tuple]["levels"] = {} + json_map[current_tuple]["valueType"] = URIRef( + Constants.XSD["complexType"] + ) for choices in split_choices: - key_value=choices.split(",") - json_map[current_tuple]['levels'][str(key_value[0]).strip()] = str(key_value[1]).strip() + key_value = choices.split(",") + json_map[current_tuple]["levels"][ + str(key_value[0]).strip() + ] = str(key_value[1]).strip() else: - json_map[current_tuple]['valueType'] = URIRef(Constants.XSD["string"]) + json_map[current_tuple]["valueType"] = URIRef(Constants.XSD["string"]) return json_map + def detect_json_format(json_map): - ''' + """ This function will take a json "sidecar" file or json annotation data dictionary structure and determine if it''s consistent with the ReproSchema structure (compound keys root-level keys DD(source=XXX,variable=YYY) and 'responseOptions' subkeys), the older pynidm format (compound @@ -1003,9 +1277,9 @@ def detect_json_format(json_map): :param json_map: json annotation file dictionary (file already loaded with json.load) - ''' + """ - for key,value in json_map.keys(): + for key, value in json_map.keys(): if "DD(" in key: if "responseOptions" in value.keys(): return "REPROSCHEMA" @@ -1014,25 +1288,42 @@ def detect_json_format(json_map): else: return "BIDS" + def match_participant_id_field(source_variable): - ''' + """ This function will test whether the source_variable is a participant ID field or not by string matching. :param source_variable: source variable string to test - ''' - - if ((("participant_id" in source_variable.lower()) or - ("subject_id" in source_variable.lower()) or - (("participant" in source_variable.lower()) and ("id" in source_variable.lower())) or - (("subject" in source_variable.lower()) and ("id" in source_variable.lower())) or - (("sub" in source_variable.lower()) and ("id" in source_variable.lower())))): + """ + if ( + ("participant_id" in source_variable.lower()) + or ("subject_id" in source_variable.lower()) + or ( + ("participant" in source_variable.lower()) + and ("id" in source_variable.lower()) + ) + or ( + ("subject" in source_variable.lower()) and ("id" in source_variable.lower()) + ) + or (("sub" in source_variable.lower()) and ("id" in source_variable.lower())) + ): return True else: return False -def map_variables_to_terms(df,directory, assessment_name, output_file=None,json_source=None,bids=False,owl_file='nidm', - associate_concepts=True, dataset_identifier=None): - ''' + +def map_variables_to_terms( + df, + directory, + assessment_name, + output_file=None, + json_source=None, + bids=False, + owl_file="nidm", + associate_concepts=True, + dataset_identifier=None, +): + """ :param df: data frame with first row containing variable names :param assessment_name: Name for the assessment to use in storing JSON mapping dictionary keys @@ -1042,8 +1333,7 @@ def map_variables_to_terms(df,directory, assessment_name, output_file=None,json_ :param directory: if output_file parameter is set to None then use this directory to store default JSON mapping file if doing variable->term mappings :return:return dictionary mapping variable names (i.e. 
columns) to terms - ''' - + """ # dictionary mapping column name to preferred term column_to_terms = {} @@ -1054,21 +1344,22 @@ def map_variables_to_terms(df,directory, assessment_name, output_file=None,json_ # check if json_source is a file if os.path.isfile(json_source): # load file - with open(json_source,'r') as f: + with open(json_source, "r") as f: json_map = json.load(f) else: - print("ERROR: Can't open json mapping file: %s" %(json_source)) + print("ERROR: Can't open json mapping file: %s" % (json_source)) exit() except: # if not then it's a json structure already json_map = json_source # added check to make sure json_map is valid dictionary - if not isinstance(json_map,dict): - print("ERROR: Invalid JSON file supplied. Please check your JSON file with a validator first!") + if not isinstance(json_map, dict): + print( + "ERROR: Invalid JSON file supplied. Please check your JSON file with a validator first!" + ) print("exiting!") exit() - # if no JSON mapping file was specified then create a default one for variable-term mappings # create a json_file filename from the output file filename if output_file is None: @@ -1080,9 +1371,9 @@ def map_variables_to_terms(df,directory, assessment_name, output_file=None,json_ except Exception as e: print("ERROR: initializing InterLex connection...") print("You will not be able to add or query for concepts.") - ilx_obj=None + ilx_obj = None # load NIDM OWL files if user requested it - if owl_file=='nidm': + if owl_file == "nidm": try: nidm_owl_graph = load_nidm_owl_files() except Exception as e: @@ -1099,275 +1390,538 @@ def map_variables_to_terms(df,directory, assessment_name, output_file=None,json_ # iterate over columns for column in df.columns: - # set up a dictionary entry for this column current_tuple = str(DD(source=assessment_name, variable=column)) # if we loaded a json file with existing mappings try: json_map - #try: - # check for column in json file + # try: + # check for column in json file try: - json_key = [key for key in json_map if column.lstrip().rstrip() == - key.split("variable")[1].split("=")[1].split(")")[0].lstrip("'").rstrip("'")] + json_key = [ + key + for key in json_map + if column.lstrip().rstrip() + == key.split("variable")[1] + .split("=")[1] + .split(")")[0] + .lstrip("'") + .rstrip("'") + ] except Exception as e: if "list index out of range" in str(e): - json_key = [key for key in json_map if column.lstrip().rstrip() == key] - + json_key = [ + key for key in json_map if column.lstrip().rstrip() == key + ] finally: - - if (json_map is not None) and (len(json_key)>0): + if (json_map is not None) and (len(json_key) > 0): column_to_terms[current_tuple] = {} # added in case for some reason there isn't a label key, try source_variable and if it's # a key then add this as the label as well. 
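The list comprehension just above recovers the CSV column name from a stringified `DD(source=..., variable=...)` compound key by repeated string splitting. The small illustration below walks through that parsing; the example key is invented, and the exact `DD(...)` string form is assumed only to the extent implied by the quote-stripping the code performs.

```
# Invented example key -- shows what the split/lstrip/rstrip chain above extracts.
key = "DD(source='test_assessment', variable='age')"

variable = (
    key.split("variable")[1]  # "='age')"
    .split("=")[1]            # "'age')"
    .split(")")[0]            # "'age'"
    .lstrip("'")
    .rstrip("'")
)
assert variable == "age"
```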
- if 'label' not in json_map[json_key[0]].keys(): - if ('source_variable' in json_map[json_key[0]].keys()): - column_to_terms[current_tuple]['label'] = json_map[json_key[0]]['source_variable'] - elif ('sourceVariable' in json_map[json_key[0]].keys()): - column_to_terms[current_tuple]['label'] = json_map[json_key[0]]['sourceVariable'] + if "label" not in json_map[json_key[0]].keys(): + if "source_variable" in json_map[json_key[0]].keys(): + column_to_terms[current_tuple]["label"] = json_map[ + json_key[0] + ]["source_variable"] + elif "sourceVariable" in json_map[json_key[0]].keys(): + column_to_terms[current_tuple]["label"] = json_map[ + json_key[0] + ]["sourceVariable"] else: - column_to_terms[current_tuple]['label'] = "" - print("No label or source_variable or sourceVariable keys found in json mapping file for variable " - "%s. Consider adding these to the json file as they are important" %json_key[0]) + column_to_terms[current_tuple]["label"] = "" + print( + "No label or source_variable or sourceVariable keys found in json mapping file for variable " + "%s. Consider adding these to the json file as they are important" + % json_key[0] + ) else: - column_to_terms[current_tuple]['label'] = json_map[json_key[0]]['label'] + column_to_terms[current_tuple]["label"] = json_map[json_key[0]][ + "label" + ] # added this bit to account for BIDS json files using "Description" whereas we use "description" # everywhere else - if 'description' in json_map[json_key[0]].keys(): - column_to_terms[current_tuple]['description'] = json_map[json_key[0]]['description'] - elif 'Description' in json_map[json_key[0]].keys(): - column_to_terms[current_tuple]['description'] = json_map[json_key[0]]['Description'] + if "description" in json_map[json_key[0]].keys(): + column_to_terms[current_tuple]["description"] = json_map[ + json_key[0] + ]["description"] + elif "Description" in json_map[json_key[0]].keys(): + column_to_terms[current_tuple]["description"] = json_map[ + json_key[0] + ]["Description"] else: - column_to_terms[current_tuple]['description'] = "" + column_to_terms[current_tuple]["description"] = "" # column_to_terms[current_tuple]['variable'] = json_map[json_key[0]]['variable'] - print("\n*************************************************************************************") - print("Column %s already annotated in user supplied JSON mapping file" %column) - print("label: %s" %column_to_terms[current_tuple]['label']) - print("description: %s" %column_to_terms[current_tuple]['description']) - if 'url' in json_map[json_key[0]]: - column_to_terms[current_tuple]['url'] = json_map[json_key[0]]['url'] - print("url: %s" %column_to_terms[current_tuple]['url']) + print( + "\n*************************************************************************************" + ) + print( + "Column %s already annotated in user supplied JSON mapping file" + % column + ) + print("label: %s" % column_to_terms[current_tuple]["label"]) + print( + "description: %s" + % column_to_terms[current_tuple]["description"] + ) + if "url" in json_map[json_key[0]]: + column_to_terms[current_tuple]["url"] = json_map[json_key[0]][ + "url" + ] + print("url: %s" % column_to_terms[current_tuple]["url"]) # print("Variable: %s" %column_to_terms[current_tuple]['variable']) - if 'sameAs' in json_map[json_key[0]]: - column_to_terms[current_tuple]['sameAs'] = json_map[json_key[0]]['sameAs'] - print("sameAs: %s" %column_to_terms[current_tuple]['sameAs']) - if 'url' in json_map[json_key[0]]: - column_to_terms[current_tuple]['url'] = 
json_map[json_key[0]]['url'] - print("url: %s" % column_to_terms[current_tuple]['url']) - - if 'source_variable' in json_map[json_key[0]]: - column_to_terms[current_tuple]['source_variable'] = json_map[json_key[0]]['source_variable'] - print("source variable: %s" % column_to_terms[current_tuple]['source_variable']) - elif 'sourceVariable' in json_map[json_key[0]]: - column_to_terms[current_tuple]['source_variable'] = json_map[json_key[0]]['sourceVariable'] - print("source variable: %s" % column_to_terms[current_tuple]['source_variable']) + if "sameAs" in json_map[json_key[0]]: + column_to_terms[current_tuple]["sameAs"] = json_map[ + json_key[0] + ]["sameAs"] + print("sameAs: %s" % column_to_terms[current_tuple]["sameAs"]) + if "url" in json_map[json_key[0]]: + column_to_terms[current_tuple]["url"] = json_map[json_key[0]][ + "url" + ] + print("url: %s" % column_to_terms[current_tuple]["url"]) + + if "source_variable" in json_map[json_key[0]]: + column_to_terms[current_tuple]["source_variable"] = json_map[ + json_key[0] + ]["source_variable"] + print( + "source variable: %s" + % column_to_terms[current_tuple]["source_variable"] + ) + elif "sourceVariable" in json_map[json_key[0]]: + column_to_terms[current_tuple]["source_variable"] = json_map[ + json_key[0] + ]["sourceVariable"] + print( + "source variable: %s" + % column_to_terms[current_tuple]["source_variable"] + ) else: # add source variable if not there... - column_to_terms[current_tuple]['source_variable'] = str(column) - print("Added source variable (%s) to annotations" %column) + column_to_terms[current_tuple]["source_variable"] = str(column) + print("Added source variable (%s) to annotations" % column) if "associatedWith" in json_map[json_key[0]]: - column_to_terms[current_tuple]['associatedWith'] = json_map[json_key[0]]['associatedWith'] - print("associatedWith: %s" % column_to_terms[current_tuple]['associatedWith']) + column_to_terms[current_tuple]["associatedWith"] = json_map[ + json_key[0] + ]["associatedWith"] + print( + "associatedWith: %s" + % column_to_terms[current_tuple]["associatedWith"] + ) if "allowableValues" in json_map[json_key[0]]: - column_to_terms[current_tuple]['allowableValues'] = json_map[json_key[0]]['allowableValues'] - print("allowableValues: %s" % column_to_terms[current_tuple]['allowableValues']) + column_to_terms[current_tuple]["allowableValues"] = json_map[ + json_key[0] + ]["allowableValues"] + print( + "allowableValues: %s" + % column_to_terms[current_tuple]["allowableValues"] + ) # added to support ReproSchema json format - if 'responseOptions' in json_map[json_key[0]]: - for subkey, subvalye in json_map[json_key[0]]['responseOptions'].items(): - if 'valueType' in subkey: - if 'responseOptions' not in column_to_terms[current_tuple].keys(): - column_to_terms[current_tuple]['responseOptions'] = {} - - column_to_terms[current_tuple]['responseOptions']['valueType'] = \ - json_map[json_key[0]]['responseOptions']['valueType'] - print("valueType: %s" % column_to_terms[current_tuple]['responseOptions'][ - 'valueType']) - - elif 'minValue' in subkey: - if 'responseOptions' not in column_to_terms[current_tuple].keys(): - column_to_terms[current_tuple]['responseOptions'] = {} - - column_to_terms[current_tuple]['responseOptions']['minValue'] = \ - json_map[json_key[0]]['responseOptions']['minValue'] + if "responseOptions" in json_map[json_key[0]]: + for subkey, subvalye in json_map[json_key[0]][ + "responseOptions" + ].items(): + if "valueType" in subkey: + if ( + "responseOptions" + not in 
column_to_terms[current_tuple].keys() + ): + column_to_terms[current_tuple][ + "responseOptions" + ] = {} + + column_to_terms[current_tuple]["responseOptions"][ + "valueType" + ] = json_map[json_key[0]]["responseOptions"][ + "valueType" + ] print( - "minValue: %s" % column_to_terms[current_tuple]['responseOptions']['minValue']) - - elif 'maxValue' in subkey: - if 'responseOptions' not in column_to_terms[current_tuple].keys(): - column_to_terms[current_tuple]['responseOptions'] = {} - - column_to_terms[current_tuple]['responseOptions']['maxValue'] = \ - json_map[json_key[0]]['responseOptions']['maxValue'] + "valueType: %s" + % column_to_terms[current_tuple]["responseOptions"][ + "valueType" + ] + ) + + elif "minValue" in subkey: + if ( + "responseOptions" + not in column_to_terms[current_tuple].keys() + ): + column_to_terms[current_tuple][ + "responseOptions" + ] = {} + + column_to_terms[current_tuple]["responseOptions"][ + "minValue" + ] = json_map[json_key[0]]["responseOptions"]["minValue"] + print( + "minValue: %s" + % column_to_terms[current_tuple]["responseOptions"][ + "minValue" + ] + ) + + elif "maxValue" in subkey: + if ( + "responseOptions" + not in column_to_terms[current_tuple].keys() + ): + column_to_terms[current_tuple][ + "responseOptions" + ] = {} + + column_to_terms[current_tuple]["responseOptions"][ + "maxValue" + ] = json_map[json_key[0]]["responseOptions"]["maxValue"] + print( + "maxValue: %s" + % column_to_terms[current_tuple]["responseOptions"][ + "maxValue" + ] + ) + elif "choices" in subkey: + if ( + "responseOptions" + not in column_to_terms[current_tuple].keys() + ): + column_to_terms[current_tuple][ + "responseOptions" + ] = {} + + column_to_terms[current_tuple]["responseOptions"][ + "choices" + ] = json_map[json_key[0]]["responseOptions"]["choices"] + print( + "levels: %s" + % column_to_terms[current_tuple]["responseOptions"][ + "choices" + ] + ) + elif "hasUnit" in subkey: + if ( + "responseOptions" + not in column_to_terms[current_tuple].keys() + ): + column_to_terms[current_tuple][ + "responseOptions" + ] = {} + + column_to_terms[current_tuple]["responseOptions"][ + "unitCode" + ] = json_map[json_key[0]]["responseOptions"]["hasUnit"] + print( + "units: %s" + % column_to_terms[current_tuple]["responseOptions"][ + "unitCode" + ] + ) + elif "unitCode" in subkey: + if ( + "responseOptions" + not in column_to_terms[current_tuple].keys() + ): + column_to_terms[current_tuple][ + "responseOptions" + ] = {} + + column_to_terms[current_tuple]["responseOptions"][ + "unitCode" + ] = json_map[json_key[0]]["responseOptions"]["unitCode"] print( - "maxValue: %s" % column_to_terms[current_tuple]['responseOptions']['maxValue']) - elif 'choices' in subkey: - if 'responseOptions' not in column_to_terms[current_tuple].keys(): - column_to_terms[current_tuple]['responseOptions'] = {} - - column_to_terms[current_tuple]['responseOptions']['choices'] = \ - json_map[json_key[0]]['responseOptions']['choices'] - print("levels: %s" % column_to_terms[current_tuple]['responseOptions']['choices']) - elif 'hasUnit' in subkey: - if 'responseOptions' not in column_to_terms[current_tuple].keys(): - column_to_terms[current_tuple]['responseOptions'] = {} - - column_to_terms[current_tuple]['responseOptions']['unitCode'] = \ - json_map[json_key[0]]['responseOptions']['hasUnit'] - print("units: %s" % column_to_terms[current_tuple]['responseOptions']['unitCode']) - elif 'unitCode' in subkey: - if 'responseOptions' not in column_to_terms[current_tuple].keys(): - 
column_to_terms[current_tuple]['responseOptions'] = {} - - column_to_terms[current_tuple]['responseOptions']['unitCode'] = \ - json_map[json_key[0]]['responseOptions']['unitCode'] - print("units: %s" % column_to_terms[current_tuple]['responseOptions']['unitCode']) - - if 'levels' in json_map[json_key[0]]: + "units: %s" + % column_to_terms[current_tuple]["responseOptions"][ + "unitCode" + ] + ) + + if "levels" in json_map[json_key[0]]: # upgrade 'levels' to 'responseOptions'->'choices' - if 'responseOptions' not in column_to_terms[current_tuple].keys(): - column_to_terms[current_tuple]['responseOptions'] = {} - column_to_terms[current_tuple]['responseOptions']['choices'] = json_map[json_key[0]][ - 'levels'] - print("choices: %s" % column_to_terms[current_tuple]['responseOptions']['choices']) - elif 'Levels' in json_map[json_key[0]]: + if ( + "responseOptions" + not in column_to_terms[current_tuple].keys() + ): + column_to_terms[current_tuple]["responseOptions"] = {} + column_to_terms[current_tuple]["responseOptions"][ + "choices" + ] = json_map[json_key[0]]["levels"] + print( + "choices: %s" + % column_to_terms[current_tuple]["responseOptions"][ + "choices" + ] + ) + elif "Levels" in json_map[json_key[0]]: # upgrade 'levels' to 'responseOptions'->'choices' - if 'responseOptions' not in column_to_terms[current_tuple].keys(): - column_to_terms[current_tuple]['responseOptions'] = {} - column_to_terms[current_tuple]['responseOptions']['choices'] = json_map[json_key[0]][ - 'Levels'] - print("levels: %s" % column_to_terms[current_tuple]['responseOptions']['choices']) - - if 'valueType' in json_map[json_key[0]]: + if ( + "responseOptions" + not in column_to_terms[current_tuple].keys() + ): + column_to_terms[current_tuple]["responseOptions"] = {} + column_to_terms[current_tuple]["responseOptions"][ + "choices" + ] = json_map[json_key[0]]["Levels"] + print( + "levels: %s" + % column_to_terms[current_tuple]["responseOptions"][ + "choices" + ] + ) + + if "valueType" in json_map[json_key[0]]: # upgrade 'valueType' to 'responseOptions'->'valueType - if 'responseOptions' not in column_to_terms[current_tuple].keys(): - column_to_terms[current_tuple]['responseOptions'] = {} - column_to_terms[current_tuple]['responseOptions']['valueType'] = \ - json_map[json_key[0]]['valueType'] - print("valueType: %s" % column_to_terms[current_tuple]['responseOptions']['valueType']) - - if ('minValue' in json_map[json_key[0]]): + if ( + "responseOptions" + not in column_to_terms[current_tuple].keys() + ): + column_to_terms[current_tuple]["responseOptions"] = {} + column_to_terms[current_tuple]["responseOptions"][ + "valueType" + ] = json_map[json_key[0]]["valueType"] + print( + "valueType: %s" + % column_to_terms[current_tuple]["responseOptions"][ + "valueType" + ] + ) + + if "minValue" in json_map[json_key[0]]: # upgrade 'minValue' to 'responseOptions'->'minValue - if 'responseOptions' not in column_to_terms[current_tuple].keys(): - column_to_terms[current_tuple]['responseOptions'] = {} - column_to_terms[current_tuple]['responseOptions']['minValue'] = \ - json_map[json_key[0]]['minValue'] - print("minValue: %s" % column_to_terms[current_tuple]['responseOptions']['minValue']) - elif ('minimumValue' in json_map[json_key[0]]): + if ( + "responseOptions" + not in column_to_terms[current_tuple].keys() + ): + column_to_terms[current_tuple]["responseOptions"] = {} + column_to_terms[current_tuple]["responseOptions"][ + "minValue" + ] = json_map[json_key[0]]["minValue"] + print( + "minValue: %s" + % 
column_to_terms[current_tuple]["responseOptions"][ + "minValue" + ] + ) + elif "minimumValue" in json_map[json_key[0]]: # upgrade 'minValue' to 'responseOptions'->'minValue - if 'responseOptions' not in column_to_terms[current_tuple].keys(): - column_to_terms[current_tuple]['responseOptions'] = {} - column_to_terms[current_tuple]['responseOptions']['minValue'] = \ - json_map[json_key[0]]['minimumValue'] - print("minValue: %s" % column_to_terms[current_tuple]['responseOptions']['minValue']) - - if 'maxValue' in json_map[json_key[0]]: + if ( + "responseOptions" + not in column_to_terms[current_tuple].keys() + ): + column_to_terms[current_tuple]["responseOptions"] = {} + column_to_terms[current_tuple]["responseOptions"][ + "minValue" + ] = json_map[json_key[0]]["minimumValue"] + print( + "minValue: %s" + % column_to_terms[current_tuple]["responseOptions"][ + "minValue" + ] + ) + + if "maxValue" in json_map[json_key[0]]: # upgrade 'maxValue' to 'responseOptions'->'maxValue - if 'responseOptions' not in column_to_terms[current_tuple].keys(): - column_to_terms[current_tuple]['responseOptions'] = {} - column_to_terms[current_tuple]['responseOptions']['maxValue'] = \ - json_map[json_key[0]]['maxValue'] - print("maxValue: %s" % column_to_terms[current_tuple]['responseOptions']['maxValue']) - elif 'maximumValue' in json_map[json_key[0]]: + if ( + "responseOptions" + not in column_to_terms[current_tuple].keys() + ): + column_to_terms[current_tuple]["responseOptions"] = {} + column_to_terms[current_tuple]["responseOptions"][ + "maxValue" + ] = json_map[json_key[0]]["maxValue"] + print( + "maxValue: %s" + % column_to_terms[current_tuple]["responseOptions"][ + "maxValue" + ] + ) + elif "maximumValue" in json_map[json_key[0]]: # upgrade 'maxValue' to 'responseOptions'->'maxValue - if 'responseOptions' not in column_to_terms[current_tuple].keys(): - column_to_terms[current_tuple]['responseOptions'] = {} - column_to_terms[current_tuple]['responseOptions']['maxValue'] = \ - json_map[json_key[0]]['maximumValue'] - print("maxValue: %s" % column_to_terms[current_tuple]['responseOptions']['maxValue']) - if 'hasUnit' in json_map[json_key[0]]: + if ( + "responseOptions" + not in column_to_terms[current_tuple].keys() + ): + column_to_terms[current_tuple]["responseOptions"] = {} + column_to_terms[current_tuple]["responseOptions"][ + "maxValue" + ] = json_map[json_key[0]]["maximumValue"] + print( + "maxValue: %s" + % column_to_terms[current_tuple]["responseOptions"][ + "maxValue" + ] + ) + if "hasUnit" in json_map[json_key[0]]: # upgrade 'hasUnit' to 'responseOptions'->'unitCode - if 'responseOptions' not in column_to_terms[current_tuple].keys(): - column_to_terms[current_tuple]['responseOptions'] = {} - column_to_terms[current_tuple]['responseOptions']['unitCode'] = \ - json_map[json_key[0]]['hasUnit'] - print("unitCode: %s" % column_to_terms[current_tuple]['responseOptions']['unitCode']) - elif 'Units' in json_map[json_key[0]]: + if ( + "responseOptions" + not in column_to_terms[current_tuple].keys() + ): + column_to_terms[current_tuple]["responseOptions"] = {} + column_to_terms[current_tuple]["responseOptions"][ + "unitCode" + ] = json_map[json_key[0]]["hasUnit"] + print( + "unitCode: %s" + % column_to_terms[current_tuple]["responseOptions"][ + "unitCode" + ] + ) + elif "Units" in json_map[json_key[0]]: # upgrade 'Units' to 'responseOptions'->'unitCode - if 'responseOptions' not in column_to_terms[current_tuple].keys(): - column_to_terms[current_tuple]['responseOptions'] = {} - 
column_to_terms[current_tuple]['responseOptions']['unitCode'] = \ - json_map[json_key[0]]['Units'] - print("unitCode: %s" % column_to_terms[current_tuple]['responseOptions']['unitCode']) + if ( + "responseOptions" + not in column_to_terms[current_tuple].keys() + ): + column_to_terms[current_tuple]["responseOptions"] = {} + column_to_terms[current_tuple]["responseOptions"][ + "unitCode" + ] = json_map[json_key[0]]["Units"] + print( + "unitCode: %s" + % column_to_terms[current_tuple]["responseOptions"][ + "unitCode" + ] + ) if "isAbout" in json_map[json_key[0]]: - #check if we have a single isAbout or multiple... - if isinstance(json_map[json_key[0]]['isAbout'],list): + # check if we have a single isAbout or multiple... + if isinstance(json_map[json_key[0]]["isAbout"], list): # isAbout is an empty list, do concept association if user asked for it else skip - if not json_map[json_key[0]]['isAbout']: + if not json_map[json_key[0]]["isAbout"]: if associate_concepts: # provide user with opportunity to associate a concept with this annotation - find_concept_interactive(column, current_tuple, column_to_terms, ilx_obj, - nidm_owl_graph=nidm_owl_graph) + find_concept_interactive( + column, + current_tuple, + column_to_terms, + ilx_obj, + nidm_owl_graph=nidm_owl_graph, + ) # write annotations to json file so user can start up again if not doing whole file - write_json_mapping_file(column_to_terms, output_file, bids) + write_json_mapping_file( + column_to_terms, output_file, bids + ) else: pass else: # else create a new list - column_to_terms[current_tuple]['isAbout'] = [] + column_to_terms[current_tuple]["isAbout"] = [] # for each isAbout entry - for subdict in json_map[json_key[0]]['isAbout']: + for subdict in json_map[json_key[0]]["isAbout"]: # some entries may not have 'label' so check - if 'label' in subdict.keys(): - column_to_terms[current_tuple]['isAbout'].append({'@id':subdict['@id'],'label':subdict['label']}) - print("isAbout: %s = %s, %s = %s" %('@id',subdict['@id'], - 'label',subdict['label'])) + if "label" in subdict.keys(): + column_to_terms[current_tuple][ + "isAbout" + ].append( + { + "@id": subdict["@id"], + "label": subdict["label"], + } + ) + print( + "isAbout: %s = %s, %s = %s" + % ( + "@id", + subdict["@id"], + "label", + subdict["label"], + ) + ) else: - column_to_terms[current_tuple]['isAbout'].append( - {'@id': subdict['@id']}) - print("isAbout: %s = %s" % ('@id', subdict['@id'])) - #for isabout_key,isabout_value in subdict.items(): + column_to_terms[current_tuple][ + "isAbout" + ].append({"@id": subdict["@id"]}) + print( + "isAbout: %s = %s" % ("@id", subdict["@id"]) + ) + # for isabout_key,isabout_value in subdict.items(): # column_to_terms[current_tuple]['isAbout'].append({isabout_key:isabout_value}) # print("isAbout: %s = %s" %(isabout_key, isabout_value)) # if isAbout is a dictionary then we only have 1 isAbout...we'll upgrade it to a list # to be consistent moving forward else: - column_to_terms[current_tuple]['isAbout'] = [] - if 'url' in json_map[json_key[0]]['isAbout'].keys(): - if 'label' in json_map[json_key[0]]['isAbout'].keys(): - column_to_terms[current_tuple]['isAbout'].append({'@id': - json_map[json_key[0]]['isAbout']['url'],'label': - json_map[json_key[0]]['isAbout']['label']}) + column_to_terms[current_tuple]["isAbout"] = [] + if "url" in json_map[json_key[0]]["isAbout"].keys(): + if "label" in json_map[json_key[0]]["isAbout"].keys(): + column_to_terms[current_tuple]["isAbout"].append( + { + "@id": json_map[json_key[0]]["isAbout"][ + "url" + ], + "label": 
json_map[json_key[0]]["isAbout"][ + "label" + ], + } + ) else: - column_to_terms[current_tuple]['isAbout'].append({'@id': - json_map[json_key[0]]['isAbout']['url']}) + column_to_terms[current_tuple]["isAbout"].append( + {"@id": json_map[json_key[0]]["isAbout"]["url"]} + ) else: - if 'label' in json_map[json_key[0]]['isAbout'].keys(): - column_to_terms[current_tuple]['isAbout'].append({'@id': - json_map[json_key[0]]['isAbout']['@id'],'label': - json_map[json_key[0]]['isAbout']['label']}) + if "label" in json_map[json_key[0]]["isAbout"].keys(): + column_to_terms[current_tuple]["isAbout"].append( + { + "@id": json_map[json_key[0]]["isAbout"][ + "@id" + ], + "label": json_map[json_key[0]]["isAbout"][ + "label" + ], + } + ) else: - column_to_terms[current_tuple]['isAbout'].append({'@id': - json_map[json_key[0]]['isAbout']['@id']}) - - - print("isAbout: %s = %s, %s = %s" %('@id',column_to_terms[current_tuple]['isAbout']['@id'], - 'label',column_to_terms[current_tuple]['isAbout']['label'])) + column_to_terms[current_tuple]["isAbout"].append( + {"@id": json_map[json_key[0]]["isAbout"]["@id"]} + ) + + print( + "isAbout: %s = %s, %s = %s" + % ( + "@id", + column_to_terms[current_tuple]["isAbout"]["@id"], + "label", + column_to_terms[current_tuple]["isAbout"]["label"], + ) + ) else: - # if user ran in mode where they want to associate concepts and this isn't the participant # id field then associate concepts. - if match_participant_id_field(json_map[json_key[0]]['sourceVariable']): - column_to_terms[current_tuple]['isAbout'] =[] - column_to_terms[current_tuple]['isAbout'].append({'@id':Constants.NIDM_SUBJECTID.uri, - 'label':Constants.NIDM_SUBJECTID.localpart}) + if match_participant_id_field( + json_map[json_key[0]]["sourceVariable"] + ): + column_to_terms[current_tuple]["isAbout"] = [] + column_to_terms[current_tuple]["isAbout"].append( + { + "@id": Constants.NIDM_SUBJECTID.uri, + "label": Constants.NIDM_SUBJECTID.localpart, + } + ) write_json_mapping_file(column_to_terms, output_file, bids) elif associate_concepts: # provide user with opportunity to associate a concept with this annotation - find_concept_interactive(column,current_tuple,column_to_terms,ilx_obj,nidm_owl_graph=nidm_owl_graph) + find_concept_interactive( + column, + current_tuple, + column_to_terms, + ilx_obj, + nidm_owl_graph=nidm_owl_graph, + ) # write annotations to json file so user can start up again if not doing whole file - write_json_mapping_file(column_to_terms,output_file,bids) + write_json_mapping_file(column_to_terms, output_file, bids) - print("***************************************************************************************") - print("---------------------------------------------------------------------------------------") + print( + "***************************************************************************************" + ) + print( + "---------------------------------------------------------------------------------------" + ) - if (json_map is not None) and (len(json_key)>0): + if (json_map is not None) and (len(json_key) > 0): continue except Exception as e: # so if this is an IndexError then it's likely our json mapping file keys are of the BIDS type @@ -1377,32 +1931,46 @@ def map_variables_to_terms(df,directory, assessment_name, output_file=None,json_ print("json annotation file not supplied") search_term = str(column) - #added for an automatic mapping of participant_id, subject_id, and variants + # added for an automatic mapping of participant_id, subject_id, and variants if 
match_participant_id_field(search_term.lower()): - # map this term to Constants.NIDM_SUBJECTID # since our subject ids are statically mapped to the Constants.NIDM_SUBJECTID we're creating a new # named tuple for this json map entry as it's not the same source as the rest of the data frame which # comes from the 'assessment_name' function parameter. subjid_tuple = str(DD(source=assessment_name, variable=search_term)) column_to_terms[subjid_tuple] = {} - column_to_terms[subjid_tuple]['label'] = search_term - column_to_terms[subjid_tuple]['description'] = "subject/participant identifier" - column_to_terms[subjid_tuple]['source_variable'] = str(search_term) + column_to_terms[subjid_tuple]["label"] = search_term + column_to_terms[subjid_tuple][ + "description" + ] = "subject/participant identifier" + column_to_terms[subjid_tuple]["source_variable"] = str(search_term) # added to support reproschema format - column_to_terms[subjid_tuple]['responseOptions'] = {} - column_to_terms[subjid_tuple]['responseOptions']['valueType'] = URIRef(Constants.XSD["string"]) - column_to_terms[subjid_tuple]['isAbout'] = [] - column_to_terms[subjid_tuple]['isAbout'].append({'@id':Constants.NIDM_SUBJECTID.uri, - 'label':Constants.NIDM_SUBJECTID.localpart}) + column_to_terms[subjid_tuple]["responseOptions"] = {} + column_to_terms[subjid_tuple]["responseOptions"]["valueType"] = URIRef( + Constants.XSD["string"] + ) + column_to_terms[subjid_tuple]["isAbout"] = [] + column_to_terms[subjid_tuple]["isAbout"].append( + { + "@id": Constants.NIDM_SUBJECTID.uri, + "label": Constants.NIDM_SUBJECTID.localpart, + } + ) # column_to_terms[subjid_tuple]['variable'] = str(column) - print("Variable %s automatically mapped to participant/subject identifier" %search_term) - print("Label: %s" %column_to_terms[subjid_tuple]['label']) - print("Description: %s" %column_to_terms[subjid_tuple]['description']) - #print("Url: %s" %column_to_terms[subjid_tuple]['url']) - print("Source Variable: %s" % column_to_terms[subjid_tuple]['source_variable']) - print("---------------------------------------------------------------------------------------") + print( + "Variable %s automatically mapped to participant/subject identifier" + % search_term + ) + print("Label: %s" % column_to_terms[subjid_tuple]["label"]) + print("Description: %s" % column_to_terms[subjid_tuple]["description"]) + # print("Url: %s" %column_to_terms[subjid_tuple]['url']) + print( + "Source Variable: %s" % column_to_terms[subjid_tuple]["source_variable"] + ) + print( + "---------------------------------------------------------------------------------------" + ) continue # if we haven't already found an annotation for this column then have user create one. 
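Because `match_participant_id_field` drives the automatic subject-identifier mapping shown above, it can help to see which column headers its substring heuristic accepts. The check below is illustrative only; the header names are made up and the `nidm.experiment.Utils` import path is an assumption.

```
# Rough check of the participant-ID heuristic -- example headers are invented.
from nidm.experiment.Utils import match_participant_id_field

for name in ["participant_id", "Subject_ID", "sub-id", "age", "ID"]:
    print(name, match_participant_id_field(name))
# Per the substring rules, the first three print True; "age" and "ID" print False.
```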
if current_tuple not in column_to_terms.keys(): @@ -1413,62 +1981,84 @@ def map_variables_to_terms(df,directory, assessment_name, output_file=None,json_ # then ask user to find a concept if they selected to do so if associate_concepts: # provide user with opportunity to associate a concept with this annotation - find_concept_interactive(column, current_tuple, column_to_terms, ilx_obj, nidm_owl_graph=nidm_owl_graph) + find_concept_interactive( + column, + current_tuple, + column_to_terms, + ilx_obj, + nidm_owl_graph=nidm_owl_graph, + ) # write annotations to json file so user can start up again if not doing whole file write_json_mapping_file(column_to_terms, output_file, bids) - try: # now we should add the data element definition with concept annotation to InterLex # check if this is a categorical variable, if so it will have 'levels' key - if 'levels' in column_to_terms[current_tuple]: - if 'isAbout' in column_to_terms[current_tuple]: - ilx_output = AddPDEToInterlex(ilx_obj=ilx_obj, label=column_to_terms[current_tuple]['label'], - definition=column_to_terms[current_tuple]['description'], min = - column_to_terms[current_tuple]['minValue'], max = - column_to_terms[current_tuple]['maxValue'], units = - column_to_terms[current_tuple]['hasUnit'], datatype= - column_to_terms[current_tuple]['valueType'], isabout= - column_to_terms[current_tuple]['isAbout'], categorymappings= - json.dumps(column_to_terms[current_tuple]['levels'])) + if "levels" in column_to_terms[current_tuple]: + if "isAbout" in column_to_terms[current_tuple]: + ilx_output = AddPDEToInterlex( + ilx_obj=ilx_obj, + label=column_to_terms[current_tuple]["label"], + definition=column_to_terms[current_tuple]["description"], + min=column_to_terms[current_tuple]["minValue"], + max=column_to_terms[current_tuple]["maxValue"], + units=column_to_terms[current_tuple]["hasUnit"], + datatype=column_to_terms[current_tuple]["valueType"], + isabout=column_to_terms[current_tuple]["isAbout"], + categorymappings=json.dumps( + column_to_terms[current_tuple]["levels"] + ), + ) else: - ilx_output = AddPDEToInterlex(ilx_obj=ilx_obj, label=column_to_terms[current_tuple]['label'], - definition=column_to_terms[current_tuple]['description'], min = - column_to_terms[current_tuple]['minValue'], max = - column_to_terms[current_tuple]['maxValue'], units = - column_to_terms[current_tuple]['hasUnit'], datatype= - column_to_terms[current_tuple]['valueType'], categorymappings= - json.dumps(column_to_terms[current_tuple]['levels'])) + ilx_output = AddPDEToInterlex( + ilx_obj=ilx_obj, + label=column_to_terms[current_tuple]["label"], + definition=column_to_terms[current_tuple]["description"], + min=column_to_terms[current_tuple]["minValue"], + max=column_to_terms[current_tuple]["maxValue"], + units=column_to_terms[current_tuple]["hasUnit"], + datatype=column_to_terms[current_tuple]["valueType"], + categorymappings=json.dumps( + column_to_terms[current_tuple]["levels"] + ), + ) else: - if 'isAbout' in column_to_terms[current_tuple]: - ilx_output = AddPDEToInterlex(ilx_obj=ilx_obj, label=column_to_terms[current_tuple]['label'], - definition=column_to_terms[current_tuple]['description'], min = - column_to_terms[current_tuple]['minValue'], max = - column_to_terms[current_tuple]['maxValue'], units = - column_to_terms[current_tuple]['hasUnit'], datatype= - column_to_terms[current_tuple]['valueType'], isabout = - column_to_terms[current_tuple]['isAbout']) + if "isAbout" in column_to_terms[current_tuple]: + ilx_output = AddPDEToInterlex( + ilx_obj=ilx_obj, + 
label=column_to_terms[current_tuple]["label"], + definition=column_to_terms[current_tuple]["description"], + min=column_to_terms[current_tuple]["minValue"], + max=column_to_terms[current_tuple]["maxValue"], + units=column_to_terms[current_tuple]["hasUnit"], + datatype=column_to_terms[current_tuple]["valueType"], + isabout=column_to_terms[current_tuple]["isAbout"], + ) else: - ilx_output = AddPDEToInterlex(ilx_obj=ilx_obj, label=column_to_terms[current_tuple]['label'], - definition=column_to_terms[current_tuple]['description'], min = - column_to_terms[current_tuple]['minValue'], max = - column_to_terms[current_tuple]['maxValue'], units = - column_to_terms[current_tuple]['hasUnit'], datatype= - column_to_terms[current_tuple]['valueType']) + ilx_output = AddPDEToInterlex( + ilx_obj=ilx_obj, + label=column_to_terms[current_tuple]["label"], + definition=column_to_terms[current_tuple]["description"], + min=column_to_terms[current_tuple]["minValue"], + max=column_to_terms[current_tuple]["maxValue"], + units=column_to_terms[current_tuple]["hasUnit"], + datatype=column_to_terms[current_tuple]["valueType"], + ) # now store the url from Interlex for new personal data element in column_to_terms annotation - column_to_terms[current_tuple]['url'] = ilx_output.iri + column_to_terms[current_tuple]["url"] = ilx_output.iri except Exception as e: print("WARNING: WIP: Data element not submitted to InterLex. ") # write annotations to json file since data element annotations are complete write_json_mapping_file(column_to_terms, output_file, bids) # get CDEs for data dictionary and NIDM graph entity of data - cde = DD_to_nidm(column_to_terms,dataset_identifier=dataset_identifier) + cde = DD_to_nidm(column_to_terms, dataset_identifier=dataset_identifier) return [column_to_terms, cde] + def write_json_mapping_file(source_variable_annotations, output_file, bids=False): # if we want a bids-style json sidecar file if bids: @@ -1477,11 +2067,11 @@ def write_json_mapping_file(source_variable_annotations, output_file, bids=False new_dict = {} # remove 'responseOptions' and move 'choices' to 'levels' key - for key,value in temp_dict.items(): + for key, value in temp_dict.items(): new_dict[key] = {} - for subkey,subvalue in temp_dict[key].items(): - if subkey == 'responseOptions': - for subkey2,subvalue2 in temp_dict[key]['responseOptions'].items(): + for subkey, subvalue in temp_dict[key].items(): + if subkey == "responseOptions": + for subkey2, subvalue2 in temp_dict[key]["responseOptions"].items(): if subkey2 == "choices": new_dict[key]["levels"] = subvalue2 else: @@ -1489,35 +2079,52 @@ def write_json_mapping_file(source_variable_annotations, output_file, bids=False else: new_dict[key][subkey] = subvalue - # write - with open(os.path.join(os.path.dirname(output_file), os.path.splitext(output_file)[0] + ".json"), 'w+') \ - as fp: - json.dump(new_dict, fp,indent=4) + with open( + os.path.join( + os.path.dirname(output_file), os.path.splitext(output_file)[0] + ".json" + ), + "w+", + ) as fp: + json.dump(new_dict, fp, indent=4) else: - # logging.info("saving json mapping file: %s" %os.path.join(os.path.basename(output_file), \ # os.path.splitext(output_file)[0]+".json")) - with open(os.path.join(os.path.dirname(output_file), os.path.splitext(output_file)[0] + "_annotations.json"), 'w+') \ - as fp: - json.dump(source_variable_annotations, fp,indent=4) - -def find_concept_interactive(source_variable, current_tuple, source_variable_annotations, ilx_obj,ancestor=True,nidm_owl_graph=None): - ''' + with open( + os.path.join( + 
os.path.dirname(output_file), + os.path.splitext(output_file)[0] + "_annotations.json", + ), + "w+", + ) as fp: + json.dump(source_variable_annotations, fp, indent=4) + + +def find_concept_interactive( + source_variable, + current_tuple, + source_variable_annotations, + ilx_obj, + ancestor=True, + nidm_owl_graph=None, +): + """ This function will allow user to interactively find a concept in the InterLex, CogAtlas, and NIDM to associate with the source variable from the assessment encoded in the current_tuple Starts by using NIDM-Terms concepts which are ones that have previously been used to annotate datasets. By starting with these we maximize chances of being able to query across datasets using concept-drivin queries. - ''' + """ # Before we run anything here if both InterLex and NIDM OWL file access is down we should just alert # the user and return cause we're not going to be able to do really anything if (nidm_owl_graph is None) and (ilx_obj is None): print("Both InterLex and NIDM OWL file access is not possible") - print("Check your internet connection and try again or supply a JSON annotation file with all the variables " - "mapped to terms") + print( + "Check your internet connection and try again or supply a JSON annotation file with all the variables " + "mapped to terms" + ) return source_variable_annotations # added by DBK 5/14/21 to support pulling concepts used in previous dataset annotations in from NIDM-Terms @@ -1535,7 +2142,7 @@ def find_concept_interactive(source_variable, current_tuple, source_variable_ann min_match_score = 50 search_term = str(source_variable) # loop to find a concept by iteratively searching InterLex...or defining your own - go_loop=True + go_loop = True while go_loop: # variable for numbering options returned from elastic search option = 1 @@ -1545,69 +2152,105 @@ def find_concept_interactive(source_variable, current_tuple, source_variable_ann # modified by DBK 5/14/21 to start with nidm-terms used concepts if nidmterms_concepts is not None: - nidmterms_concepts_query = fuzzy_match_concepts_from_nidmterms_jsonld(nidmterms_concepts, search_term) + nidmterms_concepts_query = fuzzy_match_concepts_from_nidmterms_jsonld( + nidmterms_concepts, search_term + ) search_result = {} first_nidm_term = True for key, subdict in nidmterms_concepts_query.items(): - if nidmterms_concepts_query[key]['score'] > min_match_score: + if nidmterms_concepts_query[key]["score"] > min_match_score: if first_nidm_term: print() print("NIDM-Terms Concepts:") first_nidm_term = False - print("%d: Label: %s \t Definition: %s \t URL: %s" % ( - option, nidmterms_concepts_query[key]['label'], nidmterms_concepts_query[key]['definition'], - nidmterms_concepts_query[key]['url'])) + print( + "%d: Label: %s \t Definition: %s \t URL: %s" + % ( + option, + nidmterms_concepts_query[key]["label"], + nidmterms_concepts_query[key]["definition"], + nidmterms_concepts_query[key]["url"], + ) + ) search_result[key] = {} - search_result[key]['label'] = nidmterms_concepts_query[key]['label'] - search_result[key]['definition'] = nidmterms_concepts_query[key]['definition'] - search_result[key]['preferred_url'] = nidmterms_concepts_query[key]['url'] + search_result[key]["label"] = nidmterms_concepts_query[key]["label"] + search_result[key]["definition"] = nidmterms_concepts_query[key][ + "definition" + ] + search_result[key]["preferred_url"] = nidmterms_concepts_query[key][ + "url" + ] search_result[str(option)] = key option = option + 1 - if not ancestor: if ilx_obj is not None: # for each column name, 
query Interlex for possible matches - ilx_result = GetNIDMTermsFromSciCrunch(search_term, type='term', ancestor=False) + ilx_result = GetNIDMTermsFromSciCrunch( + search_term, type="term", ancestor=False + ) - #temp = ilx_result.copy() + # temp = ilx_result.copy() # print("Search Term: %s" %search_term) if len(ilx_result) != 0: print("InterLex:") print() # print("Search Results: ") for key, value in ilx_result.items(): - print("%d: Label: %s \t Definition: %s \t Preferred URL: %s " % ( - option, ilx_result[key]['label'], ilx_result[key]['definition'], - ilx_result[key]['preferred_url'])) - - search_result[key]={} - search_result[key]['label'] = ilx_result[key]['label'] - search_result[key]['definition'] = ilx_result[key]['definition'] - search_result[key]['preferred_url'] = ilx_result[key]['preferred_url'] + print( + "%d: Label: %s \t Definition: %s \t Preferred URL: %s " + % ( + option, + ilx_result[key]["label"], + ilx_result[key]["definition"], + ilx_result[key]["preferred_url"], + ) + ) + + search_result[key] = {} + search_result[key]["label"] = ilx_result[key]["label"] + search_result[key]["definition"] = ilx_result[key]["definition"] + search_result[key]["preferred_url"] = ilx_result[key][ + "preferred_url" + ] search_result[str(option)] = key option = option + 1 - # Cognitive Atlas Concepts Search try: - cogatlas_concepts_query = fuzzy_match_terms_from_cogatlas_json(cogatlas_concepts.json,search_term) + cogatlas_concepts_query = fuzzy_match_terms_from_cogatlas_json( + cogatlas_concepts.json, search_term + ) first_cogatlas_concept = True for key, subdict in cogatlas_concepts_query.items(): - if cogatlas_concepts_query[key]['score'] > min_match_score+20: + if cogatlas_concepts_query[key]["score"] > min_match_score + 20: if first_cogatlas_concept: print() print("Cognitive Atlas:") print() first_cogatlas_concept = False - print("%d: Label: %s \t Definition: %s " % ( - option, cogatlas_concepts_query[key]['label'], cogatlas_concepts_query[key]['definition'].rstrip('\r\n'))) + print( + "%d: Label: %s \t Definition: %s " + % ( + option, + cogatlas_concepts_query[key]["label"], + cogatlas_concepts_query[key]["definition"].rstrip( + "\r\n" + ), + ) + ) search_result[key] = {} - search_result[key]['label'] = cogatlas_concepts_query[key]['label'] - search_result[key]['definition'] = cogatlas_concepts_query[key]['definition'].rstrip('\r\n') - search_result[key]['preferred_url'] = cogatlas_concepts_query[key]['url'] + search_result[key]["label"] = cogatlas_concepts_query[key][ + "label" + ] + search_result[key]["definition"] = cogatlas_concepts_query[key][ + "definition" + ].rstrip("\r\n") + search_result[key]["preferred_url"] = cogatlas_concepts_query[ + key + ]["url"] search_result[str(option)] = key option = option + 1 except: @@ -1615,16 +2258,31 @@ def find_concept_interactive(source_variable, current_tuple, source_variable_ann # Cognitive Atlas Disorders Search try: - cogatlas_disorders_query = fuzzy_match_terms_from_cogatlas_json(cogatlas_disorders.json, search_term) + cogatlas_disorders_query = fuzzy_match_terms_from_cogatlas_json( + cogatlas_disorders.json, search_term + ) for key, subdict in cogatlas_disorders_query.items(): - if cogatlas_disorders_query[key]['score'] > min_match_score+20: - print("%d: Label: %s \t Definition: %s " % ( - option, cogatlas_disorders_query[key]['label'], cogatlas_disorders_query[key]['definition'].rstrip('\r\n'), - )) + if cogatlas_disorders_query[key]["score"] > min_match_score + 20: + print( + "%d: Label: %s \t Definition: %s " + % ( + option, + 
cogatlas_disorders_query[key]["label"], + cogatlas_disorders_query[key]["definition"].rstrip( + "\r\n" + ), + ) + ) search_result[key] = {} - search_result[key]['label'] = cogatlas_disorders_query[key]['label'] - search_result[key]['definition'] = cogatlas_disorders_query[key]['definition'].rstrip('\r\n') - search_result[key]['preferred_url'] = cogatlas_disorders_query[key]['url'] + search_result[key]["label"] = cogatlas_disorders_query[key][ + "label" + ] + search_result[key]["definition"] = cogatlas_disorders_query[ + key + ]["definition"].rstrip("\r\n") + search_result[key]["preferred_url"] = cogatlas_disorders_query[ + key + ]["url"] search_result[str(option)] = key option = option + 1 except: @@ -1635,48 +2293,68 @@ def find_concept_interactive(source_variable, current_tuple, source_variable_ann if nidm_owl_graph is not None: # Add existing NIDM Terms as possible selections which fuzzy match the search_term - nidm_constants_query = fuzzy_match_terms_from_graph(nidm_owl_graph, search_term) + nidm_constants_query = fuzzy_match_terms_from_graph( + nidm_owl_graph, search_term + ) first_nidm_term = True for key, subdict in nidm_constants_query.items(): - if nidm_constants_query[key]['score'] > min_match_score: + if nidm_constants_query[key]["score"] > min_match_score: if first_nidm_term: print() print("NIDM Ontology Terms:") first_nidm_term = False - print("%d: Label: %s \t Definition: %s \t URL: %s" % ( - option, nidm_constants_query[key]['label'], nidm_constants_query[key]['definition'], - nidm_constants_query[key]['url'])) + print( + "%d: Label: %s \t Definition: %s \t URL: %s" + % ( + option, + nidm_constants_query[key]["label"], + nidm_constants_query[key]["definition"], + nidm_constants_query[key]["url"], + ) + ) search_result[key] = {} - search_result[key]['label'] = nidm_constants_query[key]['label'] - search_result[key]['definition'] = nidm_constants_query[key]['definition'] - search_result[key]['preferred_url'] = nidm_constants_query[key]['url'] + search_result[key]["label"] = nidm_constants_query[key]["label"] + search_result[key]["definition"] = nidm_constants_query[key][ + "definition" + ] + search_result[key]["preferred_url"] = nidm_constants_query[key][ + "url" + ] search_result[str(option)] = key option = option + 1 print() if ancestor: # Broaden Interlex search - print("%d: Broaden Search (includes interlex, cogatlas, and nidm ontology) " % option) + print( + "%d: Broaden Search (includes interlex, cogatlas, and nidm ontology) " + % option + ) else: # Narrow Interlex search - print("%d: Narrow Search (includes nidm-terms previously used concepts) " % option) + print( + "%d: Narrow Search (includes nidm-terms previously used concepts) " + % option + ) option = option + 1 # Add option to change query string - print("%d: Change query string from: \"%s\"" % (option, search_term)) + print('%d: Change query string from: "%s"' % (option, search_term)) ########DEFINE NEW CONCEPT COMMENTED OUT RIGHT NOW#################################### ## Add option to define your own term - #option = option + 1 - #print("%d: Define my own concept for this variable" % option) + # option = option + 1 + # print("%d: Define my own concept for this variable" % option) ########DEFINE NEW CONCEPT COMMENTED OUT RIGHT NOW#################################### # Add option to define your own term option = option + 1 print("%d: No concept needed for this variable" % option) - print("---------------------------------------------------------------------------------------") + print( + 
"---------------------------------------------------------------------------------------" + ) # Wait for user input selection = input("Please select an option (1:%d) from above: \t" % option) @@ -1691,19 +2369,24 @@ def find_concept_interactive(source_variable, current_tuple, source_variable_ann # check if selection is to re-run query with new search term elif int(selection) == (option - 1): # ask user for new search string - search_term = input("Please input new search string for CSV column: %s \t:" % source_variable) - print("---------------------------------------------------------------------------------------") + search_term = input( + "Please input new search string for CSV column: %s \t:" + % source_variable + ) + print( + "---------------------------------------------------------------------------------------" + ) ########DEFINE NEW CONCEPT COMMENTED OUT RIGHT NOW#################################### - #elif int(selection) == (option - 1): + # elif int(selection) == (option - 1): # new_concept = define_new_concept(source_variable,ilx_obj) - # add new concept to InterLex and retrieve URL for isAbout - # - # - # + # add new concept to InterLex and retrieve URL for isAbout + # + # + # # source_variable_annotations[current_tuple]['isAbout'] = new_concept.iri + '#' # go_loop = False - # if user says no concept mapping needed then just exit this loop + # if user says no concept mapping needed then just exit this loop ########DEFINE NEW CONCEPT COMMENTED OUT RIGHT NOW#################################### elif int(selection) == (option): # don't need to continue while loop because we've decided not to associate a concept with this variable. @@ -1711,45 +2394,58 @@ def find_concept_interactive(source_variable, current_tuple, source_variable_ann else: # user selected one of the existing concepts to add its URL to the isAbout property # added labels to these isAbout urls for easy querying later - source_variable_annotations[current_tuple]['isAbout'] = [] - source_variable_annotations[current_tuple]['isAbout'].append({'@id': - search_result[search_result[selection]]['preferred_url'],'label': - search_result[search_result[selection]]['label']}) - print("\nConcept annotation added for source variable: %s" %source_variable) + source_variable_annotations[current_tuple]["isAbout"] = [] + source_variable_annotations[current_tuple]["isAbout"].append( + { + "@id": search_result[search_result[selection]]["preferred_url"], + "label": search_result[search_result[selection]]["label"], + } + ) + print( + "\nConcept annotation added for source variable: %s" % source_variable + ) go_loop = False - - def define_new_concept(source_variable, ilx_obj): # user wants to define their own term. 
Ask for term label and definition print("\nYou selected to enter a new concept for CSV column: %s" % source_variable) # collect term information from user - concept_label = input("Please enter a label for the new concept [%s]:\t" % source_variable) + concept_label = input( + "Please enter a label for the new concept [%s]:\t" % source_variable + ) concept_definition = input("Please enter a definition for this concept:\t") # add concept to InterLex and get URL # Add personal data element to InterLex - ilx_output = AddConceptToInterlex(ilx_obj=ilx_obj, label=concept_label, definition=concept_definition) + ilx_output = AddConceptToInterlex( + ilx_obj=ilx_obj, label=concept_label, definition=concept_definition + ) return ilx_output + def annotate_data_element(source_variable, current_tuple, source_variable_annotations): - ''' + """ :source_variable: variable name for which we're annotating :current_tuple: this is the tuple key of the :source_variable: in the dictionary :source_variable_annotations:. These are compound keys :source_variable_annotations: dictionary of variable annotations. - ''' + """ # user instructions - print("\nYou will now be asked a series of questions to annotate your term: %s" % source_variable) + print( + "\nYou will now be asked a series of questions to annotate your term: %s" + % source_variable + ) # collect term information from user - term_label = input("Please enter a full name to associate with the term [%s]:\t" % source_variable) - if term_label == '': + term_label = input( + "Please enter a full name to associate with the term [%s]:\t" % source_variable + ) + if term_label == "": term_label = source_variable term_definition = input("Please enter a definition for this term:\t") @@ -1758,49 +2454,64 @@ def annotate_data_element(source_variable, current_tuple, source_variable_annota while True: print("Please enter the value type for this term from the following list:") print("\t 1: string - The string datatype represents character strings") - print("\t 2: categorical - A variable that can take on one of a limited number of possible values, assigning each to a nominal category on the basis of some qualitative property.") + print( + "\t 2: categorical - A variable that can take on one of a limited number of possible values, assigning each to a nominal category on the basis of some qualitative property." 
+ ) print("\t 3: boolean - Binary-valued logic:{true,false}") - print("\t 4: integer - Integer is a number that can be written without a fractional component") - print("\t 5: float - Float consists of the values m × 2^e, where m is an integer whose absolute value is less than 2^24, and e is an integer between -149 and 104, inclusive") - print("\t 6: double - Double consists of the values m × 2^e, where m is an integer whose absolute value is less than 2^53, and e is an integer between -1075 and 970, inclusive") + print( + "\t 4: integer - Integer is a number that can be written without a fractional component" + ) + print( + "\t 5: float - Float consists of the values m × 2^e, where m is an integer whose absolute value is less than 2^24, and e is an integer between -149 and 104, inclusive" + ) + print( + "\t 6: double - Double consists of the values m × 2^e, where m is an integer whose absolute value is less than 2^53, and e is an integer between -1075 and 970, inclusive" + ) print("\t 7: duration - Duration represents a duration of time") - print("\t 8: dateTime - Values with integer-valued year, month, day, hour and minute properties, a decimal-valued second property, and a boolean timezoned property.") + print( + "\t 8: dateTime - Values with integer-valued year, month, day, hour and minute properties, a decimal-valued second property, and a boolean timezoned property." + ) print("\t 9: time - Time represents an instant of time that recurs every day") - print("\t 10: date - Date consists of top-open intervals of exactly one day in length on the timelines of dateTime, beginning on the beginning moment of each day (in each timezone)") - print("\t 11: anyURI - anyURI represents a Uniform Resource Identifier Reference (URI). An anyURI value can be absolute or relative, and may have an optional fragment identifier") + print( + "\t 10: date - Date consists of top-open intervals of exactly one day in length on the timelines of dateTime, beginning on the beginning moment of each day (in each timezone)" + ) + print( + "\t 11: anyURI - anyURI represents a Uniform Resource Identifier Reference (URI). 
An anyURI value can be absolute or relative, and may have an optional fragment identifier" + ) term_datatype = input("Please enter the datatype [1:11]:\t") # check datatypes if not in [integer,real,categorical] repeat until it is if int(term_datatype) >= 1 and int(term_datatype) <= 11: - if(int(term_datatype) == 1): + if int(term_datatype) == 1: term_datatype = URIRef(Constants.XSD["string"]) - elif (int(term_datatype) == 3): + elif int(term_datatype) == 3: term_datatype = URIRef(Constants.XSD["boolean"]) - elif (int(term_datatype) == 4): + elif int(term_datatype) == 4: term_datatype = URIRef(Constants.XSD["integer"]) - elif (int(term_datatype) == 5): + elif int(term_datatype) == 5: term_datatype = URIRef(Constants.XSD["float"]) - elif (int(term_datatype) == 6): + elif int(term_datatype) == 6: term_datatype = URIRef(Constants.XSD["double"]) - elif (int(term_datatype) == 7): + elif int(term_datatype) == 7: term_datatype = URIRef(Constants.XSD["duration"]) - elif (int(term_datatype) == 8): + elif int(term_datatype) == 8: term_datatype = URIRef(Constants.XSD["dateTime"]) - elif (int(term_datatype) == 9): + elif int(term_datatype) == 9: term_datatype = URIRef(Constants.XSD["time"]) - elif (int(term_datatype) == 10): + elif int(term_datatype) == 10: term_datatype = URIRef(Constants.XSD["date"]) - elif (int(term_datatype) == 11): + elif int(term_datatype) == 11: term_datatype = URIRef(Constants.XSD["anyURI"]) - elif (int(term_datatype) == 2): + elif int(term_datatype) == 2: term_datatype = URIRef(Constants.XSD["complexType"]) break # now check if term_datatype is categorical and if so let's get the label <-> value mappings if term_datatype == URIRef(Constants.XSD["complexType"]): - # ask user for the number of categories while True: - num_categories = input("Please enter the number of categories/labels for this term:\t") + num_categories = input( + "Please enter the number of categories/labels for this term:\t" + ) # check if user supplied a number else repeat question try: val = int(num_categories) @@ -1809,15 +2520,22 @@ def annotate_data_element(source_variable, current_tuple, source_variable_annota print("That's not an integer, please try again!") # loop over number of categories and collect information - cat_value = input("Are there numerical values associated with your text-based categories [yes]?\t") - if (cat_value in ['Y', 'y', 'YES', 'yes', 'Yes']) or (cat_value == ""): + cat_value = input( + "Are there numerical values associated with your text-based categories [yes]?\t" + ) + if (cat_value in ["Y", "y", "YES", "yes", "Yes"]) or (cat_value == ""): # if yes then store this as a dictionary cat_label: cat_value term_category = {} for category in range(1, int(num_categories) + 1): # term category dictionary has labels as keys and value associated with label as value - cat_label = input("Please enter the text string label for the category %d:\t" % category) - cat_value = input("Please enter the value associated with label \"%s\":\t" % cat_label) + cat_label = input( + "Please enter the text string label for the category %d:\t" + % category + ) + cat_value = input( + 'Please enter the value associated with label "%s":\t' % cat_label + ) term_category[cat_label] = cat_value else: @@ -1825,7 +2543,10 @@ def annotate_data_element(source_variable, current_tuple, source_variable_annota term_category = [] for category in range(1, int(num_categories) + 1): # term category dictionary has labels as keys and value associated with label as value - cat_label = input("Please enter the text string label 
for the category %d:\t" % category) + cat_label = input( + "Please enter the text string label for the category %d:\t" + % category + ) term_category.append(cat_label) # if term is not categorical then ask for min/max values. If it is categorical then simply extract @@ -1835,68 +2556,105 @@ def annotate_data_element(source_variable, current_tuple, source_variable_annota term_max = input("Please enter the maximum value [NA]:\t") term_units = input("Please enter the units [NA]:\t") # check if responseOptions is a key, if not create it - if 'responseOptions' not in source_variable_annotations[current_tuple].keys(): - source_variable_annotations[current_tuple]['responseOptions'] = {} + if "responseOptions" not in source_variable_annotations[current_tuple].keys(): + source_variable_annotations[current_tuple]["responseOptions"] = {} # if user set any of these then store else ignore - source_variable_annotations[current_tuple]['responseOptions']['unitCode'] = term_units - source_variable_annotations[current_tuple]['responseOptions']['minValue'] = term_min - source_variable_annotations[current_tuple]['responseOptions']['maxValue'] = term_max + source_variable_annotations[current_tuple]["responseOptions"][ + "unitCode" + ] = term_units + source_variable_annotations[current_tuple]["responseOptions"][ + "minValue" + ] = term_min + source_variable_annotations[current_tuple]["responseOptions"][ + "maxValue" + ] = term_max # if the categorical data has numeric values then we can infer a min/max - elif cat_value in ['Y', 'y', 'YES', 'yes', 'Yes']: + elif cat_value in ["Y", "y", "YES", "yes", "Yes"]: # check if responseOptions is a key, if not create it - if 'responseOptions' not in source_variable_annotations[current_tuple].keys(): - source_variable_annotations[current_tuple]['responseOptions'] = {} - source_variable_annotations[current_tuple]['responseOptions']['minValue'] = min(term_category.values()) - source_variable_annotations[current_tuple]['responseOptions']['maxValue'] = max(term_category.values()) - source_variable_annotations[current_tuple]['responseOptions']['unitCode'] = 'NA' + if "responseOptions" not in source_variable_annotations[current_tuple].keys(): + source_variable_annotations[current_tuple]["responseOptions"] = {} + source_variable_annotations[current_tuple]["responseOptions"]["minValue"] = min( + term_category.values() + ) + source_variable_annotations[current_tuple]["responseOptions"]["maxValue"] = max( + term_category.values() + ) + source_variable_annotations[current_tuple]["responseOptions"]["unitCode"] = "NA" # categorical with no min/max values else: # check if responseOptions is a key, if not create it - if 'responseOptions' not in source_variable_annotations[current_tuple].keys(): - source_variable_annotations[current_tuple]['responseOptions'] = {} - source_variable_annotations[current_tuple]['responseOptions']['minValue'] = 'NA' - source_variable_annotations[current_tuple]['responseOptions']['maxValue'] = 'NA' - source_variable_annotations[current_tuple]['responseOptions']['unitCode'] = 'NA' + if "responseOptions" not in source_variable_annotations[current_tuple].keys(): + source_variable_annotations[current_tuple]["responseOptions"] = {} + source_variable_annotations[current_tuple]["responseOptions"]["minValue"] = "NA" + source_variable_annotations[current_tuple]["responseOptions"]["maxValue"] = "NA" + source_variable_annotations[current_tuple]["responseOptions"]["unitCode"] = "NA" # set term variable name as column from CSV file we're currently interrogating 
term_variable_name = source_variable # store term info in dictionary # check if responseOptions is a key, if not create it - if 'responseOptions' not in source_variable_annotations[current_tuple].keys(): - source_variable_annotations[current_tuple]['responseOptions'] = {} - source_variable_annotations[current_tuple]['label'] = term_label - source_variable_annotations[current_tuple]['description'] = term_definition - source_variable_annotations[current_tuple]['source_variable'] = str(source_variable) - source_variable_annotations[current_tuple]['responseOptions']['valueType'] = term_datatype - source_variable_annotations[current_tuple]['associatedWith'] = "NIDM" + if "responseOptions" not in source_variable_annotations[current_tuple].keys(): + source_variable_annotations[current_tuple]["responseOptions"] = {} + source_variable_annotations[current_tuple]["label"] = term_label + source_variable_annotations[current_tuple]["description"] = term_definition + source_variable_annotations[current_tuple]["source_variable"] = str(source_variable) + source_variable_annotations[current_tuple]["responseOptions"][ + "valueType" + ] = term_datatype + source_variable_annotations[current_tuple]["associatedWith"] = "NIDM" if term_datatype == URIRef(Constants.XSD["complexType"]): - source_variable_annotations[current_tuple]['responseOptions']['choices'] = term_category + source_variable_annotations[current_tuple]["responseOptions"][ + "choices" + ] = term_category # print mappings - print("\n*************************************************************************************") + print( + "\n*************************************************************************************" + ) print("Stored mapping: %s -> " % source_variable) - print("label: %s" % source_variable_annotations[current_tuple]['label']) - print("source variable: %s" % source_variable_annotations[current_tuple]['source_variable']) - print("description: %s" % source_variable_annotations[current_tuple]['description']) - print("valueType: %s" % source_variable_annotations[current_tuple]['responseOptions']['valueType']) + print("label: %s" % source_variable_annotations[current_tuple]["label"]) + print( + "source variable: %s" + % source_variable_annotations[current_tuple]["source_variable"] + ) + print("description: %s" % source_variable_annotations[current_tuple]["description"]) + print( + "valueType: %s" + % source_variable_annotations[current_tuple]["responseOptions"]["valueType"] + ) # left for legacy purposes - if 'hasUnit' in source_variable_annotations[current_tuple]: - print("hasUnit: %s" % source_variable_annotations[current_tuple]['hasUnit']) - elif 'unitCode' in source_variable_annotations[current_tuple]['responseOptions']: - print("hasUnit: %s" % source_variable_annotations[current_tuple]['responseOptions']['unitCode']) - if 'minValue' in source_variable_annotations[current_tuple]['responseOptions']: - print("minimumValue: %s" % source_variable_annotations[current_tuple]['responseOptions']['minValue']) - if 'maxValue' in source_variable_annotations[current_tuple]['responseOptions']: - print("maximumValue: %s" % source_variable_annotations[current_tuple]['responseOptions']['maxValue']) + if "hasUnit" in source_variable_annotations[current_tuple]: + print("hasUnit: %s" % source_variable_annotations[current_tuple]["hasUnit"]) + elif "unitCode" in source_variable_annotations[current_tuple]["responseOptions"]: + print( + "hasUnit: %s" + % source_variable_annotations[current_tuple]["responseOptions"]["unitCode"] + ) + if "minValue" in 
source_variable_annotations[current_tuple]["responseOptions"]: + print( + "minimumValue: %s" + % source_variable_annotations[current_tuple]["responseOptions"]["minValue"] + ) + if "maxValue" in source_variable_annotations[current_tuple]["responseOptions"]: + print( + "maximumValue: %s" + % source_variable_annotations[current_tuple]["responseOptions"]["maxValue"] + ) if term_datatype == URIRef(Constants.XSD["complexType"]): - print("choices: %s" % source_variable_annotations[current_tuple]['responseOptions']['choices']) - print("---------------------------------------------------------------------------------------") + print( + "choices: %s" + % source_variable_annotations[current_tuple]["responseOptions"]["choices"] + ) + print( + "---------------------------------------------------------------------------------------" + ) -def DD_UUID (element,dd_struct,dataset_identifier=None): - ''' + +def DD_UUID(element, dd_struct, dataset_identifier=None): + """ This function will produce a hash of the data dictionary (personal data element) properties defined by the user for use as a UUID. The data dictionary key is a tuple identifying the file and variable name within that file to be encoded with a UUID. The idea is that if the data dictionaries for a @@ -1904,7 +2662,7 @@ def DD_UUID (element,dd_struct,dataset_identifier=None): :param element: element in dd_struct to create UUID for within the dd_struct :param dd_struct: data dictionary json structure :return: hash of - ''' + """ # evaluate the compound data dictionary key and loop over the properties key_tuple = eval(element) @@ -1912,45 +2670,51 @@ def DD_UUID (element,dd_struct,dataset_identifier=None): # added getUUID to property string to solve problem where all openneuro datasets that have the same # source variable name and properties don't end up having the same UUID as they are sometimes not # the same and end up being added to the same entity when merging graphs across all openneuro projects - # if a dataset identifier is not provided then we use a random UUID + # if a dataset identifier is not provided then we use a random UUID if dataset_identifier is not None: property_string = dataset_identifier else: property_string = getUUID() for key, value in dd_struct[str(key_tuple)].items(): - if key == 'label': + if key == "label": property_string = property_string + str(value) # added to support 'reponseOptions' reproschema format - if (key == 'responseOptions'): - for subkey,subvalue in dd_struct[str(key_tuple)]['responseOptions'].items(): - if (subkey == 'levels') or (subkey == 'Levels') or (subkey == 'choices'): + if key == "responseOptions": + for subkey, subvalue in dd_struct[str(key_tuple)][ + "responseOptions" + ].items(): + if ( + (subkey == "levels") + or (subkey == "Levels") + or (subkey == "choices") + ): property_string = property_string + str(subvalue) - if subkey == 'valueType': + if subkey == "valueType": property_string = property_string + str(subvalue) - if (subkey == 'hasUnit') or (subkey == 'unitCode'): + if (subkey == "hasUnit") or (subkey == "unitCode"): property_string = property_string + str(subvalue) - if key == 'source_variable': + if key == "source_variable": variable_name = value - crc32hash = base_repr(crc32(str(property_string).encode()), 32).lower() niiri_ns = Namespace(Constants.NIIRI) cde_id = URIRef(niiri_ns + safe_string(variable_name) + "_" + str(crc32hash)) return cde_id -def DD_to_nidm(dd_struct,dataset_identifier=None): - ''' + +def DD_to_nidm(dd_struct, dataset_identifier=None): + """ Takes a DD json 
structure and returns nidm CDE-style graph to be added to NIDM documents :param DD: :return: NIDM graph - ''' + """ # create empty graph for CDEs - g=Graph() - g.bind(prefix='prov',namespace=Constants.PROV) - g.bind(prefix='dct',namespace=Constants.DCT) - g.bind(prefix='bids',namespace=Constants.BIDS) + g = Graph() + g.bind(prefix="prov", namespace=Constants.PROV) + g.bind(prefix="dct", namespace=Constants.DCT) + g.bind(prefix="bids", namespace=Constants.BIDS) # key_num = 0 # for each named tuple key in data dictionary @@ -1965,138 +2729,177 @@ def DD_to_nidm(dd_struct,dataset_identifier=None): # add the DataElement RDF type in the source namespace key_tuple = eval(key) for subkey, item in key_tuple._asdict().items(): - - if subkey == 'variable': - - #item_ns = Namespace(dd_struct[str(key_tuple)]["url"]+"/") - #g.bind(prefix=safe_string(item), namespace=item_ns) + if subkey == "variable": + # item_ns = Namespace(dd_struct[str(key_tuple)]["url"]+"/") + # g.bind(prefix=safe_string(item), namespace=item_ns) nidm_ns = Namespace(Constants.NIDM) - g.bind(prefix='nidm', namespace=nidm_ns) + g.bind(prefix="nidm", namespace=nidm_ns) niiri_ns = Namespace(Constants.NIIRI) - g.bind(prefix='niiri', namespace=niiri_ns) + g.bind(prefix="niiri", namespace=niiri_ns) ilx_ns = Namespace(Constants.INTERLEX) - g.bind(prefix='ilx', namespace=ilx_ns) + g.bind(prefix="ilx", namespace=ilx_ns) # cde_id = item_ns[str(key_num).zfill(4)] # hash the key_tuple (e.g. DD(source=[FILENAME],variable=[VARNAME])) - #crc32hash = base_repr(crc32(str(key).encode()),32).lower() + # crc32hash = base_repr(crc32(str(key).encode()),32).lower() # md5hash = hashlib.md5(str(key).encode()).hexdigest() - - cde_id = DD_UUID(key,dd_struct,dataset_identifier) - #cde_id = URIRef(niiri_ns + safe_string(item) + "_" + str(crc32hash)) - g.add((cde_id,RDF.type, Constants.NIDM['PersonalDataElement'])) - g.add((cde_id,RDF.type, Constants.PROV['Entity'])) + cde_id = DD_UUID(key, dd_struct, dataset_identifier) + # cde_id = URIRef(niiri_ns + safe_string(item) + "_" + str(crc32hash)) + g.add((cde_id, RDF.type, Constants.NIDM["PersonalDataElement"])) + g.add((cde_id, RDF.type, Constants.PROV["Entity"])) # DBK: 3/25/21 - added to connect nidm:PersonalDataElement to the more general nidm:DataElement as # subclass to aid in queries - g.add((Constants.NIDM['PersonalDataElement'], Constants.RDFS['subClassOf'], - Constants.NIDM['DataElement'])) + g.add( + ( + Constants.NIDM["PersonalDataElement"], + Constants.RDFS["subClassOf"], + Constants.NIDM["DataElement"], + ) + ) # this code adds the properties about the particular CDE into NIDM document for key, value in dd_struct[str(key_tuple)].items(): - if key == 'definition': - g.add((cde_id,RDFS['comment'],Literal(value))) - elif key == 'description': - g.add((cde_id,Constants.DCT['description'],Literal(value))) - elif key == 'url': - g.add((cde_id,Constants.NIDM['url'],URIRef(value))) - elif key == 'label': - g.add((cde_id,Constants.RDFS['label'],Literal(value))) - elif (key == 'levels') or (key == 'Levels'): - g.add((cde_id,Constants.NIDM['levels'],Literal(value))) - elif key == 'source_variable': - g.add((cde_id, Constants.NIDM['sourceVariable'], Literal(value))) - elif key == 'isAbout': - #dct_ns = Namespace(Constants.DCT) - #g.bind(prefix='dct', namespace=dct_ns) + if key == "definition": + g.add((cde_id, RDFS["comment"], Literal(value))) + elif key == "description": + g.add((cde_id, Constants.DCT["description"], Literal(value))) + elif key == "url": + g.add((cde_id, Constants.NIDM["url"], URIRef(value))) + 
elif key == "label": + g.add((cde_id, Constants.RDFS["label"], Literal(value))) + elif (key == "levels") or (key == "Levels"): + g.add((cde_id, Constants.NIDM["levels"], Literal(value))) + elif key == "source_variable": + g.add((cde_id, Constants.NIDM["sourceVariable"], Literal(value))) + elif key == "isAbout": + # dct_ns = Namespace(Constants.DCT) + # g.bind(prefix='dct', namespace=dct_ns) # added by DBK for multiple isAbout URLs and storing the labels along with URLs # first get a uuid has for the isAbout collection for this we'll use a hash of the isAbout list # as a string - #crc32hash = base_repr(crc32(str(value).encode()), 32).lower() + # crc32hash = base_repr(crc32(str(value).encode()), 32).lower() # now create the collection and for each isAbout create an entity to add to collection with # properties for label and url - #g.add((isabout_collection_id, RDF.type, Constants.PROV['Collection'])) + # g.add((isabout_collection_id, RDF.type, Constants.PROV['Collection'])) # for each isAbout entry, create new prov:Entity, store metadata and link it to the collection - #if we have multiple isAbouts then it will be stored as a list of dicts + # if we have multiple isAbouts then it will be stored as a list of dicts if isinstance(value, list): for subdict in value: for isabout_key, isabout_value in subdict.items(): - if (isabout_key == '@id') or (isabout_key == 'url'): + if (isabout_key == "@id") or (isabout_key == "url"): last_id = isabout_value # add isAbout key which is the url - g.add((cde_id, Constants.NIDM['isAbout'], URIRef(isabout_value))) - elif isabout_key == 'label': + g.add( + ( + cde_id, + Constants.NIDM["isAbout"], + URIRef(isabout_value), + ) + ) + elif isabout_key == "label": # now add another entity to contain the label - g.add((URIRef(last_id), RDF.type,Constants.PROV['Entity'])) - g.add((URIRef(last_id), Constants.RDFS['label'], Literal(isabout_value))) + g.add( + ( + URIRef(last_id), + RDF.type, + Constants.PROV["Entity"], + ) + ) + g.add( + ( + URIRef(last_id), + Constants.RDFS["label"], + Literal(isabout_value), + ) + ) # else we only have 1 isabout which is a dict else: - for isabout_key, isabout_value in value.items(): - if (isabout_key == '@id') or (isabout_key == 'url'): + if (isabout_key == "@id") or (isabout_key == "url"): last_id = isabout_value # add isAbout key which is the url - g.add((cde_id, Constants.NIDM['isAbout'], URIRef(isabout_value))) - elif isabout_key == 'label': + g.add( + ( + cde_id, + Constants.NIDM["isAbout"], + URIRef(isabout_value), + ) + ) + elif isabout_key == "label": # now add another entity to contain the label - g.add((URIRef(last_id), RDF.type,Constants.PROV['Entity'])) - g.add((URIRef(last_id), Constants.RDFS['label'], Literal(isabout_value))) - - elif key == 'valueType': - g.add((cde_id, Constants.NIDM['valueType'], URIRef(value))) - elif (key == 'minValue') or (key == 'minimumValue'): - g.add((cde_id, Constants.NIDM['minValue'], Literal(value))) - elif (key == 'maxValue') or (key == 'maximumValue'): - g.add((cde_id, Constants.NIDM['maxValue'], Literal(value))) - elif key == 'hasUnit': - g.add((cde_id, Constants.NIDM['unitCode'], Literal(value))) - elif key == 'sameAs': - g.add((cde_id, Constants.NIDM['sameAs'], URIRef(value))) - elif key == 'associatedWith': - g.add((cde_id, Constants.INTERLEX['ilx_0739289'], Literal(value))) - elif key == 'allowableValues': - g.add((cde_id, Constants.BIDS['allowableValues'], Literal(value))) + g.add((URIRef(last_id), RDF.type, Constants.PROV["Entity"])) + g.add( + ( + URIRef(last_id), + 
Constants.RDFS["label"], + Literal(isabout_value), + ) + ) + + elif key == "valueType": + g.add((cde_id, Constants.NIDM["valueType"], URIRef(value))) + elif (key == "minValue") or (key == "minimumValue"): + g.add((cde_id, Constants.NIDM["minValue"], Literal(value))) + elif (key == "maxValue") or (key == "maximumValue"): + g.add((cde_id, Constants.NIDM["maxValue"], Literal(value))) + elif key == "hasUnit": + g.add((cde_id, Constants.NIDM["unitCode"], Literal(value))) + elif key == "sameAs": + g.add((cde_id, Constants.NIDM["sameAs"], URIRef(value))) + elif key == "associatedWith": + g.add((cde_id, Constants.INTERLEX["ilx_0739289"], Literal(value))) + elif key == "allowableValues": + g.add((cde_id, Constants.BIDS["allowableValues"], Literal(value))) # testing # g.serialize(destination="/Users/dbkeator/Downloads/csv2nidm_cde.ttl", format='turtle') - - return g -def add_attributes_with_cde(prov_object, cde, row_variable, value): +def add_attributes_with_cde(prov_object, cde, row_variable, value): # find the ID in cdes where nidm:source_variable matches the row_variable # qres = cde.subjects(predicate=Constants.RDFS['label'],object=Literal(row_variable)) - qres = cde.subjects(predicate=Constants.NIDM['sourceVariable'],object=Literal(row_variable)) + qres = cde.subjects( + predicate=Constants.NIDM["sourceVariable"], object=Literal(row_variable) + ) for s in qres: entity_id = s # find prefix matching our url in rdflib graph...this is because we're bouncing between # prov and rdflib objects - for prefix,namespace in cde.namespaces(): - if namespace == URIRef(entity_id.rsplit('/',1)[0]+"/"): + for prefix, namespace in cde.namespaces(): + if namespace == URIRef(entity_id.rsplit("/", 1)[0] + "/"): cde_prefix = prefix - # this basically stores the row_data with the predicate being the cde id from above. - prov_object.add_attributes({QualifiedName(provNamespace(prefix=cde_prefix, \ - uri=entity_id.rsplit('/',1)[0]+"/"),entity_id.rsplit('/', 1)[-1]):value}) - #prov_object.add_attributes({QualifiedName(Constants.NIIRI,entity_id):value}) + # this basically stores the row_data with the predicate being the cde id from above. + prov_object.add_attributes( + { + QualifiedName( + provNamespace( + prefix=cde_prefix, uri=entity_id.rsplit("/", 1)[0] + "/" + ), + entity_id.rsplit("/", 1)[-1], + ): value + } + ) + # prov_object.add_attributes({QualifiedName(Constants.NIIRI,entity_id):value}) break - -def addDataladDatasetUUID(project_uuid,bidsroot_directory,graph): - ''' +def addDataladDatasetUUID(project_uuid, bidsroot_directory, graph): + """ This function will add the datalad unique ID for this dataset to the project entity uuid in graph. This UUID will ultimately be used by datalad to identify the dataset :param project_uuid: unique project activity ID in graph to add tuple :param bidsroot_directory: root directory for which to collect datalad uuids :return: augmented graph with datalad unique IDs - ''' + """ -def addGitAnnexSources(obj, bids_root, filepath = None): - ''' + +def addGitAnnexSources(obj, bids_root, filepath=None): + """ This function will add git-annex sources as tuples to entity uuid in graph. These sources can ultimately be used to retrieve the file(s) described in the entity uuid using git-annex (or datalad) :param obj: entity/activity object to add tuples @@ -2104,11 +2907,11 @@ def addGitAnnexSources(obj, bids_root, filepath = None): git annex source url will be added to obj instead of filepath git annex source url. 
:param bids_root: root directory of BIDS dataset :return: number of sources found - ''' + """ # load git annex information if exists try: - repo = AnnexRepo(bids_root,create=False) + repo = AnnexRepo(bids_root, create=False) if filepath is not None: sources = repo.get_urls(filepath) else: @@ -2118,39 +2921,35 @@ def addGitAnnexSources(obj, bids_root, filepath = None): # add to graph uuid obj.add_attributes({Constants.PROV["Location"]: URIRef(source)}) - return len(sources) except Exception as e: - #if "No annex found at" not in str(e): + # if "No annex found at" not in str(e): # print("Warning, error with AnnexRepo (Utils.py, addGitAnnexSources): %s" %str(e)) return 0 def tupleKeysToSimpleKeys(dict): - ''' + """ This function will change the keys in the supplied dictionary from tuple keys (e.g. from ..core.Constants import DD) to simple keys where key is variable name :param dict: dictionary created from map_variables_to_terms :return: new dictionary with simple keys - ''' + """ - new_dict={} + new_dict = {} for key in dict: key_tuple = eval(key) for subkey, item in key_tuple._asdict().items(): - if subkey == 'variable': - new_dict[item]={} + if subkey == "variable": + new_dict[item] = {} for varkeys, varvalues in dict[str(key_tuple)].items(): new_dict[item][varkeys] = varvalues - return new_dict - def validate_uuid(uuid_string): - """ Validate that a UUID string is in fact a valid uuid4. diff --git a/nidm/experiment/__init__.py b/nidm/experiment/__init__.py index f9a7a345..27413e69 100644 --- a/nidm/experiment/__init__.py +++ b/nidm/experiment/__init__.py @@ -1,15 +1,15 @@ -from .Core import Core -from .Project import Project -from .Session import Session from .Acquisition import Acquisition -from .AssessmentAcquisition import AssessmentAcquisition -from .MRAcquisition import MRAcquisition -from .PETAcquisition import PETAcquisition from .AcquisitionObject import AcquisitionObject -from .MRObject import MRObject -from .PETObject import PETObject -from .DemographicsObject import DemographicsObject +from .AssessmentAcquisition import AssessmentAcquisition from .AssessmentObject import AssessmentObject +from .Core import Core +from .DataElement import DataElement +from .DemographicsObject import DemographicsObject from .Derivative import Derivative from .DerivativeObject import DerivativeObject -from .DataElement import DataElement +from .MRAcquisition import MRAcquisition +from .MRObject import MRObject +from .PETAcquisition import PETAcquisition +from .PETObject import PETObject +from .Project import Project +from .Session import Session diff --git a/nidm/experiment/tests/create_testfile.py b/nidm/experiment/tests/create_testfile.py index 8b7157b9..c485be4b 100644 --- a/nidm/experiment/tests/create_testfile.py +++ b/nidm/experiment/tests/create_testfile.py @@ -1,70 +1,89 @@ -import os,sys - -from nidm.experiment import Project,Session,MRAcquisition,MRObject, \ - AssessmentAcquisition, AssessmentObject, DemographicsObject +import os +import sys from nidm.core import Constants +from nidm.experiment import ( + AssessmentAcquisition, + AssessmentObject, + DemographicsObject, + MRAcquisition, + MRObject, + Project, + Session, +) # dj TODO: adding more tests; I only put the Dave's pipeline to a function def main(argv): - #create new nidm-experiment document with project - kwargs={Constants.NIDM_PROJECT_NAME:"Test Project name",Constants.NIDM_PROJECT_IDENTIFIER:"123456",Constants.NIDM_PROJECT_DESCRIPTION:"Test Project Description"} + # create new nidm-experiment document with project + kwargs = { 
+ Constants.NIDM_PROJECT_NAME: "Test Project name", + Constants.NIDM_PROJECT_IDENTIFIER: "123456", + Constants.NIDM_PROJECT_DESCRIPTION: "Test Project Description", + } project = Project(attributes=kwargs) - - #test add string attribute with existing namespace - #nidm_doc.addLiteralAttribute("nidm","isFun","ForMe") + # test add string attribute with existing namespace + # nidm_doc.addLiteralAttribute("nidm","isFun","ForMe") # project.add_attributes({Constants.PROV["Location"]:"http://nidm.nidash.org/"}) - #test add PI to investigation - project_PI = project.add_person(attributes={Constants.NIDM_FAMILY_NAME:"Doe", Constants.NIDM_GIVEN_NAME:"John"}) + # test add PI to investigation + project_PI = project.add_person( + attributes={ + Constants.NIDM_FAMILY_NAME: "Doe", + Constants.NIDM_GIVEN_NAME: "John", + } + ) - #add qualified association of project PI to project activity - project.add_qualified_association(person=project_PI,role=Constants.NIDM_PI) + # add qualified association of project PI to project activity + project.add_qualified_association(person=project_PI, role=Constants.NIDM_PI) - #test add session to graph and associate with project + # test add session to graph and associate with project session = Session(project) - session.add_attributes({Constants.NIDM_DESCRIPTION:"test session activity"}) + session.add_attributes({Constants.NIDM_DESCRIPTION: "test session activity"}) - #test add MR acquisition activity / entity to graph and associate with session + # test add MR acquisition activity / entity to graph and associate with session acq_act = MRAcquisition(session=session) - #test add acquisition object entity to graph associated with participant role NIDM_PARTICIPANT + # test add acquisition object entity to graph associated with participant role NIDM_PARTICIPANT acq_entity = MRObject(acquisition=acq_act) - #add person to graph - person = acq_act.add_person(attributes={Constants.NIDM_GIVEN_NAME:"George"}) - #add qualified association of person with role NIDM_PARTICIPANT, and associated with acquistion activity + # add person to graph + person = acq_act.add_person(attributes={Constants.NIDM_GIVEN_NAME: "George"}) + # add qualified association of person with role NIDM_PARTICIPANT, and associated with acquistion activity acq_act.add_qualified_association(person=person, role=Constants.NIDM_PARTICIPANT) - - #test add Assessment acquisition activity / entity to graph and associate with session + # test add Assessment acquisition activity / entity to graph and associate with session acq_act = AssessmentAcquisition(session=session) - #test add acquisition object entity to graph associated with participant role NIDM_PARTICIPANT + # test add acquisition object entity to graph associated with participant role NIDM_PARTICIPANT acq_entity = AssessmentObject(acquisition=acq_act) - acq_entity.add_attributes({Constants.NIDM["Q1"]:"Q1 Answer",Constants.NIDM["Q2"]:"Q2 Answer" }) - #associate person as participant + acq_entity.add_attributes( + {Constants.NIDM["Q1"]: "Q1 Answer", Constants.NIDM["Q2"]: "Q2 Answer"} + ) + # associate person as participant acq_act.add_qualified_association(person=person, role=Constants.NIDM_PARTICIPANT) - - #test add DemographicsAssessment acquisition activity / entity to graph and associate with session + # test add DemographicsAssessment acquisition activity / entity to graph and associate with session acq_act = AssessmentAcquisition(session=session) - #test add acquisition object entity to graph associated with participant role NIDM_PARTICIPANT + # test add 
acquisition object entity to graph associated with participant role NIDM_PARTICIPANT acq_entity = DemographicsObject(acquisition=acq_act) - #add new person to graph - person2 = acq_act.add_person(attributes={Constants.NIDM_FAMILY_NAME:"Doe", \ - Constants.NIDM_GIVEN_NAME:"John"}) - #associate person2 with assessment acquisition + # add new person to graph + person2 = acq_act.add_person( + attributes={ + Constants.NIDM_FAMILY_NAME: "Doe", + Constants.NIDM_GIVEN_NAME: "John", + } + ) + # associate person2 with assessment acquisition acq_act.add_qualified_association(person=person2, role=Constants.NIDM_PARTICIPANT) - acq_entity.add_attributes({Constants.NIDM_AGE:60,Constants.NIDM_GENDER:"Male" }) + acq_entity.add_attributes({Constants.NIDM_AGE: 60, Constants.NIDM_GENDER: "Male"}) + # save a turtle file + with open("test_nidm.ttl", "w") as f: + f.write(project.serializeTurtle()) - #save a turtle file - with open("test_nidm.ttl",'w') as f: - f.write (project.serializeTurtle()) + # save a DOT graph as PDF + project.save_DotGraph("test_nidm.png", format="png") - #save a DOT graph as PDF - project.save_DotGraph("test_nidm.png",format="png") if __name__ == "__main__": - main(sys.argv[1:]) + main(sys.argv[1:]) diff --git a/nidm/experiment/tests/read_nidm.py b/nidm/experiment/tests/read_nidm.py index 2a0ab082..85e02cd5 100644 --- a/nidm/experiment/tests/read_nidm.py +++ b/nidm/experiment/tests/read_nidm.py @@ -1,26 +1,29 @@ -import os,sys - -from nidm.experiment import Project,Session -from nidm.core import Constants -from nidm.experiment.Utils import read_nidm from argparse import ArgumentParser -from os.path import dirname, join, splitext import json +import os +from os.path import dirname, join, splitext +import sys +from nidm.core import Constants +from nidm.experiment import Project, Session +from nidm.experiment.Utils import read_nidm + def main(argv): parser = ArgumentParser() - #parse command line arguments - parser.add_argument('-nidm', dest='nidm_file', required=True, help="NIDM-Exp RDF File to import") - parser.add_argument('-out', dest='outfile',required=True, help="output file name") + # parse command line arguments + parser.add_argument( + "-nidm", dest="nidm_file", required=True, help="NIDM-Exp RDF File to import" + ) + parser.add_argument("-out", dest="outfile", required=True, help="output file name") args = parser.parse_args() project = read_nidm(args.nidm_file) - print("Project: \n %s" %project.get_uuid()) + print("Project: \n %s" % project.get_uuid()) sessions = project.get_sessions() print("Sessions:\n %s" % sessions) - acquisitions=[] + acquisitions = [] for session in sessions: acquisitions = session.get_acquisitions() print("Acquisitions:\n %s" % acquisitions) @@ -34,14 +37,16 @@ def main(argv): # derivatives - #and for derivatives + # and for derivatives print("Derivatives: \n %s" % project.get_derivatives()) for deriv in project.get_derivatives(): derivobj = deriv.get_derivative_objects() - print("Derivative Objects: \n %s" %derivobj) + print("Derivative Objects: \n %s" % derivobj) - with open(args.outfile, 'w') as f: - #serialize project for comparison with the original + with open(args.outfile, "w") as f: + # serialize project for comparison with the original f.write(project.serializeTurtle()) + + if __name__ == "__main__": - main(sys.argv[1:]) \ No newline at end of file + main(sys.argv[1:]) diff --git a/nidm/experiment/tests/termsearch.py b/nidm/experiment/tests/termsearch.py index 61093c5c..bf2fce99 100644 --- a/nidm/experiment/tests/termsearch.py +++ 
b/nidm/experiment/tests/termsearch.py @@ -1,17 +1,25 @@ -import os,sys -import pytest, pdb -from pprint import pprint - from argparse import ArgumentParser +import os +import pdb +from pprint import pprint +import sys from nidm.experiment import Utils +import pytest + def main(argv): - parser = ArgumentParser(description='This program will query SciCrunch term labels for query_string using key and print out the return JSON packet.') - parser.add_argument('-query_string', dest='query_string', required=True, help="Query String") - parser.add_argument('-key', dest='key', required=True, help="SciCrunch API key to use for query") + parser = ArgumentParser( + description="This program will query SciCrunch term labels for query_string using key and print out the return JSON packet." + ) + parser.add_argument( + "-query_string", dest="query_string", required=True, help="Query String" + ) + parser.add_argument( + "-key", dest="key", required=True, help="SciCrunch API key to use for query" + ) args = parser.parse_args() - #Test exact match search returning JSON package + # Test exact match search returning JSON package print("Testing term label search...") json_data = Utils.QuerySciCrunchTermLabel(args.key, args.query_string) print("Term label search returns:") @@ -19,34 +27,42 @@ def main(argv): pprint(json_data) print("\n\n") - #Test elastic search using CDEs and Terms + ancestors (simulates tagging sets of terms for NIDM use) returning JSON package + # Test elastic search using CDEs and Terms + ancestors (simulates tagging sets of terms for NIDM use) returning JSON package print("Testing elastic search...") json_data = Utils.QuerySciCrunchElasticSearch(args.key, args.query_string) print("Elastic search returns:") print("-------------------------------------------") pprint(json_data) - # print("\n\n-------------------------------------------") - # print("Example terms listing from elastic search:") - - #example printing term label, definition, and preferred URL - # for term in json_data['hits']['hits']: - # #find preferred URL - # for items in term['_source']['existing_ids']: - # if items['preferred']=='1': - # preferred_url=items['iri'] - # print("Label = %s \t Definition = %s \t Preferred URL = %s " %(term['_source']['label'],term['_source']['definition'],preferred_url)) - - #example of uber elastic search query returns dictionary of label, definition, and preferred_url + # print("\n\n-------------------------------------------") + # print("Example terms listing from elastic search:") + + # example printing term label, definition, and preferred URL + # for term in json_data['hits']['hits']: + # #find preferred URL + # for items in term['_source']['existing_ids']: + # if items['preferred']=='1': + # preferred_url=items['iri'] + # print("Label = %s \t Definition = %s \t Preferred URL = %s " %(term['_source']['label'],term['_source']['definition'],preferred_url)) + + # example of uber elastic search query returns dictionary of label, definition, and preferred_url print("\n\n-------------------------------------------") print("Example uber elastic search:") - results = Utils.GetNIDMTermsFromSciCrunch(args.key,args.query_string) - for key,value in results.items(): - print("Label: %s \t Definition: %s \t Preferred URL: %s " %(results[key]['label'],results[key]['definition'],results[key]['preferred_url'] )) + results = Utils.GetNIDMTermsFromSciCrunch(args.key, args.query_string) + for key, value in results.items(): + print( + "Label: %s \t Definition: %s \t Preferred URL: %s " + % ( + 
results[key]["label"], + results[key]["definition"], + results[key]["preferred_url"], + ) + ) + if __name__ == "__main__": - main(sys.argv[1:]) + main(sys.argv[1:]) + # very simple test, just checking if main does not give any error def test_main(): main(sys.argv[1:]) - diff --git a/nidm/experiment/tests/test_experiment.py b/nidm/experiment/tests/test_experiment.py index 3095bf3a..89b19483 100644 --- a/nidm/experiment/tests/test_experiment.py +++ b/nidm/experiment/tests/test_experiment.py @@ -1,90 +1,108 @@ -import os,sys - -from nidm.experiment import Project,Session,MRAcquisition,MRObject, \ - AssessmentAcquisition, AssessmentObject, DemographicsObject +import os +import sys from nidm.core import Constants +from nidm.experiment import ( + AssessmentAcquisition, + AssessmentObject, + DemographicsObject, + MRAcquisition, + MRObject, + Project, + Session, +) # dj TODO: adding more tests; I only put the Dave's pipeline to a function def main(argv): - #create new nidm-experiment document with project - kwargs={Constants.NIDM_PROJECT_NAME:"FBIRN_PhaseII",Constants.NIDM_PROJECT_IDENTIFIER:9610,Constants.NIDM_PROJECT_DESCRIPTION:"Test investigation"} + # create new nidm-experiment document with project + kwargs = { + Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII", + Constants.NIDM_PROJECT_IDENTIFIER: 9610, + Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation", + } project = Project(attributes=kwargs) - - #test add string attribute with existing namespace - #nidm_doc.addLiteralAttribute("nidm","isFun","ForMe") - project.add_attributes({Constants.NIDM["isFun"]:"ForMe"}) + # test add string attribute with existing namespace + # nidm_doc.addLiteralAttribute("nidm","isFun","ForMe") + project.add_attributes({Constants.NIDM["isFun"]: "ForMe"}) - #test adding string attribute with new namespace/term - project.addLiteralAttribute("fred","notFound","in namespaces","www.fred.org/") + # test adding string attribute with new namespace/term + project.addLiteralAttribute("fred", "notFound", "in namespaces", "www.fred.org/") - #test add float attribute + # test add float attribute project.addLiteralAttribute("nidm", "float", float(2.34)) - #test adding attributes in bulk with mix of existing and new namespaces - #nidm_doc.addAttributesWithNamespaces(nidm_doc.getProject(),[{"prefix":"nidm", "uri":nidm_doc.namespaces["nidm"], "term":"score", "value":int(15)}, \ - # {"prefix":"dave", "uri":"http://www.davidkeator.com/", "term":"isAwesome", "value":"15"}, \ - # {"prefix":"nidm", "uri":nidm_doc.namespaces["nidm"], "term":"value", "value":float(2.34)}]) - - #nidm_doc.addAttributes(nidm_doc.getProject(),{"nidm:test":int(15), "ncit:isTerminology":"15","ncit:joker":float(1)}) + # test adding attributes in bulk with mix of existing and new namespaces + # nidm_doc.addAttributesWithNamespaces(nidm_doc.getProject(),[{"prefix":"nidm", "uri":nidm_doc.namespaces["nidm"], "term":"score", "value":int(15)}, \ + # {"prefix":"dave", "uri":"http://www.davidkeator.com/", "term":"isAwesome", "value":"15"}, \ + # {"prefix":"nidm", "uri":nidm_doc.namespaces["nidm"], "term":"value", "value":float(2.34)}]) + # nidm_doc.addAttributes(nidm_doc.getProject(),{"nidm:test":int(15), "ncit:isTerminology":"15","ncit:joker":float(1)}) - #test add PI to investigation - project_PI = project.add_person(attributes={Constants.NIDM_FAMILY_NAME:"Keator", Constants.NIDM_GIVEN_NAME:"David"}) + # test add PI to investigation + project_PI = project.add_person( + attributes={ + Constants.NIDM_FAMILY_NAME: "Keator", + Constants.NIDM_GIVEN_NAME: "David", + } 
+ ) - #add qualified association of project PI to project activity - project.add_qualified_association(person=project_PI,role=Constants.NIDM_PI) + # add qualified association of project PI to project activity + project.add_qualified_association(person=project_PI, role=Constants.NIDM_PI) - #test add session to graph and associate with project + # test add session to graph and associate with project session = Session(project) - session.add_attributes({Constants.NIDM:"test"}) - #project.add_sessions(session) + session.add_attributes({Constants.NIDM: "test"}) + # project.add_sessions(session) - #test add MR acquisition activity / entity to graph and associate with session + # test add MR acquisition activity / entity to graph and associate with session acq_act = MRAcquisition(session=session) - #test add acquisition object entity to graph associated with participant role NIDM_PARTICIPANT + # test add acquisition object entity to graph associated with participant role NIDM_PARTICIPANT acq_entity = MRObject(acquisition=acq_act) - #add person to graph - person = acq_act.add_person(attributes={Constants.NIDM_GIVEN_NAME:"George"}) - #add qualified association of person with role NIDM_PARTICIPANT, and associated with acquistion activity + # add person to graph + person = acq_act.add_person(attributes={Constants.NIDM_GIVEN_NAME: "George"}) + # add qualified association of person with role NIDM_PARTICIPANT, and associated with acquistion activity acq_act.add_qualified_association(person=person, role=Constants.NIDM_PARTICIPANT) - - #test add Assessment acquisition activity / entity to graph and associate with session + # test add Assessment acquisition activity / entity to graph and associate with session acq_act = AssessmentAcquisition(session=session) - #test add acquisition object entity to graph associated with participant role NIDM_PARTICIPANT + # test add acquisition object entity to graph associated with participant role NIDM_PARTICIPANT acq_entity = AssessmentObject(acquisition=acq_act) - acq_entity.add_attributes({Constants.NIDM["Q1"]:"Q1 Answer",Constants.NIDM["Q2"]:"Q2 Answer" }) - #associate person as participant + acq_entity.add_attributes( + {Constants.NIDM["Q1"]: "Q1 Answer", Constants.NIDM["Q2"]: "Q2 Answer"} + ) + # associate person as participant acq_act.add_qualified_association(person=person, role=Constants.NIDM_PARTICIPANT) - - #test add DemographicsAssessment acquisition activity / entity to graph and associate with session + # test add DemographicsAssessment acquisition activity / entity to graph and associate with session acq_act = AssessmentAcquisition(session=session) - #test add acquisition object entity to graph associated with participant role NIDM_PARTICIPANT + # test add acquisition object entity to graph associated with participant role NIDM_PARTICIPANT acq_entity = DemographicsObject(acquisition=acq_act) - #add new person to graph - person2 = acq_act.add_person(attributes={Constants.NIDM_FAMILY_NAME:"Doe", \ - Constants.NIDM_GIVEN_NAME:"John"}) - #associate person2 with assessment acquisition + # add new person to graph + person2 = acq_act.add_person( + attributes={ + Constants.NIDM_FAMILY_NAME: "Doe", + Constants.NIDM_GIVEN_NAME: "John", + } + ) + # associate person2 with assessment acquisition acq_act.add_qualified_association(person=person2, role=Constants.NIDM_PARTICIPANT) - acq_entity.add_attributes({Constants.NIDM_AGE:60,Constants.NIDM_GENDER:"Male" }) - + acq_entity.add_attributes({Constants.NIDM_AGE: 60, Constants.NIDM_GENDER: "Male"}) - #save a turtle file - 
with open("test.ttl",'w') as f: - f.write (project.serializeTurtle()) + # save a turtle file + with open("test.ttl", "w") as f: + f.write(project.serializeTurtle()) - #save a DOT graph as PDF + # save a DOT graph as PDF # project.save_DotGraph("test.png",format="png") + if __name__ == "__main__": - main(sys.argv[1:]) + main(sys.argv[1:]) + # very simple test, just checking if main does not give any error def test_main(): main(sys.argv[1:]) - diff --git a/nidm/experiment/tests/test_experiment_basic.py b/nidm/experiment/tests/test_experiment_basic.py index 3adba741..c3dc80b3 100644 --- a/nidm/experiment/tests/test_experiment_basic.py +++ b/nidm/experiment/tests/test_experiment_basic.py @@ -1,32 +1,39 @@ -import os,sys -import pytest, pdb -from os import remove +from io import StringIO import json -from nidm.experiment import Project, Session, Acquisition, AcquisitionObject +import os +from os import remove +import pdb +import sys from nidm.core import Constants -from io import StringIO -from rdflib import Graph +from nidm.experiment import Acquisition, AcquisitionObject, Project, Session from nidm.experiment.Utils import read_nidm +import prov +import pytest +import rdflib +from rdflib import Graph -import prov, rdflib def test_1(tmpdir): tmpdir.chdir() project = Project() - #save a turtle file - with open("test.ttl",'w') as f: + # save a turtle file + with open("test.ttl", "w") as f: f.write(project.serializeTurtle()) def test_2(tmpdir): tmpdir.chdir() - kwargs={Constants.NIDM_PROJECT_NAME:"FBIRN_PhaseII",Constants.NIDM_PROJECT_IDENTIFIER:9610,Constants.NIDM_PROJECT_DESCRIPTION:"Test investigation"} + kwargs = { + Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII", + Constants.NIDM_PROJECT_IDENTIFIER: 9610, + Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation", + } project = Project(attributes=kwargs) - with open("test.ttl",'w') as f: + with open("test.ttl", "w") as f: f.write(project.serializeTurtle()) @@ -88,7 +95,7 @@ def test_project_noparameters(): # checking type proj_type = proj.get_type() - assert eval(proj_type.provn_representation()) == 'prov:Activity' + assert eval(proj_type.provn_representation()) == "prov:Activity" # checking length of graph records; it doesn work if all tests are run assert len(proj.graph.get_records()) == 1 @@ -107,7 +114,7 @@ def test_project_emptygraph(): # checking type proj_type = proj.get_type() - assert eval(proj_type.provn_representation()) == 'prov:Activity' + assert eval(proj_type.provn_representation()) == "prov:Activity" assert len(proj.graph.get_records()) == 1 @@ -127,7 +134,7 @@ def test_project_uuid(): # checking type proj_type = proj.get_type() - assert eval(proj_type.provn_representation()) == 'prov:Activity' + assert eval(proj_type.provn_representation()) == "prov:Activity" # checking if uuid is correct assert proj.identifier.localpart == "my_uuid" @@ -138,7 +145,9 @@ def test_project_uuid(): def test_project_att(): # creating project without parameters - proj = Project(attributes={prov.model.QualifiedName(Constants.NIDM, "title"): "MyPRoject"}) + proj = Project( + attributes={prov.model.QualifiedName(Constants.NIDM, "title"): "MyPRoject"} + ) # checking if we created ProvDocument assert type(proj.bundle) is Constants.NIDMDocument @@ -147,11 +156,13 @@ def test_project_att(): # checking graph namespace const_l = list(Constants.namespaces) namesp = [i.prefix for i in proj.graph.namespaces] - assert sorted(const_l+[rdflib.term.URIRef('http://purl.org/nidash/nidm#prefix')]) == sorted(namesp) + assert sorted( + const_l + 
[rdflib.term.URIRef("http://purl.org/nidash/nidm#prefix")] + ) == sorted(namesp) # checking type proj_type = proj.get_type() - assert eval(proj_type.provn_representation()) == 'prov:Activity' + assert eval(proj_type.provn_representation()) == "prov:Activity" # checking length of graph records; it doesn work if all tests are run assert len(proj.graph.get_records()) == 1 @@ -176,58 +187,61 @@ def test_session_noparameters(): # checking type proj_type = proj.get_type() - assert eval(proj_type.provn_representation()) == 'prov:Activity' + assert eval(proj_type.provn_representation()) == "prov:Activity" # checking length of graph records; it doesn work if all tests are run assert len(proj.graph.get_records()) == 2 def test_jsonld_exports(): - - kwargs={Constants.NIDM_PROJECT_NAME:"FBIRN_PhaseII",Constants.NIDM_PROJECT_IDENTIFIER:9610,Constants.NIDM_PROJECT_DESCRIPTION:"Test investigation"} - project = Project(uuid="_123456",attributes=kwargs) - - - #save a turtle file - with open("test.json",'w') as f: + kwargs = { + Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII", + Constants.NIDM_PROJECT_IDENTIFIER: 9610, + Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation", + } + project = Project(uuid="_123456", attributes=kwargs) + + # save a turtle file + with open("test.json", "w") as f: f.write(project.serializeJSONLD()) - #load in JSON file + # load in JSON file with open("test.json") as json_file: data = json.load(json_file) + assert data["Identifier"]["@value"] == "9610" + # WIP Read back in json-ld file and check that we have the project info + # remove("test.json") - assert(data["Identifier"]['@value'] == "9610") - #WIP Read back in json-ld file and check that we have the project info - #remove("test.json") def test_project_trig_serialization(): - outfile = StringIO() + kwargs = { + Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII", + Constants.NIDM_PROJECT_IDENTIFIER: 9610, + Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation", + } + project = Project(uuid="_123456", attributes=kwargs) - kwargs={Constants.NIDM_PROJECT_NAME:"FBIRN_PhaseII",Constants.NIDM_PROJECT_IDENTIFIER:9610,Constants.NIDM_PROJECT_DESCRIPTION:"Test investigation"} - project = Project(uuid="_123456",attributes=kwargs) - - - #save as trig file with graph identifier Constants.NIDM_Project + # save as trig file with graph identifier Constants.NIDM_Project test = project.serializeTrig(identifier=Constants.NIIRI["_996"]) - if not isinstance(test,str): - outfile.write(test.decode('ASCII')) + if not isinstance(test, str): + outfile.write(test.decode("ASCII")) else: outfile.write(test) outfile.seek(0) # WIP: RDFLib doesn't seem to have a Trig parser?!? 
- #load back into rdf graph and do assertions + # load back into rdf graph and do assertions # project2 = Graph() # project2.parse(source=outfile) - - #test some assertion on read file + # test some assertion on read file # print(project2.serialize(format='turtle').decode('ASCII')) # print(project2.serialize(format='trig').decode('ASCII')) -#TODO: checking -#attributes{pm.QualifiedName(Namespace("uci", "https.../"), "mascot"): "bleble", ...} + +# TODO: checking +# attributes{pm.QualifiedName(Namespace("uci", "https.../"), "mascot"): "bleble", ...} # (has to be "/" at the end (or #) diff --git a/nidm/experiment/tests/test_load_nidmowl.py b/nidm/experiment/tests/test_load_nidmowl.py index 26df9c2b..bb42936b 100644 --- a/nidm/experiment/tests/test_load_nidmowl.py +++ b/nidm/experiment/tests/test_load_nidmowl.py @@ -1,14 +1,12 @@ -import os,sys -import pytest, pdb - -from nidm.experiment import Project, Session, Acquisition, AcquisitionObject +import os +import pdb +import sys from nidm.core import Constants -from nidm.experiment.Utils import load_nidm_owl_files, fuzzy_match_terms_from_graph +from nidm.experiment import Acquisition, AcquisitionObject, Project, Session +from nidm.experiment.Utils import fuzzy_match_terms_from_graph, load_nidm_owl_files +import pytest -def test_loadowl(): +def test_loadowl(): owl_graph = load_nidm_owl_files() - owl_match = fuzzy_match_terms_from_graph(owl_graph,"WisconsinCardSortingTest") - - - + owl_match = fuzzy_match_terms_from_graph(owl_graph, "WisconsinCardSortingTest") diff --git a/nidm/experiment/tests/test_map_vars_to_terms.py b/nidm/experiment/tests/test_map_vars_to_terms.py index 3103dafe..ee333964 100644 --- a/nidm/experiment/tests/test_map_vars_to_terms.py +++ b/nidm/experiment/tests/test_map_vars_to_terms.py @@ -1,33 +1,42 @@ - -from pathlib import Path -import pytest -import pandas as pd import json import os -import urllib +from os.path import join +from pathlib import Path import re -from nidm.experiment.Utils import map_variables_to_terms import tempfile -from os.path import join -from nidm.core import Constants +import urllib from uuid import UUID - - - +from nidm.core import Constants +from nidm.experiment.Utils import map_variables_to_terms +import pandas as pd +import pytest @pytest.fixture(scope="module", autouse="True") def setup(): global DATA, REPROSCHEMA_JSON_MAP, BIDS_SIDECAR - temp = { 'participant_id': ['100', '101', '102', '103', '104', '105', '106', '107', '108', '109'], - 'age': [18, 25, 30,19 ,35 ,20 ,27 ,29 ,38 ,27], - 'sex': ['m', 'm', 'f', 'm', 'f', 'f', 'f', 'f', 'm','m'] } + temp = { + "participant_id": [ + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + ], + "age": [18, 25, 30, 19, 35, 20, 27, 29, 38, 27], + "sex": ["m", "m", "f", "m", "f", "f", "f", "f", "m", "m"], + } DATA = pd.DataFrame(temp) REPROSCHEMA_JSON_MAP = json.loads( - ''' + """ { "DD(source='participants.tsv', variable='participant_id')": { "label": "participant_id", @@ -83,10 +92,11 @@ def setup(): } ] } - }''') + }""" + ) BIDS_SIDECAR = json.loads( - ''' + """ { "age": { "label": "age", @@ -124,69 +134,109 @@ def setup(): ] } } - - ''') + + """ + ) def test_map_vars_to_terms_BIDS(): - ''' + """ This function will test the Utils.py "map_vars_to_terms" function with a BIDS-formatted JSON sidecar file - ''' - + """ global DATA, BIDS_SIDECAR - column_to_terms, cde = map_variables_to_terms(df=DATA,json_source=BIDS_SIDECAR, - directory=tempfile.gettempdir(),assessment_name="test",bids=True) + column_to_terms, cde = 
map_variables_to_terms( + df=DATA, + json_source=BIDS_SIDECAR, + directory=tempfile.gettempdir(), + assessment_name="test", + bids=True, + ) # check whether JSON mapping structure returned from map_variables_to_terms matches the # reproshema structure assert "DD(source='test', variable='age')" in column_to_terms.keys() assert "DD(source='test', variable='sex')" in column_to_terms.keys() assert "isAbout" in column_to_terms["DD(source='test', variable='age')"].keys() - assert "http://uri.interlex.org/ilx_0100400" == column_to_terms["DD(source='test', variable='age')"] \ - ['isAbout'][0]['@id'] - assert "http://uri.interlex.org/ilx_0738439" == column_to_terms["DD(source='test', variable='sex')"] \ - ['isAbout'][0]['@id'] - assert "responseOptions" in column_to_terms["DD(source='test', variable='sex')"].keys() - assert "choices" in column_to_terms["DD(source='test', variable='sex')"]['responseOptions'].keys() - assert "Male" in column_to_terms["DD(source='test', variable='sex')"]['responseOptions']['choices'].keys() - assert "m" == column_to_terms["DD(source='test', variable='sex')"]['responseOptions']['choices']['Male'] - assert "Male" in column_to_terms["DD(source='test', variable='sex')"]['responseOptions']['choices'].keys() - assert "m" == column_to_terms["DD(source='test', variable='sex')"]['responseOptions']['choices']['Male'] + assert ( + "http://uri.interlex.org/ilx_0100400" + == column_to_terms["DD(source='test', variable='age')"]["isAbout"][0]["@id"] + ) + assert ( + "http://uri.interlex.org/ilx_0738439" + == column_to_terms["DD(source='test', variable='sex')"]["isAbout"][0]["@id"] + ) + assert ( + "responseOptions" in column_to_terms["DD(source='test', variable='sex')"].keys() + ) + assert ( + "choices" + in column_to_terms["DD(source='test', variable='sex')"][ + "responseOptions" + ].keys() + ) + assert ( + "Male" + in column_to_terms["DD(source='test', variable='sex')"]["responseOptions"][ + "choices" + ].keys() + ) + assert ( + "m" + == column_to_terms["DD(source='test', variable='sex')"]["responseOptions"][ + "choices" + ]["Male"] + ) + assert ( + "Male" + in column_to_terms["DD(source='test', variable='sex')"]["responseOptions"][ + "choices" + ].keys() + ) + assert ( + "m" + == column_to_terms["DD(source='test', variable='sex')"]["responseOptions"][ + "choices" + ]["Male"] + ) # now check the JSON sidecar file created by map_variables_to_terms which should match BIDS format - with open(join(tempfile.gettempdir(),"nidm_annotations.json")) as fp: + with open(join(tempfile.gettempdir(), "nidm_annotations.json")) as fp: bids_sidecar = json.load(fp) assert "age" in bids_sidecar.keys() assert "sex" in bids_sidecar.keys() assert "isAbout" in bids_sidecar["age"].keys() - assert "http://uri.interlex.org/ilx_0100400" == bids_sidecar["age"] \ - ['isAbout'][0]['@id'] - assert "http://uri.interlex.org/ilx_0738439" == bids_sidecar["sex"] \ - ['isAbout'][0]['@id'] + assert ( + "http://uri.interlex.org/ilx_0100400" + == bids_sidecar["age"]["isAbout"][0]["@id"] + ) + assert ( + "http://uri.interlex.org/ilx_0738439" + == bids_sidecar["sex"]["isAbout"][0]["@id"] + ) assert "levels" in bids_sidecar["sex"].keys() - assert "Male" in bids_sidecar["sex"]['levels'].keys() - assert "m" == bids_sidecar["sex"]['levels']['Male'] - assert "Male" in bids_sidecar["sex"]['levels'].keys() - assert "m" == bids_sidecar["sex"]['levels']['Male'] + assert "Male" in bids_sidecar["sex"]["levels"].keys() + assert "m" == bids_sidecar["sex"]["levels"]["Male"] + assert "Male" in bids_sidecar["sex"]["levels"].keys() + 
assert "m" == bids_sidecar["sex"]["levels"]["Male"] # check the CDE dataelement graph for correct information - query = ''' + query = """ prefix rdfs: - + select distinct ?uuid ?DataElements ?property ?value where { ?uuid a/rdfs:subClassOf* nidm:DataElement ; ?property ?value . - }''' - qres=cde.query(query) + }""" + qres = cde.query(query) - results=[] + results = [] for row in qres: results.append(list(row)) @@ -194,31 +244,66 @@ def test_map_vars_to_terms_BIDS(): def test_map_vars_to_terms_reproschema(): - ''' + """ This function will test the Utils.py "map_vars_to_terms" function with a reproschema-formatted JSON sidecar file - ''' + """ global DATA, REPROSCHEMA_JSON_MAP - column_to_terms, cde = map_variables_to_terms(df=DATA, json_source=REPROSCHEMA_JSON_MAP, - directory=tempfile.gettempdir(), assessment_name="test") + column_to_terms, cde = map_variables_to_terms( + df=DATA, + json_source=REPROSCHEMA_JSON_MAP, + directory=tempfile.gettempdir(), + assessment_name="test", + ) # check whether JSON mapping structure returned from map_variables_to_terms matches the # reproshema structure assert "DD(source='test', variable='age')" in column_to_terms.keys() assert "DD(source='test', variable='sex')" in column_to_terms.keys() assert "isAbout" in column_to_terms["DD(source='test', variable='age')"].keys() - assert "http://uri.interlex.org/ilx_0100400" == column_to_terms["DD(source='test', variable='age')"] \ - ['isAbout'][0]['@id'] - assert "http://uri.interlex.org/ilx_0738439" == column_to_terms["DD(source='test', variable='sex')"] \ - ['isAbout'][0]['@id'] - assert "responseOptions" in column_to_terms["DD(source='test', variable='sex')"].keys() - assert "choices" in column_to_terms["DD(source='test', variable='sex')"]['responseOptions'].keys() - assert "Male" in column_to_terms["DD(source='test', variable='sex')"]['responseOptions']['choices'].keys() - assert "m" == column_to_terms["DD(source='test', variable='sex')"]['responseOptions']['choices']['Male'] - assert "Male" in column_to_terms["DD(source='test', variable='sex')"]['responseOptions']['choices'].keys() - assert "m" == column_to_terms["DD(source='test', variable='sex')"]['responseOptions']['choices']['Male'] + assert ( + "http://uri.interlex.org/ilx_0100400" + == column_to_terms["DD(source='test', variable='age')"]["isAbout"][0]["@id"] + ) + assert ( + "http://uri.interlex.org/ilx_0738439" + == column_to_terms["DD(source='test', variable='sex')"]["isAbout"][0]["@id"] + ) + assert ( + "responseOptions" in column_to_terms["DD(source='test', variable='sex')"].keys() + ) + assert ( + "choices" + in column_to_terms["DD(source='test', variable='sex')"][ + "responseOptions" + ].keys() + ) + assert ( + "Male" + in column_to_terms["DD(source='test', variable='sex')"]["responseOptions"][ + "choices" + ].keys() + ) + assert ( + "m" + == column_to_terms["DD(source='test', variable='sex')"]["responseOptions"][ + "choices" + ]["Male"] + ) + assert ( + "Male" + in column_to_terms["DD(source='test', variable='sex')"]["responseOptions"][ + "choices" + ].keys() + ) + assert ( + "m" + == column_to_terms["DD(source='test', variable='sex')"]["responseOptions"][ + "choices" + ]["Male"] + ) # now check the JSON mapping file created by map_variables_to_terms which should match Reproschema format with open(join(tempfile.gettempdir(), "nidm_annotations.json")) as fp: @@ -227,19 +312,50 @@ def test_map_vars_to_terms_reproschema(): assert "DD(source='test', variable='age')" in column_to_terms.keys() assert "DD(source='test', variable='sex')" in 
column_to_terms.keys() assert "isAbout" in column_to_terms["DD(source='test', variable='age')"].keys() - assert "http://uri.interlex.org/ilx_0100400" == column_to_terms["DD(source='test', variable='age')"] \ - ['isAbout'][0]['@id'] - assert "http://uri.interlex.org/ilx_0738439" == column_to_terms["DD(source='test', variable='sex')"] \ - ['isAbout'][0]['@id'] - assert "responseOptions" in column_to_terms["DD(source='test', variable='sex')"].keys() - assert "choices" in column_to_terms["DD(source='test', variable='sex')"]['responseOptions'].keys() - assert "Male" in column_to_terms["DD(source='test', variable='sex')"]['responseOptions']['choices'].keys() - assert "m" == column_to_terms["DD(source='test', variable='sex')"]['responseOptions']['choices']['Male'] - assert "Male" in column_to_terms["DD(source='test', variable='sex')"]['responseOptions']['choices'].keys() - assert "m" == column_to_terms["DD(source='test', variable='sex')"]['responseOptions']['choices']['Male'] + assert ( + "http://uri.interlex.org/ilx_0100400" + == column_to_terms["DD(source='test', variable='age')"]["isAbout"][0]["@id"] + ) + assert ( + "http://uri.interlex.org/ilx_0738439" + == column_to_terms["DD(source='test', variable='sex')"]["isAbout"][0]["@id"] + ) + assert ( + "responseOptions" in column_to_terms["DD(source='test', variable='sex')"].keys() + ) + assert ( + "choices" + in column_to_terms["DD(source='test', variable='sex')"][ + "responseOptions" + ].keys() + ) + assert ( + "Male" + in column_to_terms["DD(source='test', variable='sex')"]["responseOptions"][ + "choices" + ].keys() + ) + assert ( + "m" + == column_to_terms["DD(source='test', variable='sex')"]["responseOptions"][ + "choices" + ]["Male"] + ) + assert ( + "Male" + in column_to_terms["DD(source='test', variable='sex')"]["responseOptions"][ + "choices" + ].keys() + ) + assert ( + "m" + == column_to_terms["DD(source='test', variable='sex')"]["responseOptions"][ + "choices" + ]["Male"] + ) # check the CDE dataelement graph for correct information - query = ''' + query = """ prefix rdfs: select distinct ?uuid ?DataElements ?property ?value @@ -248,7 +364,7 @@ def test_map_vars_to_terms_reproschema(): ?uuid a/rdfs:subClassOf* nidm:DataElement ; ?property ?value . 
- }''' + }""" qres = cde.query(query) results = [] @@ -256,7 +372,3 @@ def test_map_vars_to_terms_reproschema(): results.append(list(row)) assert len(results) == 20 - - - - diff --git a/nidm/experiment/tests/test_navigate.py b/nidm/experiment/tests/test_navigate.py index 714db9a3..12cd8766 100644 --- a/nidm/experiment/tests/test_navigate.py +++ b/nidm/experiment/tests/test_navigate.py @@ -1,16 +1,15 @@ -from pathlib import Path -import pytest import os -import urllib +from pathlib import Path import re -from nidm.experiment import Navigate -from nidm.core import Constants +import urllib from uuid import UUID - +from nidm.core import Constants +from nidm.experiment import Navigate +import pytest USE_GITHUB_DATA = True -BRAIN_VOL_FILES = tuple(['./cmu_a.nidm.ttl', './caltech.nidm.ttl']) -OPENNEURO_FILES = tuple(['ds000110.nidm.ttl']) +BRAIN_VOL_FILES = tuple(["./cmu_a.nidm.ttl", "./caltech.nidm.ttl"]) +OPENNEURO_FILES = tuple(["ds000110.nidm.ttl"]) PROJECT_URI = None OPENNEURO_PROJECT_URI = None @@ -23,25 +22,25 @@ def setup(): if Path(f).is_file(): os.remove(f) - if not Path('./cmu_a.nidm.ttl').is_file(): - urllib.request.urlretrieve ( + if not Path("./cmu_a.nidm.ttl").is_file(): + urllib.request.urlretrieve( "https://raw.githubusercontent.com/dbkeator/simple2_NIDM_examples/master/datasets.datalad.org/abide/RawDataBIDS/CMU_a/nidm.ttl", - "cmu_a.nidm.ttl" + "cmu_a.nidm.ttl", ) - if not Path('./caltech.nidm.ttl').is_file(): - urllib.request.urlretrieve ( + if not Path("./caltech.nidm.ttl").is_file(): + urllib.request.urlretrieve( "https://raw.githubusercontent.com/dbkeator/simple2_NIDM_examples/master/datasets.datalad.org/abide/RawDataBIDS/Caltech/nidm.ttl", - "caltech.nidm.ttl" + "caltech.nidm.ttl", ) projects = Navigate.getProjects(BRAIN_VOL_FILES) PROJECT_URI = projects[0] - if not Path('./ds000110.nidm.ttl').is_file(): - urllib.request.urlretrieve ( + if not Path("./ds000110.nidm.ttl").is_file(): + urllib.request.urlretrieve( "https://raw.githubusercontent.com/dbkeator/simple2_NIDM_examples/master/datasets.datalad.org/openneuro/ds000110/nidm.ttl", - "ds000110.nidm.ttl" + "ds000110.nidm.ttl", ) projects2 = Navigate.getProjects(OPENNEURO_FILES) @@ -66,6 +65,7 @@ def test_navigate_get_acquisitions_for_session(): # for a in acquisitions: # print (str(a)) + def test_navigate_get_subjects_for_acquisition(): subjects = set([]) sessions = Navigate.getSessions(BRAIN_VOL_FILES, PROJECT_URI) @@ -78,7 +78,6 @@ def test_navigate_get_subjects_for_acquisition(): assert len(subjects) > 5 - def test_navigate_get_acquisition_data_by_session(): set_of_keys_returned = set([]) set_of_activities = set([]) @@ -95,10 +94,10 @@ def test_navigate_get_acquisition_data_by_session(): for vt in ad.data: set_of_keys_returned.add(vt.label) - print (set_of_keys_returned) + print(set_of_keys_returned) - assert 'age' in set_of_keys_returned - assert 'hadAcquisitionModality' in set_of_keys_returned + assert "age" in set_of_keys_returned + assert "hadAcquisitionModality" in set_of_keys_returned def test_navigate_get_acquisition_data_by_subject(): @@ -108,7 +107,9 @@ def test_navigate_get_acquisition_data_by_subject(): subjects = Navigate.getSubjects(OPENNEURO_FILES, OPENNEURO_PROJECT_URI) assert len(subjects) > 0 for s in subjects: - activities = Navigate.getActivities(nidm_file_tuples=OPENNEURO_FILES, subject_id=s) + activities = Navigate.getActivities( + nidm_file_tuples=OPENNEURO_FILES, subject_id=s + ) assert len(activities) > 0 for a in activities: set_of_activities.add(str(a)) @@ -117,13 +118,17 @@ def 
test_navigate_get_acquisition_data_by_subject(): for vt in ad.data: set_of_keys_returned.add(vt.label) - assert 'age' in set_of_keys_returned - assert 'sex' in set_of_keys_returned - assert 'hadAcquisitionModality' in set_of_keys_returned - assert 'hadImageUsageType' in set_of_keys_returned + assert "age" in set_of_keys_returned + assert "sex" in set_of_keys_returned + assert "hadAcquisitionModality" in set_of_keys_returned + assert "hadImageUsageType" in set_of_keys_returned def test_navigate_get_sub_uuid_from_id(): - uuids = Navigate.getSubjectUUIDsfromID(nidm_file_tuples=BRAIN_VOL_FILES, sub_id='50653') + uuids = Navigate.getSubjectUUIDsfromID( + nidm_file_tuples=BRAIN_VOL_FILES, sub_id="50653" + ) assert len(uuids) == 1 - assert re.match("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", uuids[0]) # check that it's a UUID + assert re.match( + "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", uuids[0] + ) # check that it's a UUID diff --git a/nidm/experiment/tests/test_nidm.ttl b/nidm/experiment/tests/test_nidm.ttl index b265419a..5aacadba 100644 --- a/nidm/experiment/tests/test_nidm.ttl +++ b/nidm/experiment/tests/test_nidm.ttl @@ -106,4 +106,3 @@ niiri:c067401a-0bea-11ea-8e05-003ee1ce9545 a nidm:Session, niiri:c067e01a-0bea-11ea-8e05-003ee1ce9545 a prov:Agent, prov:Person ; foaf:givenName "George"^^xsd:string . - diff --git a/nidm/experiment/tests/test_query.py b/nidm/experiment/tests/test_query.py index 4045ac58..0265e660 100644 --- a/nidm/experiment/tests/test_query.py +++ b/nidm/experiment/tests/test_query.py @@ -1,23 +1,28 @@ -import nidm.experiment.Navigate -from nidm.experiment import Project, Session, AssessmentAcquisition, AssessmentObject, Acquisition, AcquisitionObject, \ - Query -from nidm.core import Constants -from rdflib import Namespace, URIRef -import prov.model as pm -from os import remove, path, environ +import json +from os import environ, path, remove +from pathlib import Path import tempfile -import pytest +import urllib.request +from nidm.core import Constants +from nidm.experiment import ( + Acquisition, + AcquisitionObject, + AssessmentAcquisition, + AssessmentObject, + Project, + Query, + Session, +) from nidm.experiment.CDE import download_cde_files +import nidm.experiment.Navigate from nidm.experiment.tools.rest_statistics import GetProjectsComputedMetadata - - -from prov.model import ProvDocument, QualifiedName +import prov.model as pm from prov.model import Namespace as provNamespace -import json -import urllib.request -from pathlib import Path +from prov.model import ProvDocument, QualifiedName +import pytest +from rdflib import Namespace, URIRef -ABIDE_FILES = ('cmu_a.nidm.ttl',) +ABIDE_FILES = ("cmu_a.nidm.ttl",) cmu_test_project_uuid = None cmu_test_subject_uuid = None @@ -33,6 +38,7 @@ # project = Project(uuid="_654321",attributes=kwargs) USE_GITHUB_DATA = True + @pytest.fixture(scope="module", autouse="True") def setup(): global cmu_test_project_uuid, cmu_test_subject_uuid @@ -40,119 +46,148 @@ def setup(): projects = Query.GetProjectsUUID(ABIDE_FILES) for p in projects: proj_info = nidm.experiment.Navigate.GetProjectAttributes(ABIDE_FILES, p) - if 'dctypes:title' in proj_info.keys() and proj_info['dctypes:title'] == 'ABIDE - CMU_a': + if ( + "dctypes:title" in proj_info.keys() + and proj_info["dctypes:title"] == "ABIDE - CMU_a" + ): cmu_test_project_uuid = p break subjects = Query.GetParticipantIDs(ABIDE_FILES) - cmu_test_subject_uuid = subjects['uuid'][0] + cmu_test_subject_uuid = subjects["uuid"][0] def 
test_GetProjectMetadata(): + kwargs = { + Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII", + Constants.NIDM_PROJECT_IDENTIFIER: 9610, + Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation", + } + project = Project(uuid="_123456", attributes=kwargs) - kwargs={Constants.NIDM_PROJECT_NAME:"FBIRN_PhaseII",Constants.NIDM_PROJECT_IDENTIFIER:9610,Constants.NIDM_PROJECT_DESCRIPTION:"Test investigation"} - project = Project(uuid="_123456",attributes=kwargs) - - - #save a turtle file - with open("test_gpm.ttl",'w') as f: + # save a turtle file + with open("test_gpm.ttl", "w") as f: f.write(project.serializeTurtle()) - kwargs={Constants.NIDM_PROJECT_NAME:"FBIRN_PhaseIII",Constants.NIDM_PROJECT_IDENTIFIER:1200,Constants.NIDM_PROJECT_DESCRIPTION:"Test investigation2"} - project = Project(uuid="_654321",attributes=kwargs) + kwargs = { + Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseIII", + Constants.NIDM_PROJECT_IDENTIFIER: 1200, + Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation2", + } + project = Project(uuid="_654321", attributes=kwargs) - - #save a turtle file - with open("test2_gpm.ttl",'w') as f: + # save a turtle file + with open("test2_gpm.ttl", "w") as f: f.write(project.serializeTurtle()) + # WIP test = Query.GetProjectMetadata(["test.ttl", "test2.ttl"]) - #WIP test = Query.GetProjectMetadata(["test.ttl", "test2.ttl"]) - - #assert URIRef(Constants.NIDM + "_654321") in test - #assert URIRef(Constants.NIDM + "_123456") in test - #assert URIRef(Constants.NIDM_PROJECT_IDENTIFIER + "1200") in test - #assert URIRef(Constants.NIDM_PROJECT_IDENTIFIER + "9610") in test - #assert URIRef((Constants.NIDM_PROJECT_NAME + "FBIRN_PhaseII")) in test - #assert URIRef((Constants.NIDM_PROJECT_NAME + "FBIRN_PhaseIII")) in test - #assert URIRef((Constants.NIDM_PROJECT_DESCRIPTION + "Test investigation")) in test - #assert URIRef((Constants.NIDM_PROJECT_DESCRIPTION + "Test investigation2")) in test + # assert URIRef(Constants.NIDM + "_654321") in test + # assert URIRef(Constants.NIDM + "_123456") in test + # assert URIRef(Constants.NIDM_PROJECT_IDENTIFIER + "1200") in test + # assert URIRef(Constants.NIDM_PROJECT_IDENTIFIER + "9610") in test + # assert URIRef((Constants.NIDM_PROJECT_NAME + "FBIRN_PhaseII")) in test + # assert URIRef((Constants.NIDM_PROJECT_NAME + "FBIRN_PhaseIII")) in test + # assert URIRef((Constants.NIDM_PROJECT_DESCRIPTION + "Test investigation")) in test + # assert URIRef((Constants.NIDM_PROJECT_DESCRIPTION + "Test investigation2")) in test remove("test_gpm.ttl") remove("test2_gpm.ttl") def test_GetProjects(): + kwargs = { + Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII", + Constants.NIDM_PROJECT_IDENTIFIER: 9610, + Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation", + } + project = Project(uuid="_123456", attributes=kwargs) - kwargs={Constants.NIDM_PROJECT_NAME:"FBIRN_PhaseII",Constants.NIDM_PROJECT_IDENTIFIER:9610,Constants.NIDM_PROJECT_DESCRIPTION:"Test investigation"} - project = Project(uuid="_123456",attributes=kwargs) - - - #save a turtle file - with open("test_gp.ttl",'w') as f: + # save a turtle file + with open("test_gp.ttl", "w") as f: f.write(project.serializeTurtle()) project_list = Query.GetProjectsUUID(["test_gp.ttl"]) remove("test_gp.ttl") - assert Constants.NIIRI + "_123456" in [ str(x) for x in project_list] + assert Constants.NIIRI + "_123456" in [str(x) for x in project_list] -def test_GetParticipantIDs(): - kwargs={Constants.NIDM_PROJECT_NAME:"FBIRN_PhaseII",Constants.NIDM_PROJECT_IDENTIFIER:9610,Constants.NIDM_PROJECT_DESCRIPTION:"Test investigation"} - project = 
Project(uuid="_123456",attributes=kwargs) - session = Session(uuid="_13579",project=project) - acq = Acquisition(uuid="_15793",session=session) - acq2 = Acquisition(uuid="_15795",session=session) - - person=acq.add_person(attributes=({Constants.NIDM_SUBJECTID:"9999"})) - acq.add_qualified_association(person=person,role=Constants.NIDM_PARTICIPANT) - - person2=acq2.add_person(attributes=({Constants.NIDM_SUBJECTID:"8888"})) - acq2.add_qualified_association(person=person2,role=Constants.NIDM_PARTICIPANT) +def test_GetParticipantIDs(): + kwargs = { + Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII", + Constants.NIDM_PROJECT_IDENTIFIER: 9610, + Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation", + } + project = Project(uuid="_123456", attributes=kwargs) + session = Session(uuid="_13579", project=project) + acq = Acquisition(uuid="_15793", session=session) + acq2 = Acquisition(uuid="_15795", session=session) + + person = acq.add_person(attributes=({Constants.NIDM_SUBJECTID: "9999"})) + acq.add_qualified_association(person=person, role=Constants.NIDM_PARTICIPANT) + + person2 = acq2.add_person(attributes=({Constants.NIDM_SUBJECTID: "8888"})) + acq2.add_qualified_association(person=person2, role=Constants.NIDM_PARTICIPANT) - #save a turtle file - with open("test_3.ttl",'w') as f: + # save a turtle file + with open("test_3.ttl", "w") as f: f.write(project.serializeTurtle()) participant_list = Query.GetParticipantIDs(["test_3.ttl"]) remove("test_3.ttl") - assert (participant_list['ID'].str.contains('9999').any()) - assert (participant_list['ID'].str.contains('8888').any()) + assert participant_list["ID"].str.contains("9999").any() + assert participant_list["ID"].str.contains("8888").any() + def test_GetProjectInstruments(): - kwargs = {Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII", Constants.NIDM_PROJECT_IDENTIFIER: 9610, - Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation"} + kwargs = { + Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII", + Constants.NIDM_PROJECT_IDENTIFIER: 9610, + Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation", + } proj_uuid = "_123456gpi" project = Project(uuid=proj_uuid, attributes=kwargs) session = Session(project) acq = AssessmentAcquisition(session) - kwargs={pm.PROV_TYPE:pm.QualifiedName(pm.Namespace("nidm",Constants.NIDM),"NorthAmericanAdultReadingTest")} - acq_obj = AssessmentObject(acq,attributes=kwargs) + kwargs = { + pm.PROV_TYPE: pm.QualifiedName( + pm.Namespace("nidm", Constants.NIDM), "NorthAmericanAdultReadingTest" + ) + } + acq_obj = AssessmentObject(acq, attributes=kwargs) acq2 = AssessmentAcquisition(session) - kwargs={pm.PROV_TYPE:pm.QualifiedName(pm.Namespace("nidm",Constants.NIDM),"PositiveAndNegativeSyndromeScale")} - acq_obj2 = AssessmentObject(acq2,attributes=kwargs) + kwargs = { + pm.PROV_TYPE: pm.QualifiedName( + pm.Namespace("nidm", Constants.NIDM), "PositiveAndNegativeSyndromeScale" + ) + } + acq_obj2 = AssessmentObject(acq2, attributes=kwargs) - #save a turtle file - with open("test_gpi.ttl",'w') as f: + # save a turtle file + with open("test_gpi.ttl", "w") as f: f.write(project.serializeTurtle()) assessment_list = Query.GetProjectInstruments(["test_gpi.ttl"], proj_uuid) remove("test_gpi.ttl") - assert Constants.NIDM + "NorthAmericanAdultReadingTest" in [str(x) for x in assessment_list['assessment_type'].to_list()] - assert Constants.NIDM + "PositiveAndNegativeSyndromeScale" in [str(x) for x in assessment_list['assessment_type'].to_list()] + assert Constants.NIDM + "NorthAmericanAdultReadingTest" in [ + str(x) for x in 
assessment_list["assessment_type"].to_list() + ] + assert Constants.NIDM + "PositiveAndNegativeSyndromeScale" in [ + str(x) for x in assessment_list["assessment_type"].to_list() + ] -''' +""" The test data file could/should have the following project meta data. Taken from https://raw.githubusercontent.com/incf-nidash/nidm/master/nidm/nidm-experiment/terms/nidm-experiment.owl - + - description - fileName - license @@ -171,18 +206,21 @@ def test_GetProjectInstruments(): - AppliedFilter - SolutionFlowSpeed - RecordingLocation - -Returns the - -''' + +Returns the + +""" + + def saveTestFile(file_name, data): project = Project(uuid="_123_" + file_name, attributes=data) return saveProject(file_name, project) + def saveProject(file_name, project): # save a turtle file - with open(file_name, 'w') as f: + with open(file_name, "w") as f: f.write(project.serializeTurtle()) return "nidm:_123_{}".format(file_name) @@ -190,61 +228,65 @@ def saveProject(file_name, project): def makeProjectTestFile(filename): DCTYPES = Namespace("http://purl.org/dc/dcmitype/") - kwargs = {Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII", # this is the "title" - Constants.NIDM_PROJECT_IDENTIFIER: 9610, - Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation", - Constants.NIDM_FILENAME: "testfile.ttl", - Constants.NIDM_PROJECT_LICENSE: "MIT Licence", - Constants.NIDM_PROJECT_SOURCE: "Educational Source", - Constants.NIDM_HAD_NUMERICAL_VALUE: "numval???", - Constants.NIDM_BATH_SOLUTION: "bath", - Constants.NIDM_CELL_TYPE: "ctype", - Constants.NIDM_CHANNEL_NUMBER: "5", - Constants.NIDM_ELECTRODE_IMPEDANCE: ".01", - Constants.NIDM_GROUP_LABEL: "group 123", - Constants.NIDM_HOLLOW_ELECTRODE_SOLUTION: "water", - Constants.NIDM_HAD_IMAGE_CONTRACT_TYPE: "off", - Constants.NIDM_HAD_IMAGE_USAGE_TYPE: "abcd", - Constants.NIDM_NUBMER_OF_CHANNELS: "11", - Constants.NIDM_APPLIED_FILTER: "on", - Constants.NIDM_SOLUTION_FLOW_SPEED: "2.8", - Constants.NIDM_RECORDING_LOCATION: "lab" - } + kwargs = { + Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII", # this is the "title" + Constants.NIDM_PROJECT_IDENTIFIER: 9610, + Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation", + Constants.NIDM_FILENAME: "testfile.ttl", + Constants.NIDM_PROJECT_LICENSE: "MIT Licence", + Constants.NIDM_PROJECT_SOURCE: "Educational Source", + Constants.NIDM_HAD_NUMERICAL_VALUE: "numval???", + Constants.NIDM_BATH_SOLUTION: "bath", + Constants.NIDM_CELL_TYPE: "ctype", + Constants.NIDM_CHANNEL_NUMBER: "5", + Constants.NIDM_ELECTRODE_IMPEDANCE: ".01", + Constants.NIDM_GROUP_LABEL: "group 123", + Constants.NIDM_HOLLOW_ELECTRODE_SOLUTION: "water", + Constants.NIDM_HAD_IMAGE_CONTRACT_TYPE: "off", + Constants.NIDM_HAD_IMAGE_USAGE_TYPE: "abcd", + Constants.NIDM_NUBMER_OF_CHANNELS: "11", + Constants.NIDM_APPLIED_FILTER: "on", + Constants.NIDM_SOLUTION_FLOW_SPEED: "2.8", + Constants.NIDM_RECORDING_LOCATION: "lab", + } return saveTestFile(filename, kwargs) + def makeProjectTestFile2(filename): DCTYPES = Namespace("http://purl.org/dc/dcmitype/") - kwargs = {Constants.NIDM_PROJECT_NAME: "TEST B", # this is the "title" - Constants.NIDM_PROJECT_IDENTIFIER: 1234, - Constants.NIDM_PROJECT_DESCRIPTION: "More Scans", - Constants.NIDM_FILENAME: "testfile2.ttl", - Constants.NIDM_PROJECT_LICENSE: "Creative Commons", - Constants.NIDM_PROJECT_SOURCE: "Other", - Constants.NIDM_HAD_NUMERICAL_VALUE: "numval???", - Constants.NIDM_BATH_SOLUTION: "bath", - Constants.NIDM_CELL_TYPE: "ctype", - Constants.NIDM_CHANNEL_NUMBER: "5", - Constants.NIDM_ELECTRODE_IMPEDANCE: ".01", - 
Constants.NIDM_GROUP_LABEL: "group 123", - Constants.NIDM_HOLLOW_ELECTRODE_SOLUTION: "water", - Constants.NIDM_HAD_IMAGE_CONTRACT_TYPE: "off", - Constants.NIDM_HAD_IMAGE_USAGE_TYPE: "abcd", - Constants.NIDM_NUBMER_OF_CHANNELS: "11", - Constants.NIDM_APPLIED_FILTER: "on", - Constants.NIDM_SOLUTION_FLOW_SPEED: "2.8", - Constants.NIDM_RECORDING_LOCATION: "lab" - } + kwargs = { + Constants.NIDM_PROJECT_NAME: "TEST B", # this is the "title" + Constants.NIDM_PROJECT_IDENTIFIER: 1234, + Constants.NIDM_PROJECT_DESCRIPTION: "More Scans", + Constants.NIDM_FILENAME: "testfile2.ttl", + Constants.NIDM_PROJECT_LICENSE: "Creative Commons", + Constants.NIDM_PROJECT_SOURCE: "Other", + Constants.NIDM_HAD_NUMERICAL_VALUE: "numval???", + Constants.NIDM_BATH_SOLUTION: "bath", + Constants.NIDM_CELL_TYPE: "ctype", + Constants.NIDM_CHANNEL_NUMBER: "5", + Constants.NIDM_ELECTRODE_IMPEDANCE: ".01", + Constants.NIDM_GROUP_LABEL: "group 123", + Constants.NIDM_HOLLOW_ELECTRODE_SOLUTION: "water", + Constants.NIDM_HAD_IMAGE_CONTRACT_TYPE: "off", + Constants.NIDM_HAD_IMAGE_USAGE_TYPE: "abcd", + Constants.NIDM_NUBMER_OF_CHANNELS: "11", + Constants.NIDM_APPLIED_FILTER: "on", + Constants.NIDM_SOLUTION_FLOW_SPEED: "2.8", + Constants.NIDM_RECORDING_LOCATION: "lab", + } project = Project(uuid="_123_" + filename, attributes=kwargs) s1 = Session(project) a1 = AssessmentAcquisition(session=s1) - # = s1.add_acquisition("a1", attributes={"http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#Age" : 22}) + # = s1.add_acquisition("a1", attributes={"http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#Age" : 22}) - p1 = a1.add_person("p1", attributes={Constants.NIDM_GIVEN_NAME:"George", Constants.NIDM_AGE: 22}) + p1 = a1.add_person( + "p1", attributes={Constants.NIDM_GIVEN_NAME: "George", Constants.NIDM_AGE: 22} + ) a1.add_qualified_association(person=p1, role=Constants.NIDM_PARTICIPANT) - return saveProject(filename, project) @@ -253,13 +295,13 @@ def test_GetProjectsMetadata(): p2 = makeProjectTestFile2("testfile2.ttl") files = ["testfile.ttl", "testfile2.ttl"] - if USE_GITHUB_DATA and not Path('./cmu_a.nidm.ttl').is_file(): + if USE_GITHUB_DATA and not Path("./cmu_a.nidm.ttl").is_file(): urllib.request.urlretrieve( "https://raw.githubusercontent.com/dbkeator/simple2_NIDM_examples/master/datasets.datalad.org/abide/RawDataBIDS/CMU_a/nidm.ttl", - "cmu_a.nidm.ttl" + "cmu_a.nidm.ttl", ) files.append("cmu_a.nidm.ttl") - elif Path('./cmu_a.nidm.ttl').is_file(): + elif Path("./cmu_a.nidm.ttl").is_file(): files.append("cmu_a.nidm.ttl") parsed = Query.GetProjectsMetadata(files) @@ -273,9 +315,12 @@ def test_GetProjectsMetadata(): if USE_GITHUB_DATA: # find the project ID from the CMU file p3 = None - for project_id in parsed['projects']: + for project_id in parsed["projects"]: if project_id != p1 and project_id != p2: - if parsed['projects'][project_id][str(Constants.NIDM_PROJECT_NAME)] == "ABIDE - CMU_a": + if ( + parsed["projects"][project_id][str(Constants.NIDM_PROJECT_NAME)] + == "ABIDE - CMU_a" + ): p3 = project_id break assert p3 != None @@ -312,8 +357,10 @@ def test_GetProjectsMetadata(): def test_prefix_helpers(): - - assert Query.expandNIDMAbbreviation("ndar:src_subject_id") == "https://ndar.nih.gov/api/datadictionary/v2/dataelement/src_subject_id" + assert ( + Query.expandNIDMAbbreviation("ndar:src_subject_id") + == "https://ndar.nih.gov/api/datadictionary/v2/dataelement/src_subject_id" + ) assert Query.matchPrefix("http://purl.org/nidash/nidm#abc") == "nidm:abc" assert Query.matchPrefix("http://www.w3.org/ns/prov#123") == "prov:123" @@ 
-322,38 +369,42 @@ def test_prefix_helpers(): def test_getProjectAcquisitionObjects(): - if not Path('./cmu_a.nidm.ttl').is_file(): - urllib.request.urlretrieve ( + if not Path("./cmu_a.nidm.ttl").is_file(): + urllib.request.urlretrieve( "https://raw.githubusercontent.com/dbkeator/simple2_NIDM_examples/master/datasets.datalad.org/abide/RawDataBIDS/CMU_a/nidm.ttl", - "cmu_a.nidm.ttl" + "cmu_a.nidm.ttl", ) - files = ['cmu_a.nidm.ttl'] + files = ["cmu_a.nidm.ttl"] project_list = Query.GetProjectsUUID(files) project_uuid = str(project_list[0]) - objects = Query.getProjectAcquisitionObjects(files,project_uuid) + objects = Query.getProjectAcquisitionObjects(files, project_uuid) - assert isinstance(objects,list) + assert isinstance(objects, list) def test_GetProjectAttributes(): global cmu_test_project_uuid - if not Path('./cmu_a.nidm.ttl').is_file(): - urllib.request.urlretrieve ( + if not Path("./cmu_a.nidm.ttl").is_file(): + urllib.request.urlretrieve( "https://raw.githubusercontent.com/dbkeator/simple2_NIDM_examples/master/datasets.datalad.org/abide/RawDataBIDS/CMU_a/nidm.ttl", - "cmu_a.nidm.ttl" + "cmu_a.nidm.ttl", ) files = ABIDE_FILES project_uuid = cmu_test_project_uuid - project_attributes = nidm.experiment.Navigate.GetProjectAttributes(files, project_uuid) - assert ('prov:Location' in project_attributes) or ('Location' in project_attributes) - assert ('dctypes:title' in project_attributes) or ('title' in project_attributes) - assert ('http://www.w3.org/1999/02/22-rdf-syntax-ns#type' in project_attributes) or ('type' in project_attributes) - assert ('AcquisitionModality') in project_attributes - assert ('ImageContrastType') in project_attributes - assert ('Task') in project_attributes - assert ('ImageUsageType') in project_attributes + project_attributes = nidm.experiment.Navigate.GetProjectAttributes( + files, project_uuid + ) + assert ("prov:Location" in project_attributes) or ("Location" in project_attributes) + assert ("dctypes:title" in project_attributes) or ("title" in project_attributes) + assert ( + "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" in project_attributes + ) or ("type" in project_attributes) + assert ("AcquisitionModality") in project_attributes + assert ("ImageContrastType") in project_attributes + assert ("Task") in project_attributes + assert ("ImageUsageType") in project_attributes def test_download_cde_files(): @@ -361,24 +412,33 @@ def test_download_cde_files(): assert cde_dir == tempfile.gettempdir() fcount = 0 for url in Constants.CDE_FILE_LOCATIONS: - fname = url.split('/')[-1] - assert path.isfile("{}/{}".format(cde_dir, fname) ) + fname = url.split("/")[-1] + assert path.isfile("{}/{}".format(cde_dir, fname)) fcount += 1 assert fcount > 0 -@pytest.mark.skip(reason="We don't have an easily accessible file for this test so skipping it until better test samples are available.") -def test_custom_data_types(): - SPECIAL_TEST_FILES = ['/opt/project/ttl/MTdemog_aseg.ttl'] - - valuetype1 = Query.getDataTypeInfo(Query.OpenGraph(SPECIAL_TEST_FILES[0]), 'no-real-value') - assert (valuetype1 == False) - - valuetype2 = Query.getDataTypeInfo(Query.OpenGraph(SPECIAL_TEST_FILES[0]), Constants.NIIRI['age_e3hrcc']) - assert (str(valuetype2['label']) == 'age') - assert (str(valuetype2['description']) == "Age of participant at scan") - assert (str(valuetype2['isAbout']) == str(Constants.NIIRI['24d78sq'])) - valuetype3 = Query.getDataTypeInfo(Query.OpenGraph(SPECIAL_TEST_FILES[0]), 'age_e3hrcc') - assert (str(valuetype3['label']) == 'age') - assert 
(str(valuetype3['description']) == "Age of participant at scan") - assert (str(valuetype3['isAbout']) == str(Constants.NIIRI['24d78sq'])) +@pytest.mark.skip( + reason="We don't have an easily accessible file for this test so skipping it until better test samples are available." +) +def test_custom_data_types(): + SPECIAL_TEST_FILES = ["/opt/project/ttl/MTdemog_aseg.ttl"] + + valuetype1 = Query.getDataTypeInfo( + Query.OpenGraph(SPECIAL_TEST_FILES[0]), "no-real-value" + ) + assert valuetype1 == False + + valuetype2 = Query.getDataTypeInfo( + Query.OpenGraph(SPECIAL_TEST_FILES[0]), Constants.NIIRI["age_e3hrcc"] + ) + assert str(valuetype2["label"]) == "age" + assert str(valuetype2["description"]) == "Age of participant at scan" + assert str(valuetype2["isAbout"]) == str(Constants.NIIRI["24d78sq"]) + + valuetype3 = Query.getDataTypeInfo( + Query.OpenGraph(SPECIAL_TEST_FILES[0]), "age_e3hrcc" + ) + assert str(valuetype3["label"]) == "age" + assert str(valuetype3["description"]) == "Age of participant at scan" + assert str(valuetype3["isAbout"]) == str(Constants.NIIRI["24d78sq"]) diff --git a/nidm/experiment/tools/agent.ttl b/nidm/experiment/tools/agent.ttl index 4af0f1bc..2d21c0de 100644 --- a/nidm/experiment/tools/agent.ttl +++ b/nidm/experiment/tools/agent.ttl @@ -113,4 +113,3 @@ niiri:_ses1 a nidm:Session, niiri:_ses2 a nidm:Session, prov:Activity ; dct:isPartOf niiri:p2 . - diff --git a/nidm/experiment/tools/bidsmri2nidm.py b/nidm/experiment/tools/bidsmri2nidm.py index 8ba5f35c..03550ca4 100755 --- a/nidm/experiment/tools/bidsmri2nidm.py +++ b/nidm/experiment/tools/bidsmri2nidm.py @@ -26,29 +26,40 @@ # ************************************************************************************** # ************************************************************************************** -import sys, getopt, os -import bids -from nidm.experiment import Project,Session,MRAcquisition,AcquisitionObject,DemographicsObject, AssessmentAcquisition, \ - AssessmentObject,MRObject,Acquisition -from nidm.core import BIDS_Constants,Constants - -from prov.model import PROV_LABEL,PROV_TYPE, ProvInfluence -from nidm.experiment.Utils import map_variables_to_terms, add_attributes_with_cde, addGitAnnexSources -from pandas import DataFrame -from prov.model import QualifiedName,Namespace -from os.path import isfile,join -from argparse import RawTextHelpFormatter -import json -import logging - +from argparse import ArgumentParser, RawTextHelpFormatter import csv +import getopt import glob -from argparse import ArgumentParser # Python program to find SHA256 hash string of a file import hashlib from io import StringIO -from rdflib import Graph, RDF, Literal,URIRef +import json +import logging +import os +from os.path import isfile, join +import sys +import bids +from nidm.core import BIDS_Constants, Constants +from nidm.experiment import ( + Acquisition, + AcquisitionObject, + AssessmentAcquisition, + AssessmentObject, + DemographicsObject, + MRAcquisition, + MRObject, + Project, + Session, +) +from nidm.experiment.Utils import ( + add_attributes_with_cde, + addGitAnnexSources, + map_variables_to_terms, +) +from pandas import DataFrame +from prov.model import PROV_LABEL, PROV_TYPE, Namespace, ProvInfluence, QualifiedName +from rdflib import RDF, Graph, Literal, URIRef def getRelPathToBIDS(filepath, bids_root): @@ -59,10 +70,10 @@ def getRelPathToBIDS(filepath, bids_root): :param bids_root: absolute path to BIDS directory :return: relative path to file, relative to BIDS root """ - path,file = os.path.split(filepath) + path, 
file = os.path.split(filepath) - relpath = path.replace(bids_root,"") - return(os.path.join(relpath,file)) + relpath = path.replace(bids_root, "") + return os.path.join(relpath, file) def getsha512(filename): @@ -72,53 +83,101 @@ def getsha512(filename): :return: hexadecimal sha512 sum of file. """ sha512_hash = hashlib.sha512() - with open(filename,"rb") as f: + with open(filename, "rb") as f: # Read and update hash string value in blocks of 4K - for byte_block in iter(lambda: f.read(4096),b""): + for byte_block in iter(lambda: f.read(4096), b""): sha512_hash.update(byte_block) return sha512_hash.hexdigest() def main(): - parser = ArgumentParser(description= -"""This program will represent a BIDS MRI dataset as a NIDM RDF document and provide user with opportunity to annotate + parser = ArgumentParser( + description="""This program will represent a BIDS MRI dataset as a NIDM RDF document and provide user with opportunity to annotate the dataset (i.e. create sidecar files) and associate selected variables with broader concepts to make datasets more FAIR. \n\n Note, you must obtain an API key to Interlex by signing up for an account at scicrunch.org then going to My Account -and API Keys. Then set the environment variable INTERLEX_API_KEY with your key. """ ,formatter_class=RawTextHelpFormatter) - - parser.add_argument('-d', dest='directory', required=True, help="Full path to BIDS dataset directory") - parser.add_argument('-jsonld', '--jsonld', action='store_true', help='If flag set, output is json-ld not TURTLE') - #parser.add_argument('-png', '--png', action='store_true', help='If flag set, tool will output PNG file of NIDM graph') - parser.add_argument('-bidsignore', '--bidsignore', action='store_true', default = False, help='If flag set, tool will add NIDM-related files to .bidsignore file') - parser.add_argument('-no_concepts', '--no_concepts', action='store_true', default = False, help='If flag set, tool will no do concept mapping') +and API Keys. Then set the environment variable INTERLEX_API_KEY with your key. """, + formatter_class=RawTextHelpFormatter, + ) + + parser.add_argument( + "-d", + dest="directory", + required=True, + help="Full path to BIDS dataset directory", + ) + parser.add_argument( + "-jsonld", + "--jsonld", + action="store_true", + help="If flag set, output is json-ld not TURTLE", + ) + # parser.add_argument('-png', '--png', action='store_true', help='If flag set, tool will output PNG file of NIDM graph') + parser.add_argument( + "-bidsignore", + "--bidsignore", + action="store_true", + default=False, + help="If flag set, tool will add NIDM-related files to .bidsignore file", + ) + parser.add_argument( + "-no_concepts", + "--no_concepts", + action="store_true", + default=False, + help="If flag set, tool will no do concept mapping", + ) # adding argument group for var->term mappings - mapvars_group = parser.add_argument_group('map variables to terms arguments') - mapvars_group.add_argument('-json_map', '--json_map', dest='json_map',required=False,default=False,help="Optional full path to user-suppled JSON file containing variable-term mappings.") - #parser.add_argument('-nidm', dest='nidm_file', required=False, help="Optional full path of NIDM file to add BIDS data to. ") - parser.add_argument('-log','--log', dest='logfile',required=False, default=None, help="Full path to directory to save log file. 
Log file name is bidsmri2nidm_[basename(args.directory)].log") - parser.add_argument('-o', dest='outputfile', required=False, default="nidm.ttl", help="Outputs turtle file called nidm.ttl in BIDS directory by default..or whatever path/filename is set here") + mapvars_group = parser.add_argument_group("map variables to terms arguments") + mapvars_group.add_argument( + "-json_map", + "--json_map", + dest="json_map", + required=False, + default=False, + help="Optional full path to user-suppled JSON file containing variable-term mappings.", + ) + # parser.add_argument('-nidm', dest='nidm_file', required=False, help="Optional full path of NIDM file to add BIDS data to. ") + parser.add_argument( + "-log", + "--log", + dest="logfile", + required=False, + default=None, + help="Full path to directory to save log file. Log file name is bidsmri2nidm_[basename(args.directory)].log", + ) + parser.add_argument( + "-o", + dest="outputfile", + required=False, + default="nidm.ttl", + help="Outputs turtle file called nidm.ttl in BIDS directory by default..or whatever path/filename is set here", + ) args = parser.parse_args() directory = args.directory if args.logfile is not None: - logging.basicConfig(filename=join(args.logfile,'bidsmri2nidm_' + args.outputfile.split('/')[-2] + '.log'), level=logging.DEBUG) + logging.basicConfig( + filename=join( + args.logfile, "bidsmri2nidm_" + args.outputfile.split("/")[-2] + ".log" + ), + level=logging.DEBUG, + ) # add some logging info - logging.info("bidsmri2nidm %s" %args) + logging.info("bidsmri2nidm %s" % args) # if args.owl is None: # args.owl = 'nidm' - # importlib.reload(sys) # sys.setdefaultencoding('utf8') - project, cde, cde_pheno = bidsmri2project(directory,args) + project, cde, cde_pheno = bidsmri2project(directory, args) # convert to rdflib Graph and add CDEs rdf_graph = Graph() - rdf_graph.parse(source=StringIO(project.serializeTurtle()),format='turtle') + rdf_graph.parse(source=StringIO(project.serializeTurtle()), format="turtle") rdf_graph = rdf_graph + cde # add rest of phenotype CDEs @@ -127,7 +186,6 @@ def main(): logging.info("Writing NIDM file....") - # logging.info(project.serializeTurtle()) logging.info("Serializing NIDM graph and creating graph visualization..") @@ -138,14 +196,14 @@ def main(): # if we're choosing json-ld, make sure file extension is .json # if args.jsonld: # outputfile=os.path.join(directory,os.path.splitext(args.outputfile)[0]+".json") - # if flag set to add to .bidsignore then add + # if flag set to add to .bidsignore then add # if (args.bidsignore): # addbidsignore(directory,os.path.splitext(args.outputfile)[0]+".json") - outputfile=os.path.join(directory,args.outputfile) - if (args.bidsignore): - addbidsignore(directory,args.outputfile) - rdf_graph.serialize(destination=outputfile,format='turtle') + outputfile = os.path.join(directory, args.outputfile) + if args.bidsignore: + addbidsignore(directory, args.outputfile) + rdf_graph.serialize(destination=outputfile, format="turtle") # else: # outputfile=os.path.join(directory,args.outputfile) @@ -161,10 +219,10 @@ def main(): # outputfile = args.outputfile # if (args.bidsignore): # addbidsignore(directory,args.outputfile) - outputfile=args.outputfile - if (args.bidsignore): - addbidsignore(directory,args.outputfile) - rdf_graph.serialize(destination=outputfile,format='turtle') + outputfile = args.outputfile + if args.bidsignore: + addbidsignore(directory, args.outputfile) + rdf_graph.serialize(destination=outputfile, format="turtle") # serialize NIDM file # with 
open(outputfile,'w') as f: @@ -173,7 +231,6 @@ def main(): # else: # f.write(project.serializeTurtle()) - # save a DOT graph as PNG # if (args.png): # project.save_DotGraph(str(outputfile + ".png"), format="png") @@ -181,19 +238,23 @@ def main(): # if (args.bidsignore): # addbidsignore(directory,os.path.basename(str(outputfile + ".png"))) -def addbidsignore(directory,filename_to_add): - logging.info("Adding file %s to %s/.bidsignore..." %(filename_to_add,directory)) + +def addbidsignore(directory, filename_to_add): + logging.info("Adding file %s to %s/.bidsignore..." % (filename_to_add, directory)) # adds filename_to_add to .bidsignore file in directory - if not isfile(os.path.join(directory,".bidsignore")): - with open(os.path.join(directory,".bidsignore"),"w") as text_file: - text_file.write("%s\n" %filename_to_add) + if not isfile(os.path.join(directory, ".bidsignore")): + with open(os.path.join(directory, ".bidsignore"), "w") as text_file: + text_file.write("%s\n" % filename_to_add) else: - if filename_to_add not in open(os.path.join(directory,".bidsignore")).read(): - with open(os.path.join(directory,".bidsignore"),"a") as text_file: - text_file.write("%s\n" %filename_to_add) + if filename_to_add not in open(os.path.join(directory, ".bidsignore")).read(): + with open(os.path.join(directory, ".bidsignore"), "a") as text_file: + text_file.write("%s\n" % filename_to_add) -def addimagingsessions(bids_layout,subject_id,session,participant, directory,img_session=None): - ''' + +def addimagingsessions( + bids_layout, subject_id, session, participant, directory, img_session=None +): + """ This function adds imaging acquistions to the NIDM file and deals with BIDS structures potentially having separate ses-* directories or not :param bids_layout: @@ -203,91 +264,168 @@ def addimagingsessions(bids_layout,subject_id,session,participant, directory,img :param directory: :param img_session: :return: - ''' - for file_tpl in bids_layout.get(subject=subject_id, session=img_session, extension=['.nii', '.nii.gz']): + """ + for file_tpl in bids_layout.get( + subject=subject_id, session=img_session, extension=[".nii", ".nii.gz"] + ): # create an acquisition activity - acq=MRAcquisition(session) + acq = MRAcquisition(session) # check whether participant (i.e. agent) for this subject already exists (i.e. if participants.tsv file exists) else create one - if (not subject_id in participant) and (not subject_id.lstrip("0") in participant): + if (not subject_id in participant) and ( + not subject_id.lstrip("0") in participant + ): participant[subject_id] = {} - participant[subject_id]['person'] = acq.add_person(attributes=({Constants.NIDM_SUBJECTID:subject_id})) - acq.add_qualified_association(person=participant[subject_id]['person'],role=Constants.NIDM_PARTICIPANT) + participant[subject_id]["person"] = acq.add_person( + attributes=({Constants.NIDM_SUBJECTID: subject_id}) + ) + acq.add_qualified_association( + person=participant[subject_id]["person"], + role=Constants.NIDM_PARTICIPANT, + ) # added to account for errors in BIDS datasets where participants.tsv may have no leading 0's but # subject directories do. Since bidsmri2nidm starts with the participants.tsv file those are the IDs unless # there's a subject directory and no entry in participants.tsv... 
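        # (illustrative example with hypothetical values: participants.tsv lists "50653"
        # while the imaging data sit under sub-0050653; since "0050653".lstrip("0") == "50653",
        # the check below can still link this acquisition to the agent that was created from
        # the participants.tsv entry)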
elif subject_id.lstrip("0") in participant: # then link acquisition to the agent with participant ID without leading 00's - acq.add_qualified_association(person=participant[subject_id.lstrip("0")]['person'],role=Constants.NIDM_PARTICIPANT) + acq.add_qualified_association( + person=participant[subject_id.lstrip("0")]["person"], + role=Constants.NIDM_PARTICIPANT, + ) else: # add qualified association with person - acq.add_qualified_association(person=participant[subject_id]['person'],role=Constants.NIDM_PARTICIPANT) - - + acq.add_qualified_association( + person=participant[subject_id]["person"], + role=Constants.NIDM_PARTICIPANT, + ) - if file_tpl.entities['datatype']=='anat': + if file_tpl.entities["datatype"] == "anat": # do something with anatomicals acq_obj = MRObject(acq) # add image contrast type - if file_tpl.entities['suffix'] in BIDS_Constants.scans: - acq_obj.add_attributes({Constants.NIDM_IMAGE_CONTRAST_TYPE:BIDS_Constants.scans[file_tpl.entities['suffix']]}) + if file_tpl.entities["suffix"] in BIDS_Constants.scans: + acq_obj.add_attributes( + { + Constants.NIDM_IMAGE_CONTRAST_TYPE: BIDS_Constants.scans[ + file_tpl.entities["suffix"] + ] + } + ) else: - logging.info("WARNING: No matching image contrast type found in BIDS_Constants.py for %s" % file_tpl.entities['suffix']) + logging.info( + "WARNING: No matching image contrast type found in BIDS_Constants.py for %s" + % file_tpl.entities["suffix"] + ) # add image usage type - if file_tpl.entities['datatype'] in BIDS_Constants.scans: - acq_obj.add_attributes({Constants.NIDM_IMAGE_USAGE_TYPE:BIDS_Constants.scans[file_tpl.entities['datatype']]}) + if file_tpl.entities["datatype"] in BIDS_Constants.scans: + acq_obj.add_attributes( + { + Constants.NIDM_IMAGE_USAGE_TYPE: BIDS_Constants.scans[ + file_tpl.entities["datatype"] + ] + } + ) else: - logging.info("WARNING: No matching image usage type found in BIDS_Constants.py for %s" % file_tpl.entities['datatype']) + logging.info( + "WARNING: No matching image usage type found in BIDS_Constants.py for %s" + % file_tpl.entities["datatype"] + ) # add file link # make relative link to - acq_obj.add_attributes({Constants.NIDM_FILENAME:getRelPathToBIDS(join(file_tpl.dirname,file_tpl.filename), directory)}) + acq_obj.add_attributes( + { + Constants.NIDM_FILENAME: getRelPathToBIDS( + join(file_tpl.dirname, file_tpl.filename), directory + ) + } + ) # add git-annex info if exists - num_sources = addGitAnnexSources(obj=acq_obj,filepath=join(file_tpl.dirname,file_tpl.filename),bids_root=directory) + num_sources = addGitAnnexSources( + obj=acq_obj, + filepath=join(file_tpl.dirname, file_tpl.filename), + bids_root=directory, + ) # if there aren't any git annex sources then just store the local directory information if num_sources == 0: # WIP: add absolute location of BIDS directory on disk for later finding of files - acq_obj.add_attributes({Constants.PROV['Location']:"file:/" + join(file_tpl.dirname,file_tpl.filename)}) - - + acq_obj.add_attributes( + { + Constants.PROV["Location"]: "file:/" + + join(file_tpl.dirname, file_tpl.filename) + } + ) # add sha512 sum - if isfile(join(directory,file_tpl.dirname,file_tpl.filename)): - acq_obj.add_attributes({Constants.CRYPTO_SHA512:getsha512(join(directory,file_tpl.dirname,file_tpl.filename))}) + if isfile(join(directory, file_tpl.dirname, file_tpl.filename)): + acq_obj.add_attributes( + { + Constants.CRYPTO_SHA512: getsha512( + join(directory, file_tpl.dirname, file_tpl.filename) + ) + } + ) else: - logging.info("WARNING file %s doesn't exist! 
No SHA512 sum stored in NIDM files..." %join(directory,file_tpl.dirname,file_tpl.filename)) + logging.info( + "WARNING file %s doesn't exist! No SHA512 sum stored in NIDM files..." + % join(directory, file_tpl.dirname, file_tpl.filename) + ) # get associated JSON file if exists # There is T1w.json file with information - json_data = (bids_layout.get(suffix=file_tpl.entities['suffix'],subject=subject_id))[0].metadata - if len(json_data.info)>0: + json_data = ( + bids_layout.get(suffix=file_tpl.entities["suffix"], subject=subject_id) + )[0].metadata + if len(json_data.info) > 0: for key in json_data.info.items(): if key in BIDS_Constants.json_keys: if type(json_data.info[key]) is list: - acq_obj.add_attributes({BIDS_Constants.json_keys[key.replace(" ", "_")]:''.join(str(e) for e in json_data.info[key])}) + acq_obj.add_attributes( + { + BIDS_Constants.json_keys[ + key.replace(" ", "_") + ]: "".join(str(e) for e in json_data.info[key]) + } + ) else: - acq_obj.add_attributes({BIDS_Constants.json_keys[key.replace(" ", "_")]:json_data.info[key]}) + acq_obj.add_attributes( + { + BIDS_Constants.json_keys[ + key.replace(" ", "_") + ]: json_data.info[key] + } + ) # Parse T1w.json file in BIDS directory to add the attributes contained inside - if (os.path.isdir(os.path.join(directory))): + if os.path.isdir(os.path.join(directory)): try: - with open(os.path.join(directory,'T1w.json')) as data_file: + with open(os.path.join(directory, "T1w.json")) as data_file: dataset = json.load(data_file) except OSError: - logging.warning("Cannot find T1w.json file...looking for session-specific one") + logging.warning( + "Cannot find T1w.json file...looking for session-specific one" + ) try: if img_session is not None: - with open(os.path.join(directory,'ses-' + img_session + '_T1w.json')) as data_file: + with open( + os.path.join( + directory, "ses-" + img_session + "_T1w.json" + ) + ) as data_file: dataset = json.load(data_file) else: - dataset={} + dataset = {} except OSError: - logging.warning("Cannot find session-specific T1w.json file which is required in the BIDS spec..continuing anyway") - dataset={} + logging.warning( + "Cannot find session-specific T1w.json file which is required in the BIDS spec..continuing anyway" + ) + dataset = {} else: - logging.critical("Error: BIDS directory %s does not exist!" %os.path.join(directory)) + logging.critical( + "Error: BIDS directory %s does not exist!" 
% os.path.join(directory) + ) exit(-1) # add various attributes if they exist in BIDS dataset @@ -295,165 +433,306 @@ def addimagingsessions(bids_layout,subject_id,session,participant, directory,img # if key from T1w.json file is mapped to term in BIDS_Constants.py then add to NIDM object if key in BIDS_Constants.json_keys: if type(dataset[key]) is list: - acq_obj.add_attributes({BIDS_Constants.json_keys[key]:"".join(dataset[key])}) + acq_obj.add_attributes( + {BIDS_Constants.json_keys[key]: "".join(dataset[key])} + ) else: - acq_obj.add_attributes({BIDS_Constants.json_keys[key]:dataset[key]}) + acq_obj.add_attributes( + {BIDS_Constants.json_keys[key]: dataset[key]} + ) - elif file_tpl.entities['datatype'] == 'func': + elif file_tpl.entities["datatype"] == "func": # do something with functionals acq_obj = MRObject(acq) # add image contrast type - if file_tpl.entities['suffix'] in BIDS_Constants.scans: - acq_obj.add_attributes({Constants.NIDM_IMAGE_CONTRAST_TYPE:BIDS_Constants.scans[file_tpl.entities['suffix']]}) + if file_tpl.entities["suffix"] in BIDS_Constants.scans: + acq_obj.add_attributes( + { + Constants.NIDM_IMAGE_CONTRAST_TYPE: BIDS_Constants.scans[ + file_tpl.entities["suffix"] + ] + } + ) else: - logging.info("WARNING: No matching image contrast type found in BIDS_Constants.py for %s" % file_tpl.entities['suffix']) + logging.info( + "WARNING: No matching image contrast type found in BIDS_Constants.py for %s" + % file_tpl.entities["suffix"] + ) # add image usage type - if file_tpl.entities['datatype'] in BIDS_Constants.scans: - acq_obj.add_attributes({Constants.NIDM_IMAGE_USAGE_TYPE:BIDS_Constants.scans[file_tpl.entities['datatype']]}) + if file_tpl.entities["datatype"] in BIDS_Constants.scans: + acq_obj.add_attributes( + { + Constants.NIDM_IMAGE_USAGE_TYPE: BIDS_Constants.scans[ + file_tpl.entities["datatype"] + ] + } + ) else: - logging.info("WARNING: No matching image usage type found in BIDS_Constants.py for %s" % file_tpl.entities['datatype']) + logging.info( + "WARNING: No matching image usage type found in BIDS_Constants.py for %s" + % file_tpl.entities["datatype"] + ) # make relative link to - acq_obj.add_attributes({Constants.NIDM_FILENAME:getRelPathToBIDS(join(file_tpl.dirname,file_tpl.filename), directory)}) + acq_obj.add_attributes( + { + Constants.NIDM_FILENAME: getRelPathToBIDS( + join(file_tpl.dirname, file_tpl.filename), directory + ) + } + ) # add git-annex/datalad info if exists - num_sources=addGitAnnexSources(obj=acq_obj,filepath=join(file_tpl.dirname,file_tpl.filename),bids_root=directory) + num_sources = addGitAnnexSources( + obj=acq_obj, + filepath=join(file_tpl.dirname, file_tpl.filename), + bids_root=directory, + ) # if there aren't any git annex sources then just store the local directory information if num_sources == 0: # WIP: add absolute location of BIDS directory on disk for later finding of files - acq_obj.add_attributes({Constants.PROV['Location']:"file:/" + join(file_tpl.dirname,file_tpl.filename)}) - - + acq_obj.add_attributes( + { + Constants.PROV["Location"]: "file:/" + + join(file_tpl.dirname, file_tpl.filename) + } + ) # add sha512 sum - if isfile(join(directory,file_tpl.dirname,file_tpl.filename)): - acq_obj.add_attributes({Constants.CRYPTO_SHA512:getsha512(join(directory,file_tpl.dirname,file_tpl.filename))}) + if isfile(join(directory, file_tpl.dirname, file_tpl.filename)): + acq_obj.add_attributes( + { + Constants.CRYPTO_SHA512: getsha512( + join(directory, file_tpl.dirname, file_tpl.filename) + ) + } + ) else: - logging.info("WARNING file 
%s doesn't exist! No SHA512 sum stored in NIDM files..." %join(directory,file_tpl.dirname,file_tpl.filename)) + logging.info( + "WARNING file %s doesn't exist! No SHA512 sum stored in NIDM files..." + % join(directory, file_tpl.dirname, file_tpl.filename) + ) - if 'run' in file_tpl.entities: - acq_obj.add_attributes({BIDS_Constants.json_keys["run"]:file_tpl.entities['run']}) + if "run" in file_tpl.entities: + acq_obj.add_attributes( + {BIDS_Constants.json_keys["run"]: file_tpl.entities["run"]} + ) # get associated JSON file if exists - json_data = (bids_layout.get(suffix=file_tpl.entities['suffix'],subject=subject_id))[0].metadata + json_data = ( + bids_layout.get(suffix=file_tpl.entities["suffix"], subject=subject_id) + )[0].metadata - if len(json_data.info)>0: + if len(json_data.info) > 0: for key in json_data.info.items(): if key in BIDS_Constants.json_keys: if type(json_data.info[key]) is list: - acq_obj.add_attributes({BIDS_Constants.json_keys[key.replace(" ", "_")]:''.join(str(e) for e in json_data.info[key])}) + acq_obj.add_attributes( + { + BIDS_Constants.json_keys[ + key.replace(" ", "_") + ]: "".join(str(e) for e in json_data.info[key]) + } + ) else: - acq_obj.add_attributes({BIDS_Constants.json_keys[key.replace(" ", "_")]:json_data.info[key]}) + acq_obj.add_attributes( + { + BIDS_Constants.json_keys[ + key.replace(" ", "_") + ]: json_data.info[key] + } + ) # get associated events TSV file - if 'run' in file_tpl.entities: - events_file = bids_layout.get(subject=subject_id, extension=['.tsv'],modality=file_tpl.entities['datatype'],task=file_tpl.entities['task'],run=file_tpl.entities['run']) + if "run" in file_tpl.entities: + events_file = bids_layout.get( + subject=subject_id, + extension=[".tsv"], + modality=file_tpl.entities["datatype"], + task=file_tpl.entities["task"], + run=file_tpl.entities["run"], + ) else: - events_file = bids_layout.get(subject=subject_id, extension=['.tsv'],modality=file_tpl.entities['datatype'],task=file_tpl.entities['task']) + events_file = bids_layout.get( + subject=subject_id, + extension=[".tsv"], + modality=file_tpl.entities["datatype"], + task=file_tpl.entities["task"], + ) # if there is an events file then this is task-based so create an acquisition object for the task file and link if events_file: - #for now create acquisition object and link it to the associated scan + # for now create acquisition object and link it to the associated scan events_obj = AcquisitionObject(acq) - #add prov type, task name as prov:label, and link to filename of events file - - events_obj.add_attributes({PROV_TYPE:Constants.NIDM_MRI_BOLD_EVENTS,BIDS_Constants.json_keys["TaskName"]: json_data["TaskName"], Constants.NIDM_FILENAME:getRelPathToBIDS(events_file[0].filename, directory)}) - #link it to appropriate MR acquisition entity + # add prov type, task name as prov:label, and link to filename of events file + + events_obj.add_attributes( + { + PROV_TYPE: Constants.NIDM_MRI_BOLD_EVENTS, + BIDS_Constants.json_keys["TaskName"]: json_data["TaskName"], + Constants.NIDM_FILENAME: getRelPathToBIDS( + events_file[0].filename, directory + ), + } + ) + # link it to appropriate MR acquisition entity events_obj.wasAttributedTo(acq_obj) # add source links for this file # add git-annex/datalad info if exists - num_sources=addGitAnnexSources(obj=events_obj,filepath=events_file,bids_root=directory) + num_sources = addGitAnnexSources( + obj=events_obj, filepath=events_file, bids_root=directory + ) # if there aren't any git annex sources then just store the local directory information 
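            # (num_sources is the count of git-annex/DataLad source URLs that
            # addGitAnnexSources() attached to acq_obj; when it is 0 the fallback below
            # records a plain local prov:Location instead, e.g. for a hypothetical layout
            # "file:/" + "/data/ds001/sub-01/func/sub-01_task-rest_bold.nii.gz")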
if num_sources == 0: # WIP: add absolute location of BIDS directory on disk for later finding of files - events_obj.add_attributes({Constants.PROV['Location']:"file:/" + events_file}) - + events_obj.add_attributes( + {Constants.PROV["Location"]: "file:/" + events_file} + ) - #Parse task-rest_bold.json file in BIDS directory to add the attributes contained inside - if (os.path.isdir(os.path.join(directory))): + # Parse task-rest_bold.json file in BIDS directory to add the attributes contained inside + if os.path.isdir(os.path.join(directory)): try: - with open(os.path.join(directory,'task-rest_bold.json')) as data_file: + with open( + os.path.join(directory, "task-rest_bold.json") + ) as data_file: dataset = json.load(data_file) except OSError: - logging.warning("Cannot find task-rest_bold.json file looking for session-specific one") + logging.warning( + "Cannot find task-rest_bold.json file looking for session-specific one" + ) try: if img_session is not None: - with open(os.path.join(directory,'ses-' + img_session +'_task-rest_bold.json')) as data_file: + with open( + os.path.join( + directory, + "ses-" + img_session + "_task-rest_bold.json", + ) + ) as data_file: dataset = json.load(data_file) else: - dataset={} + dataset = {} except OSError: - logging.warning("Cannot find session-specific task-rest_bold.json file which is required in the BIDS spec..continuing anyway") - dataset={} + logging.warning( + "Cannot find session-specific task-rest_bold.json file which is required in the BIDS spec..continuing anyway" + ) + dataset = {} else: - logging.critical("Error: BIDS directory %s does not exist!" %os.path.join(directory)) + logging.critical( + "Error: BIDS directory %s does not exist!" % os.path.join(directory) + ) exit(-1) - #add various attributes if they exist in BIDS dataset + # add various attributes if they exist in BIDS dataset for key in dataset: - #if key from task-rest_bold.json file is mapped to term in BIDS_Constants.py then add to NIDM object + # if key from task-rest_bold.json file is mapped to term in BIDS_Constants.py then add to NIDM object if key in BIDS_Constants.json_keys: if type(dataset[key]) is list: - acq_obj.add_attributes({BIDS_Constants.json_keys[key]:",".join(map(str,dataset[key]))}) + acq_obj.add_attributes( + { + BIDS_Constants.json_keys[key]: ",".join( + map(str, dataset[key]) + ) + } + ) else: - acq_obj.add_attributes({BIDS_Constants.json_keys[key]:dataset[key]}) + acq_obj.add_attributes( + {BIDS_Constants.json_keys[key]: dataset[key]} + ) # DBK added for ASL support 3/16/21 # WIP: Waiting for pybids > 0.12.4 to support perfusion scans - elif file_tpl.entities['datatype'] == 'perf': + elif file_tpl.entities["datatype"] == "perf": acq_obj = MRObject(acq) # add image contrast type - if file_tpl.entities['suffix'] in BIDS_Constants.scans: + if file_tpl.entities["suffix"] in BIDS_Constants.scans: acq_obj.add_attributes( - {Constants.NIDM_IMAGE_CONTRAST_TYPE: BIDS_Constants.scans[file_tpl.entities['suffix']]}) + { + Constants.NIDM_IMAGE_CONTRAST_TYPE: BIDS_Constants.scans[ + file_tpl.entities["suffix"] + ] + } + ) else: logging.info( - "WARNING: No matching image contrast type found in BIDS_Constants.py for %s" % file_tpl.entities[ - 'suffix']) + "WARNING: No matching image contrast type found in BIDS_Constants.py for %s" + % file_tpl.entities["suffix"] + ) # add image usage type - if file_tpl.entities['datatype'] in BIDS_Constants.scans: - acq_obj.add_attributes({Constants.NIDM_IMAGE_USAGE_TYPE: BIDS_Constants.scans["asl"]}) + if file_tpl.entities["datatype"] 
in BIDS_Constants.scans: + acq_obj.add_attributes( + {Constants.NIDM_IMAGE_USAGE_TYPE: BIDS_Constants.scans["asl"]} + ) else: logging.info( - "WARNING: No matching image usage type found in BIDS_Constants.py for %s" % file_tpl.entities[ - 'datatype']) + "WARNING: No matching image usage type found in BIDS_Constants.py for %s" + % file_tpl.entities["datatype"] + ) # make relative link to acq_obj.add_attributes( - {Constants.NIDM_FILENAME: getRelPathToBIDS(join(file_tpl.dirname, file_tpl.filename), directory)}) + { + Constants.NIDM_FILENAME: getRelPathToBIDS( + join(file_tpl.dirname, file_tpl.filename), directory + ) + } + ) # add sha512 sum if isfile(join(directory, file_tpl.dirname, file_tpl.filename)): acq_obj.add_attributes( - {Constants.CRYPTO_SHA512: getsha512(join(directory, file_tpl.dirname, file_tpl.filename))}) + { + Constants.CRYPTO_SHA512: getsha512( + join(directory, file_tpl.dirname, file_tpl.filename) + ) + } + ) else: logging.info( - "WARNING file %s doesn't exist! No SHA512 sum stored in NIDM files..." % join(directory, - file_tpl.dirname, - file_tpl.filename)) + "WARNING file %s doesn't exist! No SHA512 sum stored in NIDM files..." + % join(directory, file_tpl.dirname, file_tpl.filename) + ) # add git-annex/datalad info if exists - num_sources = addGitAnnexSources(obj=acq_obj, filepath=join(file_tpl.dirname, file_tpl.filename), - bids_root=directory) + num_sources = addGitAnnexSources( + obj=acq_obj, + filepath=join(file_tpl.dirname, file_tpl.filename), + bids_root=directory, + ) if num_sources == 0: acq_obj.add_attributes( - {Constants.PROV['Location']: "file:/" + join(file_tpl.dirname, file_tpl.filename)}) + { + Constants.PROV["Location"]: "file:/" + + join(file_tpl.dirname, file_tpl.filename) + } + ) - if 'run' in file_tpl.entities: + if "run" in file_tpl.entities: acq_obj.add_attributes({BIDS_Constants.json_keys["run"]: file_tpl.run}) # get associated JSON file if exists - json_data = (bids_layout.get(suffix=file_tpl.entities['suffix'], subject=subject_id))[0].metadata + json_data = ( + bids_layout.get(suffix=file_tpl.entities["suffix"], subject=subject_id) + )[0].metadata if len(json_data.info) > 0: for key in json_data.info.items(): if key in BIDS_Constants.json_keys: if type(json_data.info[key]) is list: - acq_obj.add_attributes({BIDS_Constants.json_keys[key.replace(" ", "_")]: ''.join( - str(e) for e in json_data.info[key])}) + acq_obj.add_attributes( + { + BIDS_Constants.json_keys[ + key.replace(" ", "_") + ]: "".join(str(e) for e in json_data.info[key]) + } + ) else: acq_obj.add_attributes( - {BIDS_Constants.json_keys[key.replace(" ", "_")]: json_data.info[key]}) + { + BIDS_Constants.json_keys[ + key.replace(" ", "_") + ]: json_data.info[key] + } + ) # check if separate M0 scan exists, if so add location and filename # WIP, waiting for pybids > 0.12.4 to support... @@ -461,123 +740,257 @@ def addimagingsessions(bids_layout,subject_id,session,participant, directory,img # WIP support B0 maps...waiting for pybids > 0.12.4 # elif file_tpl.entities['datatype'] == 'fmap': - - elif file_tpl.entities['datatype'] == 'dwi': - #do stuff with with dwi scans... + elif file_tpl.entities["datatype"] == "dwi": + # do stuff with with dwi scans... 
acq_obj = MRObject(acq) - #add image contrast type - if file_tpl.entities['suffix'] in BIDS_Constants.scans: - acq_obj.add_attributes({Constants.NIDM_IMAGE_CONTRAST_TYPE:BIDS_Constants.scans[file_tpl.entities['suffix']]}) + # add image contrast type + if file_tpl.entities["suffix"] in BIDS_Constants.scans: + acq_obj.add_attributes( + { + Constants.NIDM_IMAGE_CONTRAST_TYPE: BIDS_Constants.scans[ + file_tpl.entities["suffix"] + ] + } + ) else: - logging.info("WARNING: No matching image contrast type found in BIDS_Constants.py for %s" % file_tpl.entities['suffix']) + logging.info( + "WARNING: No matching image contrast type found in BIDS_Constants.py for %s" + % file_tpl.entities["suffix"] + ) - #add image usage type - if file_tpl.entities['datatype'] in BIDS_Constants.scans: - acq_obj.add_attributes({Constants.NIDM_IMAGE_USAGE_TYPE:BIDS_Constants.scans["dti"]}) + # add image usage type + if file_tpl.entities["datatype"] in BIDS_Constants.scans: + acq_obj.add_attributes( + {Constants.NIDM_IMAGE_USAGE_TYPE: BIDS_Constants.scans["dti"]} + ) else: - logging.info("WARNING: No matching image usage type found in BIDS_Constants.py for %s" % file_tpl.entities['datatype']) - #make relative link to - acq_obj.add_attributes({Constants.NIDM_FILENAME:getRelPathToBIDS(join(file_tpl.dirname,file_tpl.filename), directory)}) - #add sha512 sum - if isfile(join(directory,file_tpl.dirname,file_tpl.filename)): - acq_obj.add_attributes({Constants.CRYPTO_SHA512:getsha512(join(directory,file_tpl.dirname,file_tpl.filename))}) + logging.info( + "WARNING: No matching image usage type found in BIDS_Constants.py for %s" + % file_tpl.entities["datatype"] + ) + # make relative link to + acq_obj.add_attributes( + { + Constants.NIDM_FILENAME: getRelPathToBIDS( + join(file_tpl.dirname, file_tpl.filename), directory + ) + } + ) + # add sha512 sum + if isfile(join(directory, file_tpl.dirname, file_tpl.filename)): + acq_obj.add_attributes( + { + Constants.CRYPTO_SHA512: getsha512( + join(directory, file_tpl.dirname, file_tpl.filename) + ) + } + ) else: - logging.info("WARNING file %s doesn't exist! No SHA512 sum stored in NIDM files..." %join(directory,file_tpl.dirname,file_tpl.filename)) + logging.info( + "WARNING file %s doesn't exist! No SHA512 sum stored in NIDM files..." 
+ % join(directory, file_tpl.dirname, file_tpl.filename) + ) # add git-annex/datalad info if exists - num_sources = addGitAnnexSources(obj=acq_obj,filepath=join(file_tpl.dirname,file_tpl.filename),bids_root=directory) + num_sources = addGitAnnexSources( + obj=acq_obj, + filepath=join(file_tpl.dirname, file_tpl.filename), + bids_root=directory, + ) if num_sources == 0: - acq_obj.add_attributes({Constants.PROV['Location']: "file:/" + join(file_tpl.dirname,file_tpl.filename)}) + acq_obj.add_attributes( + { + Constants.PROV["Location"]: "file:/" + + join(file_tpl.dirname, file_tpl.filename) + } + ) - if 'run' in file_tpl.entities: - acq_obj.add_attributes({BIDS_Constants.json_keys["run"]:file_tpl.run}) + if "run" in file_tpl.entities: + acq_obj.add_attributes({BIDS_Constants.json_keys["run"]: file_tpl.run}) - #get associated JSON file if exists - json_data = (bids_layout.get(suffix=file_tpl.entities['suffix'],subject=subject_id))[0].metadata + # get associated JSON file if exists + json_data = ( + bids_layout.get(suffix=file_tpl.entities["suffix"], subject=subject_id) + )[0].metadata - if len(json_data.info)>0: + if len(json_data.info) > 0: for key in json_data.info.items(): if key in BIDS_Constants.json_keys: if type(json_data.info[key]) is list: - acq_obj.add_attributes({BIDS_Constants.json_keys[key.replace(" ", "_")]:''.join(str(e) for e in json_data.info[key])}) + acq_obj.add_attributes( + { + BIDS_Constants.json_keys[ + key.replace(" ", "_") + ]: "".join(str(e) for e in json_data.info[key]) + } + ) else: - acq_obj.add_attributes({BIDS_Constants.json_keys[key.replace(" ", "_")]:json_data.info[key]}) - #for bval and bvec files, what to do with those? + acq_obj.add_attributes( + { + BIDS_Constants.json_keys[ + key.replace(" ", "_") + ]: json_data.info[key] + } + ) + # for bval and bvec files, what to do with those? # for now, create new generic acquisition objects, link the files, and associate with the one for the DWI scan? 
acq_obj_bval = AcquisitionObject(acq) - acq_obj_bval.add_attributes({PROV_TYPE:BIDS_Constants.scans["bval"]}) + acq_obj_bval.add_attributes({PROV_TYPE: BIDS_Constants.scans["bval"]}) # add file link to bval files - acq_obj_bval.add_attributes({Constants.NIDM_FILENAME:getRelPathToBIDS(join(file_tpl.dirname,bids_layout.get_bval(join(file_tpl.dirname,file_tpl.filename))),directory)}) + acq_obj_bval.add_attributes( + { + Constants.NIDM_FILENAME: getRelPathToBIDS( + join( + file_tpl.dirname, + bids_layout.get_bval( + join(file_tpl.dirname, file_tpl.filename) + ), + ), + directory, + ) + } + ) # add git-annex/datalad info if exists - num_sources = addGitAnnexSources(obj=acq_obj_bval,filepath=join(file_tpl.dirname,bids_layout.get_bval(join(file_tpl.dirname,file_tpl.filename))),bids_root=directory) + num_sources = addGitAnnexSources( + obj=acq_obj_bval, + filepath=join( + file_tpl.dirname, + bids_layout.get_bval(join(file_tpl.dirname, file_tpl.filename)), + ), + bids_root=directory, + ) if num_sources == 0: # WIP: add absolute location of BIDS directory on disk for later finding of files - acq_obj_bval.add_attributes({Constants.PROV['Location']:"file:/" + join(file_tpl.dirname,bids_layout.get_bval(join(file_tpl.dirname,file_tpl.filename)))}) + acq_obj_bval.add_attributes( + { + Constants.PROV["Location"]: "file:/" + + join( + file_tpl.dirname, + bids_layout.get_bval( + join(file_tpl.dirname, file_tpl.filename) + ), + ) + } + ) # add sha512 sum - if isfile(join(directory,file_tpl.dirname,file_tpl.filename)): - acq_obj_bval.add_attributes({Constants.CRYPTO_SHA512:getsha512(join(directory,file_tpl.dirname,file_tpl.filename))}) + if isfile(join(directory, file_tpl.dirname, file_tpl.filename)): + acq_obj_bval.add_attributes( + { + Constants.CRYPTO_SHA512: getsha512( + join(directory, file_tpl.dirname, file_tpl.filename) + ) + } + ) else: - logging.info("WARNING file %s doesn't exist! No SHA512 sum stored in NIDM files..." %join(directory,file_tpl.dirname,file_tpl.filename)) + logging.info( + "WARNING file %s doesn't exist! No SHA512 sum stored in NIDM files..." 
+ % join(directory, file_tpl.dirname, file_tpl.filename) + ) acq_obj_bvec = AcquisitionObject(acq) - acq_obj_bvec.add_attributes({PROV_TYPE:BIDS_Constants.scans["bvec"]}) - #add file link to bvec files - acq_obj_bvec.add_attributes({Constants.NIDM_FILENAME:getRelPathToBIDS(join(file_tpl.dirname,bids_layout.get_bvec(join(file_tpl.dirname,file_tpl.filename))),directory)}) + acq_obj_bvec.add_attributes({PROV_TYPE: BIDS_Constants.scans["bvec"]}) + # add file link to bvec files + acq_obj_bvec.add_attributes( + { + Constants.NIDM_FILENAME: getRelPathToBIDS( + join( + file_tpl.dirname, + bids_layout.get_bvec( + join(file_tpl.dirname, file_tpl.filename) + ), + ), + directory, + ) + } + ) # add git-annex/datalad info if exists - num_sources = addGitAnnexSources(obj=acq_obj_bvec,filepath=join(file_tpl.dirname,bids_layout.get_bvec(join(file_tpl.dirname,file_tpl.filename))),bids_root=directory) + num_sources = addGitAnnexSources( + obj=acq_obj_bvec, + filepath=join( + file_tpl.dirname, + bids_layout.get_bvec(join(file_tpl.dirname, file_tpl.filename)), + ), + bids_root=directory, + ) if num_sources == 0: - #WIP: add absolute location of BIDS directory on disk for later finding of files - acq_obj_bvec.add_attributes({Constants.PROV['Location']:"file:/" + join(file_tpl.dirname,bids_layout.get_bvec(join(file_tpl.dirname,file_tpl.filename)))}) + # WIP: add absolute location of BIDS directory on disk for later finding of files + acq_obj_bvec.add_attributes( + { + Constants.PROV["Location"]: "file:/" + + join( + file_tpl.dirname, + bids_layout.get_bvec( + join(file_tpl.dirname, file_tpl.filename) + ), + ) + } + ) - if isfile(join(directory,file_tpl.dirname,file_tpl.filename)): - #add sha512 sum - acq_obj_bvec.add_attributes({Constants.CRYPTO_SHA512:getsha512(join(directory,file_tpl.dirname,file_tpl.filename))}) + if isfile(join(directory, file_tpl.dirname, file_tpl.filename)): + # add sha512 sum + acq_obj_bvec.add_attributes( + { + Constants.CRYPTO_SHA512: getsha512( + join(directory, file_tpl.dirname, file_tpl.filename) + ) + } + ) else: - logging.info("WARNING file %s doesn't exist! No SHA512 sum stored in NIDM files..." %join(directory,file_tpl.dirname,file_tpl.filename)) + logging.info( + "WARNING file %s doesn't exist! No SHA512 sum stored in NIDM files..." + % join(directory, file_tpl.dirname, file_tpl.filename) + ) - #link bval and bvec acquisition object entities together or is their association with DWI scan... + # link bval and bvec acquisition object entities together or is their association with DWI scan... -def bidsmri2project(directory, args): +def bidsmri2project(directory, args): # initialize empty cde graph...it may get replaced if we're doing variable to term mapping or not - cde=Graph() + cde = Graph() # Parse dataset_description.json file in BIDS directory - if (os.path.isdir(os.path.join(directory))): + if os.path.isdir(os.path.join(directory)): try: - with open(os.path.join(directory,'dataset_description.json')) as data_file: + with open(os.path.join(directory, "dataset_description.json")) as data_file: dataset = json.load(data_file) except OSError: - logging.critical("Cannot find dataset_description.json file which is required in the BIDS spec") + logging.critical( + "Cannot find dataset_description.json file which is required in the BIDS spec" + ) exit("-1") else: - logging.critical("Error: BIDS directory %s does not exist!" %os.path.join(directory)) + logging.critical( + "Error: BIDS directory %s does not exist!" 
% os.path.join(directory) + ) exit("-1") # create project / nidm-exp doc project = Project() # if there are git annex sources then add them - num_sources=addGitAnnexSources(obj=project.get_uuid(),bids_root=directory) + num_sources = addGitAnnexSources(obj=project.get_uuid(), bids_root=directory) # else just add the local path to the dataset if num_sources == 0: - project.add_attributes({Constants.PROV['Location']:"file:/" + directory}) - + project.add_attributes({Constants.PROV["Location"]: "file:/" + directory}) # add various attributes if they exist in BIDS dataset description file for key in dataset: # if key from dataset_description file is mapped to term in BIDS_Constants.py then add to NIDM object if key in BIDS_Constants.dataset_description: if type(dataset[key]) is list: - project.add_attributes({BIDS_Constants.dataset_description[key]:"".join(dataset[key])}) + project.add_attributes( + {BIDS_Constants.dataset_description[key]: "".join(dataset[key])} + ) else: - project.add_attributes({BIDS_Constants.dataset_description[key]:dataset[key]}) + project.add_attributes( + {BIDS_Constants.dataset_description[key]: dataset[key]} + ) # added special case to include DOI of project in hash for data element UUIDs to prevent collisions with # similar data elements from other projects and make the bids2nidm conversion deterministic in the sense @@ -590,91 +1003,119 @@ def bidsmri2project(directory, args): else: dataset_doi = None - - - - # get BIDS layout - bids.config.set_option('extension_initial_dot', True) + bids.config.set_option("extension_initial_dot", True) bids_layout = bids.BIDSLayout(directory) - # create empty dictionary for sessions where key is subject id and used later to link scans to same session as demographics - session={} - participant={} + session = {} + participant = {} # Parse participants.tsv file in BIDS directory and create study and acquisition objects - if os.path.isfile(os.path.join(directory,'participants.tsv')): - with open(os.path.join(directory,'participants.tsv')) as csvfile: - participants_data = csv.DictReader(csvfile, delimiter='\t') + if os.path.isfile(os.path.join(directory, "participants.tsv")): + with open(os.path.join(directory, "participants.tsv")) as csvfile: + participants_data = csv.DictReader(csvfile, delimiter="\t") # logic to create data dictionaries for variables and/or use them if they already exist. # first iterate over variables in dataframe and check which ones are already mapped as BIDS constants # and which are not. 
For those that are not # we want to use the variable-term mapping functions to help the user create data dictionaries - mapping_list=[] - column_to_terms={} + mapping_list = [] + column_to_terms = {} for field in participants_data.fieldnames: - # column is not in BIDS_Constants if not (field in BIDS_Constants.participants): # add column to list for column_to_terms mapping mapping_list.append(field) - - # if user didn't supply a json data dictionary file but we're doing some variable-term mapping create an empty one # for column_to_terms to use if args.json_map == False: - # defaults to participants.json because here we're mapping the participants.tsv file variables to terms # if participants.json file doesn't exist then run without json mapping file - if not os.path.isfile(os.path.join(directory,'participants.json')): + if not os.path.isfile(os.path.join(directory, "participants.json")): # temporary data frame of variables we need to create data dictionaries for - temp=DataFrame(columns=mapping_list) + temp = DataFrame(columns=mapping_list) # create data dictionary without concept mapping if args.no_concepts: - column_to_terms,cde = map_variables_to_terms(directory=directory,assessment_name='participants.tsv', - df=temp,output_file=os.path.join(directory,'participants.json'),bids=True,associate_concepts=False, - dataset_identifier = dataset_doi) + column_to_terms, cde = map_variables_to_terms( + directory=directory, + assessment_name="participants.tsv", + df=temp, + output_file=os.path.join(directory, "participants.json"), + bids=True, + associate_concepts=False, + dataset_identifier=dataset_doi, + ) # create data dictionary with concept mapping else: - column_to_terms,cde = map_variables_to_terms(directory=directory,assessment_name='participants.tsv', - df=temp,output_file=os.path.join(directory,'participants.json'),bids=True, - dataset_identifier = dataset_doi) + column_to_terms, cde = map_variables_to_terms( + directory=directory, + assessment_name="participants.tsv", + df=temp, + output_file=os.path.join(directory, "participants.json"), + bids=True, + dataset_identifier=dataset_doi, + ) else: # temporary data frame of variables we need to create data dictionaries for - temp=DataFrame(columns=mapping_list) + temp = DataFrame(columns=mapping_list) # create data dictionary without concept mapping if args.no_concepts: - column_to_terms,cde = map_variables_to_terms(directory=directory, assessment_name='participants.tsv', df=temp, - output_file=os.path.join(directory,'participants.json'),json_source=os.path.join(directory,'participants.json'), - bids=True,associate_concepts=False, dataset_identifier = dataset_doi) + column_to_terms, cde = map_variables_to_terms( + directory=directory, + assessment_name="participants.tsv", + df=temp, + output_file=os.path.join(directory, "participants.json"), + json_source=os.path.join(directory, "participants.json"), + bids=True, + associate_concepts=False, + dataset_identifier=dataset_doi, + ) # create data dictionary with concept mapping else: - column_to_terms,cde = map_variables_to_terms(directory=directory, assessment_name='participants.tsv', df=temp, - output_file=os.path.join(directory,'participants.json'),json_source=os.path.join(directory,'participants.json'), - bids=True,dataset_identifier = dataset_doi) + column_to_terms, cde = map_variables_to_terms( + directory=directory, + assessment_name="participants.tsv", + df=temp, + output_file=os.path.join(directory, "participants.json"), + json_source=os.path.join(directory, "participants.json"), + 
bids=True, + dataset_identifier=dataset_doi, + ) # if user supplied a JSON data dictionary then use it else: # temporary data frame of variables we need to create data dictionaries for - temp=DataFrame(columns=mapping_list) + temp = DataFrame(columns=mapping_list) # create data dictionary without concept mapping if args.no_concepts: - column_to_terms, cde = map_variables_to_terms(directory=directory, assessment_name='participants.tsv', df=temp, - output_file=os.path.join(directory,'participants.json'),json_source=args.json_map,bids=True, - associate_concepts=False, dataset_identifier = dataset_doi) + column_to_terms, cde = map_variables_to_terms( + directory=directory, + assessment_name="participants.tsv", + df=temp, + output_file=os.path.join(directory, "participants.json"), + json_source=args.json_map, + bids=True, + associate_concepts=False, + dataset_identifier=dataset_doi, + ) # create data dictionary with concept mapping else: - column_to_terms, cde = map_variables_to_terms(directory=directory, assessment_name='participants.tsv', df=temp, - output_file=os.path.join(directory,'participants.json'),json_source=args.json_map,bids=True, - dataset_identifier = dataset_doi) + column_to_terms, cde = map_variables_to_terms( + directory=directory, + assessment_name="participants.tsv", + df=temp, + output_file=os.path.join(directory, "participants.json"), + json_source=args.json_map, + bids=True, + dataset_identifier=dataset_doi, + ) # iterate over rows in participants.tsv file and create NIDM objects for sessions and acquisitions for row in participants_data: - #create session object for subject to be used for participant metadata and image data - #parse subject id from "sub-XXXX" string - temp = row['participant_id'].split("-") - #for ambiguity in BIDS datasets. Sometimes participant_id is sub-XXXX and othertimes it's just XXXX + # create session object for subject to be used for participant metadata and image data + # parse subject id from "sub-XXXX" string + temp = row["participant_id"].split("-") + # for ambiguity in BIDS datasets. 
Sometimes participant_id is sub-XXXX and other times it's just XXXX if len(temp) > 1: subjid = temp[1] else: @@ -690,42 +1131,78 @@ def bidsmri2project(directory, args): # create participant dictionary indexed by subjid to get agent UUIDs for later use participant[subjid] = {} # add agent for this participant to the graph - participant[subjid]['person'] = acq.add_person(attributes=({Constants.NIDM_SUBJECTID:row['participant_id']})) + participant[subjid]["person"] = acq.add_person( + attributes=({Constants.NIDM_SUBJECTID: row["participant_id"]}) + ) # add nfo:filename entry to assessment entity to reflect provenance of where this data came from - acq_entity.add_attributes({Constants.NIDM_FILENAME:getRelPathToBIDS(os.path.join(directory,'participants.tsv'),directory)}) - - #add qualified association of participant with acquisition activity - acq.add_qualified_association(person=participant[subjid]['person'],role=Constants.NIDM_PARTICIPANT) + acq_entity.add_attributes( + { + Constants.NIDM_FILENAME: getRelPathToBIDS( + os.path.join(directory, "participants.tsv"), directory + ) + } + ) + + # add qualified association of participant with acquisition activity + acq.add_qualified_association( + person=participant[subjid]["person"], + role=Constants.NIDM_PARTICIPANT, + ) # print(acq) # if there are git annex sources for participants.tsv file then add them - num_sources=addGitAnnexSources(obj=acq_entity.get_uuid(),bids_root=directory) + num_sources = addGitAnnexSources( + obj=acq_entity.get_uuid(), bids_root=directory + ) # else just add the local path to the dataset if num_sources == 0: - acq_entity.add_attributes({Constants.PROV['Location']:"file:/" + os.path.join(directory,'participants.tsv')}) + acq_entity.add_attributes( + { + Constants.PROV["Location"]: "file:/" + + os.path.join(directory, "participants.tsv") + } + ) # if there's a participant.json sidecar file then create an entity and # associate it with all the assessment entities - if os.path.isfile(os.path.join(directory,'participants.json')): + if os.path.isfile(os.path.join(directory, "participants.json")): json_sidecar = AcquisitionObject(acquisition=acq) - json_sidecar.add_attributes({PROV_TYPE:QualifiedName(Namespace("bids",Constants.BIDS),"sidecar_file"), Constants.NIDM_FILENAME: - getRelPathToBIDS(os.path.join(directory,'participants.json'),directory)}) + json_sidecar.add_attributes( + { + PROV_TYPE: QualifiedName( + Namespace("bids", Constants.BIDS), "sidecar_file" + ), + Constants.NIDM_FILENAME: getRelPathToBIDS( + os.path.join(directory, "participants.json"), directory + ), + } + ) # add Git Annex Sources # if there are git annex sources for participants.tsv file then add them - num_sources=addGitAnnexSources(obj=json_sidecar.get_uuid(),filepath=os.path.join(directory,'participants.json'),bids_root=directory) + num_sources = addGitAnnexSources( + obj=json_sidecar.get_uuid(), + filepath=os.path.join(directory, "participants.json"), + bids_root=directory, + ) # else just add the local path to the dataset if num_sources == 0: - json_sidecar.add_attributes({Constants.PROV['Location']:"file:/" + os.path.join(directory,'participants.json')}) - + json_sidecar.add_attributes( + { + Constants.PROV["Location"]: "file:/" + + os.path.join(directory, "participants.json") + } + ) # check if json_sidecar entity exists and if so associate assessment entity with it - if 'json_sidecar' in locals(): - #connect json_entity with acq_entity - acq_entity.add_attributes({Constants.PROV["wasInfluencedBy"]:json_sidecar}) + if "json_sidecar" in locals(): 
# connect json_entity with acq_entity + acq_entity.add_attributes( + {Constants.PROV["wasInfluencedBy"]: json_sidecar} + ) - for key,value in row.items(): + for key, value in row.items(): if not value: continue # for variables in participants.tsv file who have term mappings in BIDS_Constants.py use those, @@ -735,37 +1212,79 @@ def bidsmri2project(directory, args): # WIP # Here we are adding to CDE graph data elements for BIDS Constants that remain fixed for # each BIDS-compliant dataset - if not (BIDS_Constants.participants[key] == Constants.NIDM_SUBJECTID): - + if not ( + BIDS_Constants.participants[key] == Constants.NIDM_SUBJECTID + ): cde_id = Constants.BIDS[key] # add the data element to the CDE graph - cde.add((cde_id,RDF.type, Constants.NIDM['DataElement'])) - cde.add((cde_id,RDF.type, Constants.PROV['Entity'])) + cde.add((cde_id, RDF.type, Constants.NIDM["DataElement"])) + cde.add((cde_id, RDF.type, Constants.PROV["Entity"])) # add some basic information about this data element - cde.add((cde_id,Constants.RDFS['label'],Literal(BIDS_Constants.participants[key].localpart))) - cde.add((cde_id,Constants.NIDM['isAbout'],URIRef(BIDS_Constants.participants[key].uri))) - cde.add((cde_id,Constants.NIDM['source_variable'],Literal(key))) - cde.add((cde_id,Constants.NIDM['description'],Literal("participant/subject identifier"))) - cde.add((cde_id,Constants.RDFS['comment'],Literal("BIDS participants_id variable fixed in specification"))) - cde.add((cde_id,Constants.RDFS['valueType'],URIRef(Constants.XSD["string"]))) - - acq_entity.add_attributes({cde_id:Literal(value)}) + cde.add( + ( + cde_id, + Constants.RDFS["label"], + Literal(BIDS_Constants.participants[key].localpart), + ) + ) + cde.add( + ( + cde_id, + Constants.NIDM["isAbout"], + URIRef(BIDS_Constants.participants[key].uri), + ) + ) + cde.add( + ( + cde_id, + Constants.NIDM["source_variable"], + Literal(key), + ) + ) + cde.add( + ( + cde_id, + Constants.NIDM["description"], + Literal("participant/subject identifier"), + ) + ) + cde.add( + ( + cde_id, + Constants.RDFS["comment"], + Literal( + "BIDS participants_id variable fixed in specification" + ), + ) + ) + cde.add( + ( + cde_id, + Constants.RDFS["valueType"], + URIRef(Constants.XSD["string"]), + ) + ) + + acq_entity.add_attributes({cde_id: Literal(value)}) # else variable in participants.tsv isn't a BIDS constant CDE it's a user-defined variable # so we need to add the variable data dictionary as a PersonalDataElement to NIDM graph using # the cde graph returned from map_variables_to_terms functions above else: - # here we're adding the assessment data for a particular row in the participants.tsv value # to the acquisition entity (acq_entity) using the UUIDs in the cde graph to identify the # data element we're storing assessment data for. 
- add_attributes_with_cde(prov_object=acq_entity,cde=cde,row_variable=key,value=value) - + add_attributes_with_cde( + prov_object=acq_entity, + cde=cde, + row_variable=key, + value=value, + ) # create acquisition objects for each scan for each subject # loop through all subjects in dataset for subject_id in bids_layout.get_subjects(): - logging.info("Converting subject: %s" %subject_id) + logging.info("Converting subject: %s" % subject_id) # skip .git directories...added to support datalad datasets if subject_id.startswith("."): continue @@ -783,112 +1302,188 @@ def bidsmri2project(directory, args): # create a new session ses = Session(project) # add session number as metadata - ses.add_attributes({Constants.BIDS['session_number']:img_session}) - addimagingsessions(bids_layout=bids_layout,subject_id=subject_id,session=ses,participant=participant, directory=directory,img_session=img_session) + ses.add_attributes({Constants.BIDS["session_number"]: img_session}) + addimagingsessions( + bids_layout=bids_layout, + subject_id=subject_id, + session=ses, + participant=participant, + directory=directory, + img_session=img_session, + ) # else we have no ses-* directories in the BIDS layout - addimagingsessions(bids_layout=bids_layout,subject_id=subject_id,session=Session(project),participant=participant, directory=directory) - - + addimagingsessions( + bids_layout=bids_layout, + subject_id=subject_id, + session=Session(project), + participant=participant, + directory=directory, + ) # Added temporarily to support phenotype files # for each *.tsv / *.json file pair in the phenotypes directory # WIP: ADD VARIABLE -> TERM MAPPING HERE cde_pheno = [] - for tsv_file in glob.glob(os.path.join(directory,"phenotype","*.tsv")): + for tsv_file in glob.glob(os.path.join(directory, "phenotype", "*.tsv")): # for now, open the TSV file, extract the row for this subject, store it in an acquisition object and link to # the associated JSON data dictionary file with open(tsv_file) as phenofile: - pheno_data = csv.DictReader(phenofile, delimiter='\t') - mapping_list=[] - column_to_terms={} + pheno_data = csv.DictReader(phenofile, delimiter="\t") + mapping_list = [] + column_to_terms = {} for field in pheno_data.fieldnames: # column is not in BIDS_Constants if not (field in BIDS_Constants.participants): # add column to list for column_to_terms mapping mapping_list.append(field) - # if user didn't supply a json data dictionary file # create an empty one for column_to_terms to use if args.json_map == False: - #defaults to participants.json because here we're mapping the participants.tsv file variables to terms + # defaults to participants.json because here we're mapping the participants.tsv file variables to terms # if participants.json file doesn't exist then run without json mapping file if not os.path.isfile(os.path.splitext(tsv_file)[0] + ".json"): - #maps variables in CSV file to terms - temp=DataFrame(columns=mapping_list) + # maps variables in CSV file to terms + temp = DataFrame(columns=mapping_list) if args.no_concepts: - column_to_terms_pheno,cde_tmp = map_variables_to_terms(directory=directory,assessment_name=tsv_file, - df=temp,output_file=os.path.splitext(tsv_file)[0] + ".json",bids=True,associate_concepts=False) + column_to_terms_pheno, cde_tmp = map_variables_to_terms( + directory=directory, + assessment_name=tsv_file, + df=temp, + output_file=os.path.splitext(tsv_file)[0] + ".json", + bids=True, + associate_concepts=False, + ) else: - column_to_terms_pheno,cde_tmp = 
map_variables_to_terms(directory=directory,assessment_name=tsv_file, - df=temp,output_file=os.path.splitext(tsv_file)[0] + ".json",bids=True) + column_to_terms_pheno, cde_tmp = map_variables_to_terms( + directory=directory, + assessment_name=tsv_file, + df=temp, + output_file=os.path.splitext(tsv_file)[0] + ".json", + bids=True, + ) else: - #maps variables in CSV file to terms - temp=DataFrame(columns=mapping_list) + # maps variables in CSV file to terms + temp = DataFrame(columns=mapping_list) if args.no_concepts: - column_to_terms_pheno,cde_tmp = map_variables_to_terms(directory=directory, assessment_name=tsv_file, df=temp, - output_file=os.path.splitext(tsv_file)[0] + ".json",json_source=os.path.splitext(tsv_file)[0] + ".json",bids=True,associate_concepts=False) + column_to_terms_pheno, cde_tmp = map_variables_to_terms( + directory=directory, + assessment_name=tsv_file, + df=temp, + output_file=os.path.splitext(tsv_file)[0] + ".json", + json_source=os.path.splitext(tsv_file)[0] + ".json", + bids=True, + associate_concepts=False, + ) else: - column_to_terms_pheno,cde_tmp = map_variables_to_terms(directory=directory, assessment_name=tsv_file, df=temp, - output_file=os.path.splitext(tsv_file)[0] + ".json",json_source=os.path.splitext(tsv_file)[0] + ".json",bids=True) + column_to_terms_pheno, cde_tmp = map_variables_to_terms( + directory=directory, + assessment_name=tsv_file, + df=temp, + output_file=os.path.splitext(tsv_file)[0] + ".json", + json_source=os.path.splitext(tsv_file)[0] + ".json", + bids=True, + ) # else user did supply a json data dictionary so use it else: - #maps variables in CSV file to terms - temp=DataFrame(columns=mapping_list) + # maps variables in CSV file to terms + temp = DataFrame(columns=mapping_list) if args.no_concepts: - column_to_terms_pheno, cde_tmp = map_variables_to_terms(directory=directory, assessment_name=tsv_file, df=temp, - output_file=os.path.splitext(tsv_file)[0] + ".json",json_source=args.json_map,bids=True,associate_concepts=False) + column_to_terms_pheno, cde_tmp = map_variables_to_terms( + directory=directory, + assessment_name=tsv_file, + df=temp, + output_file=os.path.splitext(tsv_file)[0] + ".json", + json_source=args.json_map, + bids=True, + associate_concepts=False, + ) else: - column_to_terms_pheno, cde_tmp = map_variables_to_terms(directory=directory, assessment_name=tsv_file, df=temp, - output_file=os.path.splitext(tsv_file)[0] + ".json",json_source=args.json_map,bids=True) + column_to_terms_pheno, cde_tmp = map_variables_to_terms( + directory=directory, + assessment_name=tsv_file, + df=temp, + output_file=os.path.splitext(tsv_file)[0] + ".json", + json_source=args.json_map, + bids=True, + ) for row in pheno_data: - subjid = row['participant_id'].split("-") + subjid = row["participant_id"].split("-") # add acquisition object acq = AssessmentAcquisition(session=session[subjid[1]]) # add qualified association with person - acq.add_qualified_association(person=participant[subjid[1]]['person'],role=Constants.NIDM_PARTICIPANT) + acq.add_qualified_association( + person=participant[subjid[1]]["person"], + role=Constants.NIDM_PARTICIPANT, + ) # add acquisition entity and associate it with the acquisition activity acq_entity = AssessmentObject(acquisition=acq) - - - for key,value in row.items(): + for key, value in row.items(): if not value: continue # we're using participant_id in NIDM in agent so don't add to assessment as a triple. 
# BIDS phenotype files seem to have an index column with no column header variable name so skip those - if ((not key == "participant_id") and (key != "")): - add_attributes_with_cde(prov_object=acq_entity,cde=cde_tmp,row_variable=key,value=value) + if (not key == "participant_id") and (key != ""): + add_attributes_with_cde( + prov_object=acq_entity, + cde=cde_tmp, + row_variable=key, + value=value, + ) # link TSV file - acq_entity.add_attributes({Constants.NIDM_FILENAME:getRelPathToBIDS(tsv_file,directory)}) + acq_entity.add_attributes( + {Constants.NIDM_FILENAME: getRelPathToBIDS(tsv_file, directory)} + ) # if there are git annex sources for participants.tsv file then add them - num_sources=addGitAnnexSources(obj=acq_entity.get_uuid(),bids_root=directory) + num_sources = addGitAnnexSources( + obj=acq_entity.get_uuid(), bids_root=directory + ) # else just add the local path to the dataset if num_sources == 0: - acq_entity.add_attributes({Constants.PROV['Location']:"file:/" + tsv_file}) - + acq_entity.add_attributes( + {Constants.PROV["Location"]: "file:/" + tsv_file} + ) # link associated JSON file if it exists - data_dict = os.path.join(directory,"phenotype",os.path.splitext(os.path.basename(tsv_file))[0]+ ".json") + data_dict = os.path.join( + directory, + "phenotype", + os.path.splitext(os.path.basename(tsv_file))[0] + ".json", + ) if os.path.isfile(data_dict): # if file exists, create a new entity and associate it with the appropriate activity and a used relationship # with the TSV-related entity json_entity = AcquisitionObject(acquisition=acq) - json_entity.add_attributes({PROV_TYPE:Constants.BIDS["sidecar_file"], Constants.NIDM_FILENAME: - getRelPathToBIDS(data_dict,directory)}) + json_entity.add_attributes( + { + PROV_TYPE: Constants.BIDS["sidecar_file"], + Constants.NIDM_FILENAME: getRelPathToBIDS( + data_dict, directory + ), + } + ) # add Git Annex Sources # if there are git annex sources for participants.tsv file then add them - num_sources=addGitAnnexSources(obj=json_entity.get_uuid(),filepath=data_dict,bids_root=directory) + num_sources = addGitAnnexSources( + obj=json_entity.get_uuid(), + filepath=data_dict, + bids_root=directory, + ) # else just add the local path to the dataset if num_sources == 0: - json_entity.add_attributes({Constants.PROV['Location']:"file:/" + data_dict}) - - #connect json_entity with acq_entity - acq_entity.add_attributes({Constants.PROV["wasInfluencedBy"]:json_entity.get_uuid()}) + json_entity.add_attributes( + {Constants.PROV["Location"]: "file:/" + data_dict} + ) + + # connect json_entity with acq_entity + acq_entity.add_attributes( + {Constants.PROV["wasInfluencedBy"]: json_entity.get_uuid()} + ) # append cde_tmp to cde_pheno list for later inclusion in NIDM graph cde_pheno.append(cde_tmp) diff --git a/nidm/experiment/tools/click_base.py b/nidm/experiment/tools/click_base.py index a7929b63..21966a2e 100644 --- a/nidm/experiment/tools/click_base.py +++ b/nidm/experiment/tools/click_base.py @@ -1,5 +1,6 @@ import click + @click.group() def cli(): pass diff --git a/nidm/experiment/tools/click_main.py b/nidm/experiment/tools/click_main.py index 48bcdf6b..69b823d0 100644 --- a/nidm/experiment/tools/click_main.py +++ b/nidm/experiment/tools/click_main.py @@ -1,9 +1,11 @@ import click +from nidm.experiment.tools import ( + nidm_concat, + nidm_convert, + nidm_linreg, + nidm_merge, + nidm_query, + nidm_version, + nidm_visualize, +) from nidm.experiment.tools.click_base import cli -from nidm.experiment.tools import nidm_query -from nidm.experiment.tools 
import nidm_visualize -from nidm.experiment.tools import nidm_concat -from nidm.experiment.tools import nidm_merge -from nidm.experiment.tools import nidm_convert -from nidm.experiment.tools import nidm_linreg -from nidm.experiment.tools import nidm_version diff --git a/nidm/experiment/tools/csv2nidm.py b/nidm/experiment/tools/csv2nidm.py index a4eb76f4..21c3204e 100644 --- a/nidm/experiment/tools/csv2nidm.py +++ b/nidm/experiment/tools/csv2nidm.py @@ -1,10 +1,10 @@ #!/usr/bin/env python -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** # csv2nidm.py -# License:Apache License, Version 2.0 -#************************************************************************************** -#************************************************************************************** +# License:Apache License, Version 2.0 +# ************************************************************************************** +# ************************************************************************************** # Date: 01-19-18 Coded by: David Keator (dbkeator@gmail.com) # Filename: csv2nidm.py # @@ -14,49 +14,53 @@ # a term to associate with the variable name. The resulting annotated CSV data will # then be written to a NIDM data file. # -#************************************************************************************** +# ************************************************************************************** # Development environment: Python - PyCharm IDE # -#************************************************************************************** +# ************************************************************************************** # System requirements: Python 3.X # Libraries: pybids, numpy, matplotlib, pandas, scipy, math, dateutil, datetime,argparse, # os,sys,getopt,csv -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** # Programmer comments: # # -#************************************************************************************** -#************************************************************************************** - -import os,sys -from nidm.experiment import Project,Session,AssessmentAcquisition,AssessmentObject -from nidm.core import Constants -from nidm.experiment.Utils import read_nidm, map_variables_to_terms, add_attributes_with_cde, addGitAnnexSources, \ - redcap_datadictionary_to_json -from nidm.experiment.Query import GetParticipantIDs +# ************************************************************************************** +# ************************************************************************************** from argparse import ArgumentParser -from os.path import dirname, join, splitext,basename -import json -import pandas as pd -from rdflib import Graph,URIRef,RDF,Literal +import csv from io import StringIO -from shutil import copy2 -from nidm.core.Constants import DD +import json import logging -import csv +import os +from os.path import basename, dirname, join, splitext +from shutil 
import copy2 +import sys import tempfile +from nidm.core import Constants +from nidm.core.Constants import DD +from nidm.experiment import AssessmentAcquisition, AssessmentObject, Project, Session +from nidm.experiment.Query import GetParticipantIDs +from nidm.experiment.Utils import ( + add_attributes_with_cde, + addGitAnnexSources, + map_variables_to_terms, + read_nidm, + redcap_datadictionary_to_json, +) +import pandas as pd +from rdflib import RDF, Graph, Literal, URIRef - -#def createDialogBox(search_results): -#class NewListbox(tk.Listbox): +# def createDialogBox(search_results): +# class NewListbox(tk.Listbox): # def autowidth(self, maxwidth=100): # autowidth(self, maxwidth) -#def autowidth(list, maxwidth=100): +# def autowidth(list, maxwidth=100): # f = font.Font(font=list.cget("font")) # pixels = 0 # for item in list.get(0, "end"): @@ -70,32 +74,71 @@ # list.config(width=width+w) - - - def main(): - parser = ArgumentParser(description='This program will load in a CSV file and iterate over the header \ + parser = ArgumentParser( + description="This program will load in a CSV file and iterate over the header \ variable names performing an elastic search of https://scicrunch.org/ for NIDM-ReproNim \ tagged terms that fuzzy match the variable names. The user will then interactively pick \ a term to associate with the variable name. The resulting annotated CSV data will \ then be written to a NIDM data file. Note, you must obtain an API key to Interlex by signing up \ for an account at scicrunch.org then going to My Account and API Keys. Then set the environment \ - variable INTERLEX_API_KEY with your key.') + variable INTERLEX_API_KEY with your key." + ) - parser.add_argument('-csv', dest='csv_file', required=True, help="Full path to CSV file to convert") + parser.add_argument( + "-csv", dest="csv_file", required=True, help="Full path to CSV file to convert" + ) # parser.add_argument('-ilxkey', dest='key', required=True, help="Interlex/SciCrunch API key to use for query") dd_group = parser.add_mutually_exclusive_group() - dd_group.add_argument('-json_map', dest='json_map',required=False,help="Full path to user-suppled JSON file containing variable-term mappings.") - dd_group.add_argument('-redcap', dest='redcap',required=False, help="Full path to a user-supplied RedCap formatted data dictionary for csv file.") - parser.add_argument('-nidm', dest='nidm_file', required=False, help="Optional full path of NIDM file to add CSV->NIDM converted graph to") - parser.add_argument('-no_concepts', action='store_true', required=False, help='If this flag is set then no concept associations will be' - 'asked of the user. This is useful if you already have a -json_map specified without concepts and want to' - 'simply run this program to get a NIDM file with user interaction to associate concepts.') - parser.add_argument('-log','--log', dest='logfile',required=False, default=None, help="full path to directory to save log file. 
Log file name is csv2nidm_[arg.csv_file].log") - parser.add_argument('-dataset_id', '--dataset_id', dest='dataset_identifier',required=False, default=None, - help='If this is provided, which can be any dataset ID although its suggested to use a dataset' - 'DOI if available, unique data element IDs will use this information as part of the hash.') - parser.add_argument('-out', dest='output_file', required=True, help="Full path with filename to save NIDM file") + dd_group.add_argument( + "-json_map", + dest="json_map", + required=False, + help="Full path to user-supplied JSON file containing variable-term mappings.", + ) + dd_group.add_argument( + "-redcap", + dest="redcap", + required=False, + help="Full path to a user-supplied RedCap formatted data dictionary for csv file.", + ) + parser.add_argument( + "-nidm", + dest="nidm_file", + required=False, + help="Optional full path of NIDM file to add CSV->NIDM converted graph to", + ) + parser.add_argument( + "-no_concepts", + action="store_true", + required=False, + help="If this flag is set then no concept associations will be " + "asked of the user. This is useful if you already have a -json_map specified without concepts and want to " + "simply run this program to get a NIDM file with user interaction to associate concepts.", + ) + parser.add_argument( + "-log", + "--log", + dest="logfile", + required=False, + default=None, + help="full path to directory to save log file. Log file name is csv2nidm_[arg.csv_file].log", + ) + parser.add_argument( + "-dataset_id", + "--dataset_id", + dest="dataset_identifier", + required=False, + default=None, + help="If this is provided, which can be any dataset ID although it's suggested to use a dataset " + "DOI if available, unique data element IDs will use this information as part of the hash.", + ) + parser.add_argument( + "-out", + dest="output_file", + required=True, + help="Full path with filename to save NIDM file", + ) args = parser.parse_args() # if we have a redcap datadictionary then convert it straight away to a json representation @@ -108,97 +151,120 @@ def main(): if args.csv_file.endswith(".csv"): df = pd.read_csv(args.csv_file) elif args.csv_file.endswith(".tsv"): - df = pd.read_csv(args.csv_file,sep='\t', engine='python') + df = pd.read_csv(args.csv_file, sep="\t", engine="python") else: - print("ERROR: input file must have .csv (comma-separated) or .tsv (tab separated) extensions/" - "file types. Please change your input file appropriately and re-run.") + print( + "ERROR: input file must have .csv (comma-separated) or .tsv (tab separated) extensions/" + "file types. Please change your input file appropriately and re-run." 
+ ) print("no NIDM file created!") exit(1) - #temp = csv.reader(args.csv_file) - #df = pd.DataFrame(temp) + # temp = csv.reader(args.csv_file) + # df = pd.DataFrame(temp) - #maps variables in CSV file to terms - #if args.owl is not False: + # maps variables in CSV file to terms + # if args.owl is not False: # column_to_terms = map_variables_to_terms(df=df, apikey=args.key, directory=dirname(args.output_file), output_file=args.output_file, json_file=args.json_map, owl_file=args.owl) - #else: + # else: # if user did not specify -no_concepts then associate concepts interactively with user if not args.no_concepts: - column_to_terms, cde = map_variables_to_terms(df=df, assessment_name=basename(args.csv_file), - directory=dirname(args.output_file), output_file=args.output_file, - json_source=json_map,dataset_identifier=args.dataset_identifier) + column_to_terms, cde = map_variables_to_terms( + df=df, + assessment_name=basename(args.csv_file), + directory=dirname(args.output_file), + output_file=args.output_file, + json_source=json_map, + dataset_identifier=args.dataset_identifier, + ) # run without concept mappings else: - column_to_terms, cde = map_variables_to_terms(df=df, assessment_name=basename(args.csv_file), - directory=dirname(args.output_file), output_file=args.output_file, - json_source=json_map, associate_concepts=False, - dataset_identifier=args.dataset_identifier) + column_to_terms, cde = map_variables_to_terms( + df=df, + assessment_name=basename(args.csv_file), + directory=dirname(args.output_file), + output_file=args.output_file, + json_source=json_map, + associate_concepts=False, + dataset_identifier=args.dataset_identifier, + ) if args.logfile is not None: - logging.basicConfig(filename=join(args.logfile,'csv2nidm_' + os.path.splitext(os.path.basename(args.csv_file))[0] + '.log'), level=logging.DEBUG) + logging.basicConfig( + filename=join( + args.logfile, + "csv2nidm_" + + os.path.splitext(os.path.basename(args.csv_file))[0] + + ".log", + ), + level=logging.DEBUG, + ) # add some logging info - logging.info("csv2nidm %s" %args) + logging.info("csv2nidm %s" % args) - - #If user has added an existing NIDM file as a command line parameter then add to existing file for subjects who exist in the NIDM file + # If user has added an existing NIDM file as a command line parameter then add to existing file for subjects who exist in the NIDM file if args.nidm_file: print("Adding to NIDM file...") # get subjectID list for later qres = GetParticipantIDs([args.nidm_file]) - #read in NIDM file + # read in NIDM file project = read_nidm(args.nidm_file) - #with open("/Users/dbkeator/Downloads/test.ttl","w") as f: + # with open("/Users/dbkeator/Downloads/test.ttl","w") as f: # f.write(project.serializeTurtle()) + # get list of session objects + session_objs = project.get_sessions() - #get list of session objects - session_objs=project.get_sessions() - - #look at column_to_terms dictionary for NIDM URL for subject id (Constants.NIDM_SUBJECTID) - id_field=None + # look at column_to_terms dictionary for NIDM URL for subject id (Constants.NIDM_SUBJECTID) + id_field = None for key, value in column_to_terms.items(): - if 'isAbout' in column_to_terms[key]: - for isabout_key,isabout_value in column_to_terms[key]['isAbout'].items(): - if (isabout_key == 'url') or (isabout_key == '@id'): - if (isabout_value == Constants.NIDM_SUBJECTID._uri): + if "isAbout" in column_to_terms[key]: + for isabout_key, isabout_value in column_to_terms[key][ + "isAbout" + ].items(): + if (isabout_key == "url") or (isabout_key 
== "@id"): + if isabout_value == Constants.NIDM_SUBJECTID._uri: key_tuple = eval(key) - #id_field=key + # id_field=key id_field = key_tuple.variable - #make sure id_field is a string for zero-padded subject ids - #re-read data file with constraint that key field is read as string - df = pd.read_csv(args.csv_file,dtype={id_field : str}) + # make sure id_field is a string for zero-padded subject ids + # re-read data file with constraint that key field is read as string + df = pd.read_csv(args.csv_file, dtype={id_field: str}) break - #if we couldn't find a subject ID field in column_to_terms, ask user + # if we couldn't find a subject ID field in column_to_terms, ask user if id_field is None: - option=1 + option = 1 for column in df.columns: - print("%d: %s" %(option,column)) - option=option+1 - selection=input("Please select the subject ID field from the list above: ") + print("%d: %s" % (option, column)) + option = option + 1 + selection = input( + "Please select the subject ID field from the list above: " + ) # Make sure user selected one of the options. If not, present user with selection input again while (not selection.isdigit()) or (int(selection) > int(option)): # Wait for user input - selection = input("Please select the subject ID field from the list above: \t" % option) + selection = input( + "Please select the subject ID field from the list above: \t" + ) id_field = df.columns[int(selection) - 1] - #make sure id_field is a string for zero-padded subject ids - #re-read data file with constraint that key field is read as string + # make sure id_field is a string for zero-padded subject ids + # re-read data file with constraint that key field is read as string if args.csv_file.endswith(".csv"): - df = pd.read_csv(args.csv_file,dtype={id_field : str}) + df = pd.read_csv(args.csv_file, dtype={id_field: str}) else: - df = pd.read_csv(args.csv_file, dtype={id_field: str},sep='\t') + df = pd.read_csv(args.csv_file, dtype={id_field: str}, sep="\t") ###use RDFLib here for temporary graph making query easier - #rdf_graph = Graph() - #rdf_graph.parse(source=StringIO(project.serializeTurtle()),format='turtle') + # rdf_graph = Graph() + # rdf_graph.parse(source=StringIO(project.serializeTurtle()),format='turtle') - #print("Querying for existing participants in NIDM graph....") + # print("Querying for existing participants in NIDM graph....") ###find subject ids and sessions in NIDM document - #query = """SELECT DISTINCT ?session ?nidm_subj_id ?agent + # query = """SELECT DISTINCT ?session ?nidm_subj_id ?agent # WHERE { # ?activity prov:wasAssociatedWith ?agent ; # dct:isPartOf ?session . @@ -206,185 +272,211 @@ def main(): # ndar:src_subject_id ?nidm_subj_id . # }""" ###print(query) - #qres = rdf_graph.query(query) - - - - - for index,row in qres.iterrows(): - logging.info("participant in NIDM file %s \t %s" %(row[0],row[1])) - #find row in CSV file with subject id matching agent from NIDM file - - #csv_row = df.loc[df[id_field]==type(df[id_field][0])(row[1])] - #find row in CSV file with matching subject id to the agent in the NIDM file - #be careful about data types...simply type-change dataframe subject id column and query to strings. - #here we're removing the leading 0's from IDs because pandas.read_csv strips those unless you know ahead of - #time which column is the subject id.... 
- csv_row = df.loc[df[id_field].astype('str').str.contains(str(row[1]).lstrip("0"))] - - #if there was data about this subject in the NIDM file already (i.e. an agent already exists with this subject id) - #then add this CSV assessment data to NIDM file, else skip it.... - if (not (len(csv_row.index)==0)): - - logging.info("found participant in CSV file" ) + # qres = rdf_graph.query(query) + + for index, row in qres.iterrows(): + logging.info("participant in NIDM file %s \t %s" % (row[0], row[1])) + # find row in CSV file with subject id matching agent from NIDM file + + # csv_row = df.loc[df[id_field]==type(df[id_field][0])(row[1])] + # find row in CSV file with matching subject id to the agent in the NIDM file + # be careful about data types...simply type-change dataframe subject id column and query to strings. + # here we're removing the leading 0's from IDs because pandas.read_csv strips those unless you know ahead of + # time which column is the subject id.... + csv_row = df.loc[ + df[id_field].astype("str").str.contains(str(row[1]).lstrip("0")) + ] + + # if there was data about this subject in the NIDM file already (i.e. an agent already exists with this subject id) + # then add this CSV assessment data to NIDM file, else skip it.... + if not (len(csv_row.index) == 0): + logging.info("found participant in CSV file") # create a new session for this assessment - new_session=Session(project=project) + new_session = Session(project=project) - #NIDM document session uuid - #session_uuid = row[0] + # NIDM document session uuid + # session_uuid = row[0] - #temporary list of string-based URIs of session objects from API - #temp = [o.identifier._uri for o in session_objs] - #get session object from existing NIDM file that is associated with a specific subject id - #nidm_session = (i for i,x in enumerate([o.identifier._uri for o in session_objs]) if x == str(session_uuid)) - #nidm_session = session_objs[temp.index(str(session_uuid))] - #for nidm_session in session_objs: + # temporary list of string-based URIs of session objects from API + # temp = [o.identifier._uri for o in session_objs] + # get session object from existing NIDM file that is associated with a specific subject id + # nidm_session = (i for i,x in enumerate([o.identifier._uri for o in session_objs]) if x == str(session_uuid)) + # nidm_session = session_objs[temp.index(str(session_uuid))] + # for nidm_session in session_objs: # if nidm_session.identifier._uri == str(session_uuid): - #add an assessment acquisition for the phenotype data to session and associate with agent - #acq=AssessmentAcquisition(session=nidm_session) - acq=AssessmentAcquisition(session=new_session) - #add acquisition entity for assessment + # add an assessment acquisition for the phenotype data to session and associate with agent + # acq=AssessmentAcquisition(session=nidm_session) + acq = AssessmentAcquisition(session=new_session) + # add acquisition entity for assessment acq_entity = AssessmentObject(acquisition=acq) - #add qualified association with existing agent - acq.add_qualified_association(person=row[0],role=Constants.NIDM_PARTICIPANT) + # add qualified association with existing agent + acq.add_qualified_association( + person=row[0], role=Constants.NIDM_PARTICIPANT + ) # add git-annex info if exists - num_sources = addGitAnnexSources(obj=acq_entity,filepath=args.csv_file,bids_root=dirname(args.csv_file)) + num_sources = addGitAnnexSources( + obj=acq_entity, + filepath=args.csv_file, + bids_root=dirname(args.csv_file), + ) # if there aren't any git 
annex sources then just store the local directory information if num_sources == 0: # WIP: add absolute location of BIDS directory on disk for later finding of files - acq_entity.add_attributes({Constants.PROV['Location']:"file:/" + args.csv_file}) + acq_entity.add_attributes( + {Constants.PROV["Location"]: "file:/" + args.csv_file} + ) # store file to acq_entity - acq_entity.add_attributes({Constants.NIDM_FILENAME:basename(args.csv_file)}) + acq_entity.add_attributes( + {Constants.NIDM_FILENAME: basename(args.csv_file)} + ) - #store other data from row with columns_to_term mappings + # store other data from row with columns_to_term mappings for row_variable in csv_row: - #check if row_variable is subject id, if so skip it - if row_variable==id_field: + # check if row_variable is subject id, if so skip it + if row_variable == id_field: continue else: if not csv_row[row_variable].values[0]: continue - - add_attributes_with_cde(acq_entity, cde, row_variable, csv_row[row_variable].values[0]) - - + add_attributes_with_cde( + acq_entity, + cde, + row_variable, + csv_row[row_variable].values[0], + ) continue - print ("Adding CDEs to graph....") + print("Adding CDEs to graph....") # convert to rdflib Graph and add CDEs rdf_graph = Graph() - rdf_graph.parse(source=StringIO(project.serializeTurtle()),format='turtle') + rdf_graph.parse(source=StringIO(project.serializeTurtle()), format="turtle") rdf_graph = rdf_graph + cde print("Backing up original NIDM file...") - copy2(src=args.nidm_file,dst=args.nidm_file+".bak") + copy2(src=args.nidm_file, dst=args.nidm_file + ".bak") print("Writing NIDM file....") - rdf_graph.serialize(destination=args.nidm_file,format='turtle') + rdf_graph.serialize(destination=args.nidm_file, format="turtle") else: print("Creating NIDM file...") - #If user did not choose to add this data to an existing NIDM file then create a new one for the CSV data - #create empty project - project=Project() + # If user did not choose to add this data to an existing NIDM file then create a new one for the CSV data + # create empty project + project = Project() - #simply add name of file to project since we don't know anything about it - project.add_attributes({Constants.NIDM_FILENAME:args.csv_file}) + # simply add name of file to project since we don't know anything about it + project.add_attributes({Constants.NIDM_FILENAME: args.csv_file}) - - #look at column_to_terms dictionary for NIDM URL for subject id (Constants.NIDM_SUBJECTID) - id_field=None + # look at column_to_terms dictionary for NIDM URL for subject id (Constants.NIDM_SUBJECTID) + id_field = None for key, value in column_to_terms.items(): # using isAbout concept association to associate subject identifier variable from csv with a known term # for subject IDs - if 'isAbout' in column_to_terms[key]: + if "isAbout" in column_to_terms[key]: # iterate over isAbout list entries and look for Constants.NIDM_SUBJECTID - for entries in column_to_terms[key]['isAbout']: - if Constants.NIDM_SUBJECTID.uri == entries['@id']: + for entries in column_to_terms[key]["isAbout"]: + if Constants.NIDM_SUBJECTID.uri == entries["@id"]: key_tuple = eval(key) - id_field=key_tuple.variable - #make sure id_field is a string for zero-padded subject ids - #re-read data file with constraint that key field is read as string + id_field = key_tuple.variable + # make sure id_field is a string for zero-padded subject ids + # re-read data file with constraint that key field is read as string if args.csv_file.endswith(".csv"): - df = 
pd.read_csv(args.csv_file,dtype={id_field : str}) + df = pd.read_csv(args.csv_file, dtype={id_field: str}) else: - df = pd.read_csv(args.csv_file, dtype={id_field: str},sep='\t') + df = pd.read_csv( + args.csv_file, dtype={id_field: str}, sep="\t" + ) break - #if we couldn't find a subject ID field in column_to_terms, ask user + # if we couldn't find a subject ID field in column_to_terms, ask user if id_field is None: - option=1 + option = 1 for column in df.columns: - print("%d: %s" %(option,column)) - option=option+1 - selection=input("Please select the subject ID field from the list above: ") + print("%d: %s" % (option, column)) + option = option + 1 + selection = input( + "Please select the subject ID field from the list above: " + ) # Make sure user selected one of the options. If not present user with selection input again while (not selection.isdigit()) or (int(selection) > int(option)): # Wait for user input - selection = input("Please select the subject ID field from the list above: \t" % option) - id_field=df.columns[int(selection)-1] - #make sure id_field is a string for zero-padded subject ids - #re-read data file with constraint that key field is read as string + selection = input( + "Please select the subject ID field from the list above: \t" + % option + ) + id_field = df.columns[int(selection) - 1] + # make sure id_field is a string for zero-padded subject ids + # re-read data file with constraint that key field is read as string if args.csv_file.endswith(".csv"): - df = pd.read_csv(args.csv_file,dtype={id_field : str}) + df = pd.read_csv(args.csv_file, dtype={id_field: str}) else: - df = pd.read_csv(args.csv_file, dtype={id_field: str}, sep='\t') - + df = pd.read_csv(args.csv_file, dtype={id_field: str}, sep="\t") - #iterate over rows and store in NIDM file + # iterate over rows and store in NIDM file for csv_index, csv_row in df.iterrows(): - #create a session object - session=Session(project) + # create a session object + session = Session(project) - #create and acquisition activity and entity - acq=AssessmentAcquisition(session) - acq_entity=AssessmentObject(acq) + # create and acquisition activity and entity + acq = AssessmentAcquisition(session) + acq_entity = AssessmentObject(acq) - #create prov:Agent for subject - #acq.add_person(attributes=({Constants.NIDM_SUBJECTID:row['participant_id']})) + # create prov:Agent for subject + # acq.add_person(attributes=({Constants.NIDM_SUBJECTID:row['participant_id']})) # add git-annex info if exists - num_sources = addGitAnnexSources(obj=acq_entity,filepath=args.csv_file,bids_root=os.path.dirname(args.csv_file)) + num_sources = addGitAnnexSources( + obj=acq_entity, + filepath=args.csv_file, + bids_root=os.path.dirname(args.csv_file), + ) # if there aren't any git annex sources then just store the local directory information if num_sources == 0: # WIP: add absolute location of BIDS directory on disk for later finding of files - acq_entity.add_attributes({Constants.PROV['Location']:"file:/" + args.csv_file}) + acq_entity.add_attributes( + {Constants.PROV["Location"]: "file:/" + args.csv_file} + ) # store file to acq_entity - acq_entity.add_attributes({Constants.NIDM_FILENAME : basename(args.csv_file)}) + acq_entity.add_attributes( + {Constants.NIDM_FILENAME: basename(args.csv_file)} + ) - - #store other data from row with columns_to_term mappings - for row_variable,row_data in csv_row.iteritems(): + # store other data from row with columns_to_term mappings + for row_variable, row_data in csv_row.iteritems(): if not row_data: 
continue - #check if row_variable is subject id, if so skip it - if row_variable==id_field: + # check if row_variable is subject id, if so skip it + if row_variable == id_field: ### WIP: Check if agent already exists with the same ID. If so, use it else create a new agent - #add qualified association with person - acq.add_qualified_association(person= acq.add_person(attributes=({Constants.NIDM_SUBJECTID:str(row_data)})),role=Constants.NIDM_PARTICIPANT) + # add qualified association with person + acq.add_qualified_association( + person=acq.add_person( + attributes=({Constants.NIDM_SUBJECTID: str(row_data)}) + ), + role=Constants.NIDM_PARTICIPANT, + ) continue else: add_attributes_with_cde(acq_entity, cde, row_variable, row_data) - #print(project.serializeTurtle()) + # print(project.serializeTurtle()) # convert to rdflib Graph and add CDEs rdf_graph = Graph() - rdf_graph.parse(source=StringIO(project.serializeTurtle()),format='turtle') + rdf_graph.parse(source=StringIO(project.serializeTurtle()), format="turtle") rdf_graph = rdf_graph + cde print("Writing NIDM file....") - rdf_graph.serialize(destination=args.output_file,format='turtle') - + rdf_graph.serialize(destination=args.output_file, format="turtle") if __name__ == "__main__": - main() + main() diff --git a/nidm/experiment/tools/nidm2bids.py b/nidm/experiment/tools/nidm2bids.py index 1543ab67..9a5f2834 100644 --- a/nidm/experiment/tools/nidm2bids.py +++ b/nidm/experiment/tools/nidm2bids.py @@ -1,10 +1,10 @@ #!/usr/bin/env python -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** # NIDM2BIDSMRI.py # License: Apache License, Version 2.0 -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** # Date: 10-2-17 Coded by: David Keator (dbkeator@gmail.com) # Filename: NIDM2BIDSMRI.py # @@ -14,62 +14,74 @@ # according to the BIDS specification, the demographics metadata to a participants.tsv # file, the project-level metadata to a dataset_description.json file, and the # assessments to *.tsv/*.json file pairs in a phenotypes directory. 
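The header comment above summarizes the conversion: images are copied into a BIDS layout, demographics go to participants.tsv, project metadata to dataset_description.json, and assessments to phenotype files. Throughout this file (see `main()` further down in the patch) the NIDM document is loaded with `read_nidm` and then round-tripped into an rdflib `Graph` so the helper functions can run SPARQL queries against it. A minimal sketch of that round-trip, using only calls visible in this patch; the input path is a placeholder:

```python
from io import StringIO

from nidm.experiment.Utils import read_nidm
from rdflib import Graph

# Placeholder path; any NIDM-Experiment Turtle file would do.
nidm_project = read_nidm("nidm.ttl")

# Serialize the PyNIDM project back to Turtle and load it into rdflib,
# which is what the helper functions below do before running SPARQL queries.
rdf_graph = Graph()
rdf_graph.parse(source=StringIO(nidm_project.serializeTurtle()), format="turtle")

print(len(rdf_graph), "triples loaded")
```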
-#************************************************************************************** +# ************************************************************************************** # Development environment: Python - PyCharm IDE # -#************************************************************************************** +# ************************************************************************************** # System requirements: Python 3.X # Libraries: pybids, numpy, matplotlib, pandas, scipy, math, dateutil, datetime,argparse, # os,sys,getopt,csv -#************************************************************************************** +# ************************************************************************************** # Start date: 10-2-17 # Update history: # DATE MODIFICATION Who # # -#************************************************************************************** +# ************************************************************************************** # Programmer comments: # # -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** -import sys, getopt, os -from os.path import join, isfile, basename, isdir,splitext -from os import mkdir -from os import system - -from nidm.experiment import Project,Session,Acquisition,AcquisitionObject,DemographicsObject,AssessmentObject, MRObject -from nidm.core import BIDS_Constants,Constants -from prov.model import PROV_LABEL,PROV_TYPE -from nidm.experiment.Utils import read_nidm, write_json_mapping_file -from nidm.experiment.Query import GetProjectsUUID, GetProjectLocation, GetParticipantIDFromAcquisition -from nidm.core.Constants import DD - -import json -from pprint import pprint +from argparse import ArgumentParser import csv +import getopt import glob -from rdflib import Graph,URIRef,RDF -from argparse import ArgumentParser from io import StringIO -import pandas as pd -import validators -import urllib.parse +import json +import os +from os import mkdir, system +from os.path import basename, isdir, isfile, join, splitext +from pprint import pprint from shutil import copyfile, move -import urllib.request as ur +import sys import tempfile +import urllib.parse +import urllib.request as ur import datalad.api as dl +from nidm.core import BIDS_Constants, Constants +from nidm.core.Constants import DD +from nidm.experiment import ( + Acquisition, + AcquisitionObject, + AssessmentObject, + DemographicsObject, + MRObject, + Project, + Session, +) +from nidm.experiment.Query import ( + GetParticipantIDFromAcquisition, + GetProjectLocation, + GetProjectsUUID, +) +from nidm.experiment.Utils import read_nidm, write_json_mapping_file +import pandas as pd +from prov.model import PROV_LABEL, PROV_TYPE +from rdflib import RDF, Graph, URIRef +import validators + -def GetImageFromAWS(location, output_file,args): - ''' +def GetImageFromAWS(location, output_file, args): + """ This function will attempt to get a BIDS image identified by location from AWS S3. It only supports known URLs at this time (e.g. openneuro) :param location: path string to file. This can be a local path. Function will try and detect if this is a known project/archive and if so will format theh S3 string appropriately. 
Otherwise it will return None :param output_file: This is the full path and filename to store the S3 downloaded file if successful :return: None if file not downloaded else will return True - ''' + """ print("Trying AWS S3 for dataset: %s" % location) # modify location to remove everything before the dataset name @@ -78,13 +90,19 @@ def GetImageFromAWS(location, output_file,args): # we could pick that out but for others it's difficult (impossible)? # case for openneuro - if 'openneuro' in location: + if "openneuro" in location: # remove everything from location string before openneuro - openneuro_loc = location[location.find("openneuro/") + 10:] + openneuro_loc = location[location.find("openneuro/") + 10 :] # get a temporary directory for this file temp_dir = tempfile.TemporaryDirectory() # aws command - cmd = "aws s3 cp --no-sign-request " + "s3://openneuro.org/" + openneuro_loc + " " + temp_dir.name + cmd = ( + "aws s3 cp --no-sign-request " + + "s3://openneuro.org/" + + openneuro_loc + + " " + + temp_dir.name + ) # execute command print(cmd) system(cmd) @@ -96,7 +114,7 @@ def GetImageFromAWS(location, output_file,args): try: # copy file from temp_dir to bids dataset print("Copying temporary file to final location....") - copyfile(join(temp_dir.name, basename(location)),output_file) + copyfile(join(temp_dir.name, basename(location)), output_file) return True except: print("Couldn't get dataset from AWS either...") @@ -105,10 +123,10 @@ def GetImageFromAWS(location, output_file,args): elif args.aws_baseurl: aws_baseurl = args.aws_baseurl # check if user supplied the last '/' in the aws_baseurl or not. If not, add it. - if aws_baseurl[-1] != '/': - aws_baseurl = aws_baseurl = '/' + if aws_baseurl[-1] != "/": + aws_baseurl = aws_baseurl = "/" # remove everything from location string before openneuro - loc = location[location.find(args.dataset_string) + len(args.dataset_string):] + loc = location[location.find(args.dataset_string) + len(args.dataset_string) :] # get a temporary directory for this file temp_dir = tempfile.TemporaryDirectory() # aws command @@ -131,14 +149,12 @@ def GetImageFromAWS(location, output_file,args): return None - def GetImageFromURL(url): - ''' + """ This function will try and retrieve the file referenced by url :param url: url to file to download :return: temporary filename or -1 if fails - ''' - + """ # try to open the url and get the pointed to file try: @@ -153,32 +169,36 @@ def GetImageFromURL(url): print("ERROR! Can't open url: %s" % url) return -1 -def GetDataElementMetadata(nidm_graph,de_uuid): - ''' + +def GetDataElementMetadata(nidm_graph, de_uuid): + """ This function will query the nidm_graph for the DataElement de_uuid and return all the metadata as a BIDS-compliant participants sidecar file dictionary - ''' + """ # query nidm_graph for Constants.NIIRI[de_uuid] rdf:type PersonalDataElement - query = """ + query = ( + """ PREFIX rdf: PREFIX prov: PREFIX niiri: PREFIX nidm: - + select distinct ?p ?o where { - + <%s> rdf:type nidm:PersonalDataElement ; ?p ?o . 
- } - """ % Constants.NIIRI[de_uuid] + } + """ + % Constants.NIIRI[de_uuid] + ) # print(query) qres = nidm_graph.query(query) # set up a dictionary entry for this column - #current_tuple = str(DD(source="participants.tsv", variable=column)) + # current_tuple = str(DD(source="participants.tsv", variable=column)) # temporary dictionary of metadata temp_dict = {} @@ -187,48 +207,58 @@ def GetDataElementMetadata(nidm_graph,de_uuid): temp_dict[str(row[0])] = str(row[1]) # set up a dictionary entry for this column - current_tuple = str(DD(source="participants.tsv", variable= - temp_dict['http://purl.org/nidash/nidm#sourceVariable'])) + current_tuple = str( + DD( + source="participants.tsv", + variable=temp_dict["http://purl.org/nidash/nidm#sourceVariable"], + ) + ) de = {} de[current_tuple] = {} # now look for label entry in temp_dict and set up a proper NIDM-style JSON data structure # see Utils.py function map_variables_to_terms for example (column_to_terms[current_tuple]) - for key,value in temp_dict.items(): - if key == 'http://purl.org/nidash/nidm#sourceVariable': - de[current_tuple]['source_variable'] = value - elif key == 'http://purl.org/dc/terms/description': - de[current_tuple]['description'] = value - elif key == 'http://purl.org/nidash/nidm#isAbout': + for key, value in temp_dict.items(): + if key == "http://purl.org/nidash/nidm#sourceVariable": + de[current_tuple]["source_variable"] = value + elif key == "http://purl.org/dc/terms/description": + de[current_tuple]["description"] = value + elif key == "http://purl.org/nidash/nidm#isAbout": # here we need to do an additional query to see if there's a label associated with the isAbout value - de[current_tuple]['isAbout'] = [] + de[current_tuple]["isAbout"] = [] # check whether there are multiple 'isAbout' entries - if type(value) == 'list': + if type(value) == "list": # if this is a list we have to loop through the entries and store the url and labels for entry in value: # query for label for this isAbout URL - query = ''' + query = ( + """ prefix prov: prefix rdfs: prefix rdf: - + select distinct ?label where { <%s> rdf:type prov:Entity ; - rdfs:label ?label . - } - ''' % entry - #print(query) + rdfs:label ?label . + } + """ + % entry + ) + # print(query) qres = nidm_graph.query(query) for row in qres: - de[current_tuple]['isAbout'].append({'@id': value, 'label': row[0]}) + de[current_tuple]["isAbout"].append( + {"@id": value, "label": row[0]} + ) else: # only 1 isAbout entry # query for label for this isAbout URL - query = ''' + query = ( + """ prefix prov: prefix rdfs: @@ -237,88 +267,98 @@ def GetDataElementMetadata(nidm_graph,de_uuid): select distinct ?label where { <%s> rdf:type prov:Entity ; - rdfs:label ?label . - } - ''' % value + rdfs:label ?label . 
+ } + """ + % value + ) # print(query) qres = nidm_graph.query(query) for row in qres: - de[current_tuple]['isAbout'].append({'@id': value, 'label': row[0]}) - - elif key == 'http://www.w3.org/2000/01/rdf-schema#label': - de[current_tuple]['label'] = value - elif key =='http://purl.org/nidash/nidm#valueType': - if 'responseOptions' not in de[current_tuple].keys(): - de[current_tuple]['responseOptions'] = {} - de[current_tuple]['responseOptions']['valueType'] = value + de[current_tuple]["isAbout"].append({"@id": value, "label": row[0]}) + + elif key == "http://www.w3.org/2000/01/rdf-schema#label": + de[current_tuple]["label"] = value + elif key == "http://purl.org/nidash/nidm#valueType": + if "responseOptions" not in de[current_tuple].keys(): + de[current_tuple]["responseOptions"] = {} + de[current_tuple]["responseOptions"]["valueType"] = value else: - de[current_tuple]['responseOptions']['valueType'] = value - elif key == 'http://purl.org/nidash/nidm#levels': - if 'responseOptions' not in de[current_tuple].keys(): - de[current_tuple]['responseOptions'] = {} - de[current_tuple]['responseOptions']['levels'] = value + de[current_tuple]["responseOptions"]["valueType"] = value + elif key == "http://purl.org/nidash/nidm#levels": + if "responseOptions" not in de[current_tuple].keys(): + de[current_tuple]["responseOptions"] = {} + de[current_tuple]["responseOptions"]["levels"] = value else: - de[current_tuple]['responseOptions']['levels'] = value - elif key == 'http://uri.interlex.org/ilx_0739289': - de[current_tuple]['associatedWith'] = value - elif key == Constants.NIDM['minValue']: - de[current_tuple]['responseOptions']['minValue'] = value - elif key == Constants.NIDM['maxValue']: - de[current_tuple]['responseOptions']['maxValue'] = value - elif key == Constants.NIDM['url']: - de[current_tuple]['url'] = value + de[current_tuple]["responseOptions"]["levels"] = value + elif key == "http://uri.interlex.org/ilx_0739289": + de[current_tuple]["associatedWith"] = value + elif key == Constants.NIDM["minValue"]: + de[current_tuple]["responseOptions"]["minValue"] = value + elif key == Constants.NIDM["maxValue"]: + de[current_tuple]["responseOptions"]["maxValue"] = value + elif key == Constants.NIDM["url"]: + de[current_tuple]["url"] = value return de -def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields): - ''' +def CreateBIDSParticipantFile(nidm_graph, output_file, participant_fields): + """ Creates participant file based on requested fields :param nidm_graph: :param output_directory: :param fields: :return: - ''' + """ print("Creating participants.json file...") fields = ["participant_id"] - #fields.extend(participant_fields) - participants=pd.DataFrame(columns=fields,index=[1]) + # fields.extend(participant_fields) + participants = pd.DataFrame(columns=fields, index=[1]) participants_json = {} - #for each Constants.NIDM_SUBJECTID in NIDM file - row_index=1 - for subj_uri,subj_id in nidm_graph.subject_objects(predicate=URIRef(Constants.NIDM_SUBJECTID.uri)): - - #adding subject ID to data list to append to participants data frame - participants.loc[row_index,'participant_id',] = subj_id - - #for each of the fields in the participants list + # for each Constants.NIDM_SUBJECTID in NIDM file + row_index = 1 + for subj_uri, subj_id in nidm_graph.subject_objects( + predicate=URIRef(Constants.NIDM_SUBJECTID.uri) + ): + # adding subject ID to data list to append to participants data frame + participants.loc[ + row_index, + "participant_id", + ] = subj_id + + # for each of the fields in the 
participants list for fields in participant_fields: - #if field identifier isn't a proper URI then do a fuzzy search on the graph, else an explicit search for the URL - if(validators.url(fields)): - #then this is a valid URI so simply query nidm_project document for it - for subj,obj in nidm_graph.subject_objects(predicate=URIRef(BIDS_Constants.participants[fields].uri)): - #add row to the pandas data frame - #data.append(obj) - participants.loc[row_index,BIDS_Constants.participants[fields].uri] = obj + # if field identifier isn't a proper URI then do a fuzzy search on the graph, else an explicit search for the URL + if validators.url(fields): + # then this is a valid URI so simply query nidm_project document for it + for subj, obj in nidm_graph.subject_objects( + predicate=URIRef(BIDS_Constants.participants[fields].uri) + ): + # add row to the pandas data frame + # data.append(obj) + participants.loc[ + row_index, BIDS_Constants.participants[fields].uri + ] = obj # find Data Element and add metadata to participants_json dictionary else: - #text matching task, remove basepart of URIs and try to fuzzy match the field in the part_fields parameter string - #to the "term" part of a qname URI...this part let's a user simply ask for "age" for example without knowing the - #complete URI....hopefully + # text matching task, remove basepart of URIs and try to fuzzy match the field in the part_fields parameter string + # to the "term" part of a qname URI...this part let's a user simply ask for "age" for example without knowing the + # complete URI....hopefully # - #This needs to be a more complex query: + # This needs to be a more complex query: # Step(1): For subj_uri query for prov:Activity that were prov:wasAttributedTo subj_uri # Step(2): Query for prov:Entity that were prov:wasGeneratedBy uris from Step(1) # Step(3): For each metadata triple in objects whose subject is uris from Step(2), fuzzy match predicate after # removing base of uri to "fields" in participants list, then add these to data list for appending to pandas - match_ratio={} + match_ratio = {} # - #Steps(1):(3) + # Steps(1):(3) query = """ PREFIX rdf: @@ -338,43 +378,44 @@ def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields): rdf:type onli:assessment-instrument ; ?pred ?value . 
FILTER (regex(str(?pred) ,"%s","i" )) - }""" % (subj_uri,fields) - #print(query) + }""" % ( + subj_uri, + fields, + ) + # print(query) qres = nidm_graph.query(query) for row in qres: - #use last field in URIs for short column name and add full URI to sidecar participants.json file - url_parts = urllib.parse.urlsplit(row[0],scheme='#') + # use last field in URIs for short column name and add full URI to sidecar participants.json file + url_parts = urllib.parse.urlsplit(row[0], scheme="#") - if url_parts.fragment == '': - #do some parsing of the path URL because this particular one has no fragments + if url_parts.fragment == "": + # do some parsing of the path URL because this particular one has no fragments url_parts = urllib.parse.urlparse(row[0]) - path_parts = url_parts[2].rpartition('/') + path_parts = url_parts[2].rpartition("/") short_name = path_parts[2] else: short_name = url_parts.fragment # find Data Element and add metadata to participants_json dictionary - if 'de' not in locals(): + if "de" not in locals(): de = GetDataElementMetadata(nidm_graph, short_name) else: de.update(GetDataElementMetadata(nidm_graph, short_name)) - participants.loc[row_index,str(short_name)] = str(row[1]) - #data.append(str(row[1])) + participants.loc[row_index, str(short_name)] = str(row[1]) + # data.append(str(row[1])) - #add row to participants DataFrame - #participants=participants.append(pd.DataFrame(data)) + # add row to participants DataFrame + # participants=participants.append(pd.DataFrame(data)) participants - row_index = row_index+1 - - - #save participants.tsv file - participants.to_csv(output_file + ".tsv",sep='\t',index=False) - #save participants.json file - with open(output_file + ".json",'w') as f: - json.dump(participants_json,f,sort_keys=True,indent=2) + row_index = row_index + 1 + # save participants.tsv file + participants.to_csv(output_file + ".tsv", sep="\t", index=False) + # save participants.json file + with open(output_file + ".json", "w") as f: + json.dump(participants_json, f, sort_keys=True, indent=2) # save participant sidecar file write_json_mapping_file(de, join(splitext(output_file)[0] + ".json"), True) @@ -382,110 +423,127 @@ def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields): return participants, participants_json - -def NIDMProject2BIDSDatasetDescriptor(nidm_graph,output_directory): - ''' +def NIDMProject2BIDSDatasetDescriptor(nidm_graph, output_directory): + """ :param nidm_graph: RDFLib graph object from NIDM-Exp file :param output_dir: directory for writing dataset_description of BIDS dataset :return: None - ''' + """ print("Creating dataset_description.json file...") - #Project -> Dataset_description.json############################################ - #get json representation of project metadata + # Project -> Dataset_description.json############################################ + # get json representation of project metadata project_metadata = nidm_graph.get_metadata_dict(Constants.NIDM_PROJECT) - #print(project_metadata) + # print(project_metadata) - #cycle through keys converting them to BIDS keys - #make copy of project_metadata + # cycle through keys converting them to BIDS keys + # make copy of project_metadata project_metadata_tmp = dict(project_metadata) - #iterate over the temporary dictionary and delete items from the original - for proj_key,value in project_metadata_tmp.items(): - key_found=0 - #print("proj_key = %s " % proj_key) - #print("project_metadata[proj_key] = %s" %project_metadata[proj_key]) + # iterate over the temporary 
dictionary and delete items from the original + for proj_key, value in project_metadata_tmp.items(): + key_found = 0 + # print("proj_key = %s " % proj_key) + # print("project_metadata[proj_key] = %s" %project_metadata[proj_key]) - for key,value in BIDS_Constants.dataset_description.items(): + for key, value in BIDS_Constants.dataset_description.items(): if BIDS_Constants.dataset_description[key]._uri == proj_key: # added since BIDS validator validates values of certain keys - if (key == "Authors") or (key == "Funding") or (key == "ReferencesAndLinks"): + if ( + (key == "Authors") + or (key == "Funding") + or (key == "ReferencesAndLinks") + ): project_metadata[key] = [project_metadata[proj_key]] else: project_metadata[key] = project_metadata[proj_key] del project_metadata[proj_key] - key_found=1 + key_found = 1 continue - #if this proj_key wasn't found in BIDS dataset_description Constants dictionary then delete it + # if this proj_key wasn't found in BIDS dataset_description Constants dictionary then delete it if not key_found: del project_metadata[proj_key] - with open(join(output_directory, "dataset_description.json"),'w') as f: - json.dump(project_metadata,f,sort_keys=True,indent=2) + with open(join(output_directory, "dataset_description.json"), "w") as f: + json.dump(project_metadata, f, sort_keys=True, indent=2) ############################################################################## -def AddMetadataToImageSidecar(graph_entity,graph, output_directory, image_filename): - ''' + +def AddMetadataToImageSidecar(graph_entity, graph, output_directory, image_filename): + """ This function will query the metadata in graph_entity and compare the entries with mappings in core/BIDS_Constants.py json_keys where we'll be mapping the value (NIDM entry) to key (BIDS key). It will create the appropriate sidecar json file associated with image_filename in output_directory. - ''' + """ # query graph for metadata associated with graph_entity - query = ''' + query = ( + """ Select DISTINCT ?p ?o WHERE { <%s> ?p ?o . 
} - ''' %graph_entity + """ + % graph_entity + ) qres = graph.query(query) # dictionary to store metadata json_dict = {} for row in qres: - key = next((k for k in BIDS_Constants.json_keys if BIDS_Constants.json_keys[k] == row[0]), None) + key = next( + ( + k + for k in BIDS_Constants.json_keys + if BIDS_Constants.json_keys[k] == row[0] + ), + None, + ) if key != None: json_dict[key] = row[1] # write json_dict out to appropriate sidecar filename - with open(join(output_directory,image_filename + ".json"),"w") as fp: - json.dump(json_dict,fp,indent=2) + with open(join(output_directory, image_filename + ".json"), "w") as fp: + json.dump(json_dict, fp, indent=2) -def ProcessFiles(graph,scan_type,output_directory,project_location,args): - ''' +def ProcessFiles(graph, scan_type, output_directory, project_location, args): + """ This function will essentially cycle through the acquisition objects in the NIDM file loaded into graph and depending on the scan_type will try and copy the image to the output_directory - ''' + """ if scan_type == Constants.NIDM_MRI_DIFFUSION_TENSOR.uri: - bids_ext = 'dwi' + bids_ext = "dwi" elif scan_type == Constants.NIDM_MRI_ANATOMIC_SCAN.uri: - bids_ext = 'anat' + bids_ext = "anat" elif scan_type == Constants.NIDM_MRI_FUNCTION_SCAN.uri: - bids_ext = 'func' + bids_ext = "func" # query NIDM document for acquisition entity "subjects" with predicate nidm:hasImageUsageType and object scan_type - for acq in graph.subjects(predicate=URIRef(Constants.NIDM_IMAGE_USAGE_TYPE.uri), - object=URIRef(scan_type)): + for acq in graph.subjects( + predicate=URIRef(Constants.NIDM_IMAGE_USAGE_TYPE.uri), object=URIRef(scan_type) + ): # first see if file exists locally. Get nidm:Project prov:Location and append the nfo:Filename of the image # from the acq acquisition entity. 
If that file doesn't exist try the prov:Location in the func acq # entity and see if we can download it from the cloud # get acquisition uuid from entity uuid - temp = graph.objects(subject=acq, predicate=Constants.PROV['wasGeneratedBy']) + temp = graph.objects(subject=acq, predicate=Constants.PROV["wasGeneratedBy"]) for item in temp: activity = item # get participant ID with sio:Subject role in anat_acq qualified association - part_id = GetParticipantIDFromAcquisition(nidm_file_list=[args.rdf_file], acquisition=activity) + part_id = GetParticipantIDFromAcquisition( + nidm_file_list=[args.rdf_file], acquisition=activity + ) # make BIDS sub directory - if 'sub' in (part_id['ID'].values)[0]: - sub_dir = join(output_directory, (part_id['ID'].values)[0]) + if "sub" in (part_id["ID"].values)[0]: + sub_dir = join(output_directory, (part_id["ID"].values)[0]) else: - sub_dir = join(output_directory, "sub-" + (part_id['ID'].values)[0]) - sub_filename_base = "sub-" + (part_id['ID'].values)[0] + sub_dir = join(output_directory, "sub-" + (part_id["ID"].values)[0]) + sub_filename_base = "sub-" + (part_id["ID"].values)[0] if not os.path.exists(sub_dir): os.makedirs(sub_dir) @@ -493,70 +551,117 @@ def ProcessFiles(graph,scan_type,output_directory,project_location,args): if not os.path.exists(join(sub_dir, bids_ext)): os.makedirs(join(sub_dir, bids_ext)) - for filename in graph.objects(subject=acq,predicate=URIRef(Constants.NIDM_FILENAME.uri)): + for filename in graph.objects( + subject=acq, predicate=URIRef(Constants.NIDM_FILENAME.uri) + ): # check if file exists for location in project_location: # if MRI exists in this location then copy and rename if isfile((location[0] + filename).lstrip("file:")): # copy and rename file to be BIDS compliant - copyfile((location[0] + filename).lstrip("file:"), - join(sub_dir, bids_ext, sub_filename_base + splitext(filename)[1])) + copyfile( + (location[0] + filename).lstrip("file:"), + join( + sub_dir, bids_ext, sub_filename_base + splitext(filename)[1] + ), + ) continue # if the file wasn't accessible locally, try with the prov:Location in the acq - for location in graph.objects(subject=acq,predicate=URIRef(Constants.PROV['Location'])): + for location in graph.objects( + subject=acq, predicate=URIRef(Constants.PROV["Location"]) + ): # try to download the file and rename ret = GetImageFromURL(location) if ret == -1: - print("ERROR! Can't download file: %s from url: %s, trying to copy locally...." % ( - filename, location)) + print( + "ERROR! Can't download file: %s from url: %s, trying to copy locally...." + % (filename, location) + ) if "file" in location: location = str(location).lstrip("file:") print("Trying to copy file from %s" % (location)) try: - copyfile(location, join(output_directory, sub_dir, bids_ext, basename(filename))) + copyfile( + location, + join( + output_directory, + sub_dir, + bids_ext, + basename(filename), + ), + ) except: - print("ERROR! Failed to find file %s on filesystem..." % location) + print( + "ERROR! Failed to find file %s on filesystem..." + % location + ) if not args.no_downloads: try: print( - "Running datalad get command on dataset: %s" % location) - dl.Dataset(os.path.dirname(location)).get(recursive=True, jobs=1) + "Running datalad get command on dataset: %s" + % location + ) + dl.Dataset(os.path.dirname(location)).get( + recursive=True, jobs=1 + ) except: - print("ERROR! Datalad returned error: %s for dataset %s." 
% ( - sys.exc_info()[0], location)) - GetImageFromAWS(location=location, output_file= - join(output_directory, sub_dir, bids_ext, basename(filename)),args=args) + print( + "ERROR! Datalad returned error: %s for dataset %s." + % (sys.exc_info()[0], location) + ) + GetImageFromAWS( + location=location, + output_file=join( + output_directory, + sub_dir, + bids_ext, + basename(filename), + ), + args=args, + ) else: # copy temporary file to BIDS directory - copyfile(ret, join(output_directory, sub_dir, bids_ext, basename(filename))) + copyfile( + ret, + join(output_directory, sub_dir, bids_ext, basename(filename)), + ) # if we were able to copy the image file then add the json sidecar file with additional metadata # available in the NIDM file - if isfile(join(output_directory, sub_dir, bids_ext, basename(filename))): + if isfile( + join(output_directory, sub_dir, bids_ext, basename(filename)) + ): # get rest of metadata for this acquisition and store in sidecar file if "gz" in basename(filename): image_filename = splitext(splitext(basename(filename))[0])[0] else: image_filename = splitext(basename(filename))[0] - AddMetadataToImageSidecar(graph_entity=acq,graph=graph,output_directory=join(output_directory, - sub_dir,bids_ext),image_filename=image_filename) + AddMetadataToImageSidecar( + graph_entity=acq, + graph=graph, + output_directory=join(output_directory, sub_dir, bids_ext), + image_filename=image_filename, + ) # if this is a DWI scan then we should copy over the b-value and b-vector files - if bids_ext == 'dwi': + if bids_ext == "dwi": # search for entity uuid with rdf:type nidm:b-value that was generated by activity - query = """ + query = ( + """ PREFIX rdf: PREFIX prov: PREFIX nidm: - + SELECT DISTINCT ?entity WHERE { ?entity rdf:type ; prov:wasGeneratedBy <%s> . - }""" % activity + }""" + % activity + ) # print(query) qres = graph.query(query) @@ -564,32 +669,63 @@ def ProcessFiles(graph,scan_type,output_directory,project_location,args): bval_entity = str(row[0]) # if the file wasn't accessible locally, try with the prov:Location in the acq - for location in graph.objects(subject=URIRef(bval_entity), predicate=URIRef(Constants.PROV['Location'])): + for location in graph.objects( + subject=URIRef(bval_entity), + predicate=URIRef(Constants.PROV["Location"]), + ): # try to download the file and rename ret = GetImageFromURL(location) if ret == -1: - print("ERROR! Can't download file: %s from url: %s, trying to copy locally...." % ( - filename, location)) + print( + "ERROR! Can't download file: %s from url: %s, trying to copy locally...." + % (filename, location) + ) if "file" in location: location = str(location).lstrip("file:") print("Trying to copy file from %s" % (location)) try: - copyfile(location, join(output_directory, sub_dir, bids_ext, basename(location))) + copyfile( + location, + join( + output_directory, + sub_dir, + bids_ext, + basename(location), + ), + ) except: - print("ERROR! Failed to find file %s on filesystem..." % location) + print( + "ERROR! Failed to find file %s on filesystem..." + % location + ) if not args.no_downloads: try: print( - "Running datalad get command on dataset: %s" % location) - dl.Dataset(os.path.dirname(location)).get(recursive=True, jobs=1) + "Running datalad get command on dataset: %s" + % location + ) + dl.Dataset(os.path.dirname(location)).get( + recursive=True, jobs=1 + ) except: - print("ERROR! Datalad returned error: %s for dataset %s." 
% ( - sys.exc_info()[0], location)) - GetImageFromAWS(location=location, output_file= - join(output_directory, sub_dir, bids_ext, basename(location)),args=args) + print( + "ERROR! Datalad returned error: %s for dataset %s." + % (sys.exc_info()[0], location) + ) + GetImageFromAWS( + location=location, + output_file=join( + output_directory, + sub_dir, + bids_ext, + basename(location), + ), + args=args, + ) # search for entity uuid with rdf:type nidm:b-value that was generated by activity - query = """ + query = ( + """ PREFIX rdf: PREFIX prov: PREFIX nidm: @@ -598,7 +734,9 @@ def ProcessFiles(graph,scan_type,output_directory,project_location,args): WHERE { ?entity rdf:type ; prov:wasGeneratedBy <%s> . - }""" % activity + }""" + % activity + ) # print(query) qres = graph.query(query) @@ -606,59 +744,126 @@ def ProcessFiles(graph,scan_type,output_directory,project_location,args): bvec_entity = str(row[0]) # if the file wasn't accessible locally, try with the prov:Location in the acq - for location in graph.objects(subject=URIRef(bvec_entity), - predicate=URIRef(Constants.PROV['Location'])): + for location in graph.objects( + subject=URIRef(bvec_entity), + predicate=URIRef(Constants.PROV["Location"]), + ): # try to download the file and rename ret = GetImageFromURL(location) if ret == -1: print( - "ERROR! Can't download file: %s from url: %s, trying to copy locally...." % ( - filename, location)) + "ERROR! Can't download file: %s from url: %s, trying to copy locally...." + % (filename, location) + ) if "file" in location: location = str(location).lstrip("file:") print("Trying to copy file from %s" % (location)) try: - copyfile(location, - join(output_directory, sub_dir, bids_ext, basename(location))) + copyfile( + location, + join( + output_directory, + sub_dir, + bids_ext, + basename(location), + ), + ) except: - print("ERROR! Failed to find file %s on filesystem..." % location) + print( + "ERROR! Failed to find file %s on filesystem..." + % location + ) if not args.no_downloads: try: print( - "Running datalad get command on dataset: %s" % location) - dl.Dataset(os.path.dirname(location)).get(recursive=True, - jobs=1) + "Running datalad get command on dataset: %s" + % location + ) + dl.Dataset(os.path.dirname(location)).get( + recursive=True, jobs=1 + ) except: - print("ERROR! Datalad returned error: %s for dataset %s." % ( - sys.exc_info()[0], location)) - GetImageFromAWS(location=location, output_file= - join(output_directory, sub_dir, bids_ext, basename(location)), - args=args) + print( + "ERROR! Datalad returned error: %s for dataset %s." + % (sys.exc_info()[0], location) + ) + GetImageFromAWS( + location=location, + output_file=join( + output_directory, + sub_dir, + bids_ext, + basename(location), + ), + args=args, + ) def main(argv): - parser = ArgumentParser(description='This program will convert a NIDM-Experiment RDF document \ + parser = ArgumentParser( + description="This program will convert a NIDM-Experiment RDF document \ to a BIDS dataset. 
The program will query the NIDM-Experiment document for subjects, \ MRI scans, and associated assessments saving the MRI data to disk in an organization \ according to the BIDS specification, metadata to a participants.tsv \ file, the project-level metadata to a dataset_description.json file, and the \ - assessments to *.tsv/*.json file pairs in a phenotypes directory.', epilog='Example of use: \ - NIDM2BIDSMRI.py -nidm_file NIDM.ttl -part_fields age,gender -bids_dir BIDS') - - parser.add_argument('-nidm_file', dest='rdf_file', required=True, help="NIDM RDF file") - parser.add_argument('-part_fields', nargs='+', dest='part_fields', required=False, \ - help='Variables to add to BIDS participant file. Variables will be fuzzy-matched to NIDM URIs') - parser.add_argument('-anat', dest='anat', action='store_true', required=False, help="Include flag to add anatomical scans to BIDS dataset") - parser.add_argument('-func', dest='func', action='store_true', required=False, help="Include flag to add functional scans + events files to BIDS dataset") - parser.add_argument('-dwi', dest='dwi', action='store_true', required=False, help="Include flag to add DWI scans + Bval/Bvec files to BIDS dataset") - parser.add_argument('-bids_dir', dest='bids_dir', required=True, help="Directory to store BIDS dataset") + assessments to *.tsv/*.json file pairs in a phenotypes directory.", + epilog="Example of use: \ + NIDM2BIDSMRI.py -nidm_file NIDM.ttl -part_fields age,gender -bids_dir BIDS", + ) + + parser.add_argument( + "-nidm_file", dest="rdf_file", required=True, help="NIDM RDF file" + ) + parser.add_argument( + "-part_fields", + nargs="+", + dest="part_fields", + required=False, + help="Variables to add to BIDS participant file. Variables will be fuzzy-matched to NIDM URIs", + ) + parser.add_argument( + "-anat", + dest="anat", + action="store_true", + required=False, + help="Include flag to add anatomical scans to BIDS dataset", + ) + parser.add_argument( + "-func", + dest="func", + action="store_true", + required=False, + help="Include flag to add functional scans + events files to BIDS dataset", + ) + parser.add_argument( + "-dwi", + dest="dwi", + action="store_true", + required=False, + help="Include flag to add DWI scans + Bval/Bvec files to BIDS dataset", + ) + parser.add_argument( + "-bids_dir", + dest="bids_dir", + required=True, + help="Directory to store BIDS dataset", + ) group = parser.add_mutually_exclusive_group() - group.add_argument('-no_downloads',dest='no_downloads', action='store_true',required=False, help= - "If this flag is set then script won't attempt to download images using datalad" - "and AWS S3. Default behavior is files are downloaded if they don't exist locally.") - group.add_argument('-aws_url', dest='aws_url', required=False, help="This tool facilities export of " + group.add_argument( + "-no_downloads", + dest="no_downloads", + action="store_true", + required=False, + help="If this flag is set then script won't attempt to download images using datalad" + "and AWS S3. Default behavior is files are downloaded if they don't exist locally.", + ) + group.add_argument( + "-aws_url", + dest="aws_url", + required=False, + help="This tool facilities export of " "user-selected information from a NIDM file to a BIDS dataset and may have to fetch images. The NIDM files contain links from" "the local filesystem used to convert BIDS to NIDM and possibly DataLad dataset links to the files if the" " original BIDS data was a DataLad dataset. 
Here we support 3 modes of trying to find images: (1) copy from" @@ -667,21 +872,29 @@ def main(argv): " to download the images via a AWS S3 link. This parameter lets the user set the base AWS S3 URL to try and" " find the images. Currently it supports using the URL provided here and adding the dataset id, subject id," " and filename. For example, in OpenNeuro (OpenNeuro is supported by default but will serve as an example) the base AWS S3" - " URL is \'s3://openneuro.org\'. The URL then becomes (for example) " + " URL is 's3://openneuro.org'. The URL then becomes (for example) " " s3://openneuro.org/ds000002/sub-06/func/sub-06_task-probabilisticclassification_run-02_bold.nii.gz where this tool" - " has added \'ds000002/sub-06/[FILENAME] to the base AWS S3 URL.") - parser.add_argument('-dataset_string', dest='dataset_string', required=False, help="If -aws_url parameter is supplied" + " has added 'ds000002/sub-06/[FILENAME] to the base AWS S3 URL.", + ) + parser.add_argument( + "-dataset_string", + dest="dataset_string", + required=False, + help="If -aws_url parameter is supplied" " this parameter (-dataset_string) is required as it will be added to the aws_baseurl to retrieve images for each" - " subject and file. For example, if -aws_baseurl is \'s3://davedata.org \' and -dataset_string is \'dataset1\' then" + " subject and file. For example, if -aws_baseurl is 's3://davedata.org ' and -dataset_string is 'dataset1' then" " the AWS S3 url for sub-1 and file sub1-task-rest_run-1_bold.nii.gz would be: " - " \'s3://davedata.org/dataset1/sub-1/[anat | func | dwi/sub1-task-rest_run-1_bold.nii.gz\'") + " 's3://davedata.org/dataset1/sub-1/[anat | func | dwi/sub1-task-rest_run-1_bold.nii.gz'", + ) args = parser.parse_args() # check some argument dependencies if args.aws_url and not args.dataset_string: - print("ERROR! You must include a -dataset_string if you supplied the -aws_baseurl. If there is no dataset" - " string in your AWS S3 urls then just supply -aws_baseurl with nothing after it.") + print( + "ERROR! You must include a -dataset_string if you supplied the -aws_baseurl. If there is no dataset" + " string in your AWS S3 urls then just supply -aws_baseurl with nothing after it." + ) print(args.print_help()) exit(-1) @@ -693,72 +906,93 @@ def main(argv): if not isdir(output_directory): mkdir(path=output_directory) - - #try to read RDF file + # try to read RDF file print("Guessing RDF file format...") - format_found=False - for format in 'turtle','xml','n3','trix','rdfa': + format_found = False + for format in "turtle", "xml", "n3", "trix", "rdfa": try: print("Reading RDF file as %s..." % format) - #load NIDM graph into NIDM-Exp API objects + # load NIDM graph into NIDM-Exp API objects nidm_project = read_nidm(rdf_file) # temporary save nidm_project - with open("/Users/dbkeator/Downloads/nidm.ttl", 'w') as f: + with open("/Users/dbkeator/Downloads/nidm.ttl", "w") as f: print(nidm_project.serializeTurtle(), file=f) print("RDF file successfully read") - format_found=True + format_found = True break except Exception: - print("File: %s appears to be an invalid %s RDF file" % (rdf_file,format)) + print("File: %s appears to be an invalid %s RDF file" % (rdf_file, format)) if not format_found: - print("File doesn't appear to be a valid RDF format supported by Python RDFLib! Please check input file") + print( + "File doesn't appear to be a valid RDF format supported by Python RDFLib! 
Please check input file" + ) print("exiting...") exit(-1) - # if not os.path.isdir(join(output_directory,os.path.splitext(args.rdf_file)[0])): - # os.mkdir(join(output_directory,os.path.splitext(args.rdf_file)[0])) + # if not os.path.isdir(join(output_directory,os.path.splitext(args.rdf_file)[0])): + # os.mkdir(join(output_directory,os.path.splitext(args.rdf_file)[0])) - #convert Project NIDM object -> dataset_description.json file - NIDMProject2BIDSDatasetDescriptor(nidm_project,output_directory) + # convert Project NIDM object -> dataset_description.json file + NIDMProject2BIDSDatasetDescriptor(nidm_project, output_directory) - #create participants.tsv file. In BIDS datasets there is no specification for how many or which type of assessment - #variables might be in this file. The specification does mention a minimum participant_id which indexes each of the - #subjects in the BIDS dataset. + # create participants.tsv file. In BIDS datasets there is no specification for how many or which type of assessment + # variables might be in this file. The specification does mention a minimum participant_id which indexes each of the + # subjects in the BIDS dataset. # - #if parameter -parts_field is defined then the variables listed will be fuzzy matched to the URIs in the NIDM file - #and added to the participants.tsv file + # if parameter -parts_field is defined then the variables listed will be fuzzy matched to the URIs in the NIDM file + # and added to the participants.tsv file - #use RDFLib here for temporary graph making query easier + # use RDFLib here for temporary graph making query easier rdf_graph = Graph() - rdf_graph_parse = rdf_graph.parse(source=StringIO(nidm_project.serializeTurtle()), format='turtle') + rdf_graph_parse = rdf_graph.parse( + source=StringIO(nidm_project.serializeTurtle()), format="turtle" + ) # temporary write out turtle file for testing # rdf_graph_parse.serialize(destination="/Users/dbkeator/Downloads/ds000117.ttl", format='turtle') - - #create participants file - CreateBIDSParticipantFile(rdf_graph_parse, join(output_directory, "participants"), args.part_fields) + # create participants file + CreateBIDSParticipantFile( + rdf_graph_parse, join(output_directory, "participants"), args.part_fields + ) # get nidm:Project prov:Location # first get nidm:Project UUIDs project_uuid = GetProjectsUUID([rdf_file], output_file=None) project_location = [] for uuid in project_uuid: - project_location.append(GetProjectLocation(nidm_file_list=[rdf_file], project_uuid=uuid)) - - #creating BIDS hierarchy with requested scans - if args.anat==True: - ProcessFiles(graph=rdf_graph_parse, scan_type=Constants.NIDM_MRI_ANATOMIC_SCAN.uri, - output_directory=output_directory, project_location=project_location, args=args) + project_location.append( + GetProjectLocation(nidm_file_list=[rdf_file], project_uuid=uuid) + ) + + # creating BIDS hierarchy with requested scans + if args.anat == True: + ProcessFiles( + graph=rdf_graph_parse, + scan_type=Constants.NIDM_MRI_ANATOMIC_SCAN.uri, + output_directory=output_directory, + project_location=project_location, + args=args, + ) if args.func == True: - ProcessFiles(graph=rdf_graph_parse, scan_type=Constants.NIDM_MRI_FUNCTION_SCAN.uri, - output_directory=output_directory, project_location=project_location, args=args) + ProcessFiles( + graph=rdf_graph_parse, + scan_type=Constants.NIDM_MRI_FUNCTION_SCAN.uri, + output_directory=output_directory, + project_location=project_location, + args=args, + ) if args.dwi == True: - ProcessFiles(graph=rdf_graph_parse, 
scan_type = Constants.NIDM_MRI_DIFFUSION_TENSOR.uri , - output_directory=output_directory, project_location=project_location, args=args) + ProcessFiles( + graph=rdf_graph_parse, + scan_type=Constants.NIDM_MRI_DIFFUSION_TENSOR.uri, + output_directory=output_directory, + project_location=project_location, + args=args, + ) -if __name__ == "__main__": - main(sys.argv[1:]) +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/nidm/experiment/tools/nidm_affinity_propagation.py b/nidm/experiment/tools/nidm_affinity_propagation.py index 1a9b2167..91375c9f 100644 --- a/nidm/experiment/tools/nidm_affinity_propagation.py +++ b/nidm/experiment/tools/nidm_affinity_propagation.py @@ -1,34 +1,46 @@ +import csv import os import tempfile -import pandas as pd -import csv -from patsy.highlevel import dmatrices -from nidm.experiment.Query import GetProjectsUUID import click +import matplotlib.pyplot as plt +from nidm.experiment.Query import GetProjectsUUID from nidm.experiment.tools.click_base import cli from nidm.experiment.tools.rest import RestParser import numpy as np -import matplotlib.pyplot as plt +import pandas as pd +from patsy.highlevel import dmatrices import seaborn as sns -from sklearn.cluster import KMeans +from sklearn import metrics, preprocessing +from sklearn.cluster import AffinityPropagation, KMeans from sklearn.decomposition import PCA -from sklearn.metrics import silhouette_score, adjusted_rand_score +from sklearn.metrics import adjusted_rand_score, silhouette_score from sklearn.pipeline import Pipeline from sklearn.preprocessing import LabelEncoder, MinMaxScaler -from sklearn.cluster import AffinityPropagation -from sklearn import metrics -from sklearn import preprocessing + @cli.command() -@click.option("--nidm_file_list", "-nl", required=True, - help="A comma separated list of NIDM files with full path") -@click.option("-variables", required=False, - help="This parameter is for the variables the user would like to complete the k-means algorithm on.\nThe way this looks in the command is python3 nidm_kmeans.py -nl MTdemog_aseg_v2.ttl -v \"fs_003343,age*sex,sex,age,group,age*group,bmi\"") -@click.option("--output_file", "-o", required=False, - help="Optional output file (TXT) to store results of the linear regression, contrast, and regularization") +@click.option( + "--nidm_file_list", + "-nl", + required=True, + help="A comma separated list of NIDM files with full path", +) +@click.option( + "-variables", + required=False, + help='This parameter is for the variables the user would like to complete the k-means algorithm on.\nThe way this looks in the command is python3 nidm_kmeans.py -nl MTdemog_aseg_v2.ttl -v "fs_003343,age*sex,sex,age,group,age*group,bmi"', +) +@click.option( + "--output_file", + "-o", + required=False, + help="Optional output file (TXT) to store results of the linear regression, contrast, and regularization", +) def full_ap(nidm_file_list, output_file, variables): global v # Needed to do this because the code only used the parameters in the first method, meaning I had to move it all to method 1. 
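The click options above define how `full_ap` is driven from the command line. As a rough sketch of exercising those options programmatically (assuming the package is installed and a NIDM Turtle file is available; the file and variable names below are placeholders), click's test runner can invoke the command directly:

```python
from click.testing import CliRunner

from nidm.experiment.tools.nidm_affinity_propagation import full_ap

runner = CliRunner()
# Placeholder arguments; -nl, -variables, and -o mirror the options declared above.
result = runner.invoke(
    full_ap,
    ["-nl", "nidm.ttl", "-variables", "fs_003343,age,sex", "-o", "ap_results.txt"],
)
print(result.exit_code)
print(result.output)
```

In ordinary use the same flags would be supplied on the command line through the click group that `cli` (imported above) provides.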
- v = variables.strip() # used in data_aggregation, linreg(), spaces stripped from left and right + v = ( + variables.strip() + ) # used in data_aggregation, linreg(), spaces stripped from left and right global o # used in dataparsing() o = output_file global n # used in data_aggregation() @@ -39,16 +51,18 @@ def full_ap(nidm_file_list, output_file, variables): def data_aggregation(): # all data from all the files is collected - """ This function provides query support for NIDM graphs. """ + """This function provides query support for NIDM graphs.""" # query result list results = [] # if there is a CDE file list, seed the CDE cache if v: # ex: fs_00343 ~ age + sex + group - print("***********************************************************************************************************") - command = "python nidm_kmeans.py -nl " + n + " -variables \"" + v + "\" " + print( + "***********************************************************************************************************" + ) + command = "python nidm_kmeans.py -nl " + n + ' -variables "' + v + '" ' print("Your command was: " + command) - if (o is not None): + if o is not None: f = open(o, "w") f.write("Your command was " + command) f.close() @@ -77,90 +91,140 @@ def data_aggregation(): # all data from all the files is collected project = GetProjectsUUID([nidm_file]) # split the model into its constituent variables global full_model_variable_list - full_model_variable_list=[] + full_model_variable_list = [] global model_list model_list = v.split(",") - for i in range(len(model_list)): # here, we remove any leading or trailing spaces + for i in range( + len(model_list) + ): # here, we remove any leading or trailing spaces model_list[i] = model_list[i].strip() global vars # used in dataparsing() vars = "" for i in range(len(model_list) - 1, -1, -1): - full_model_variable_list.append(model_list[i]) # will be used in the regularization, but we need the full list - if "*" in model_list[i]: # removing the star term from the columns we're about to pull from data + full_model_variable_list.append( + model_list[i] + ) # will be used in the regularization, but we need the full list + if ( + "*" in model_list[i] + ): # removing the star term from the columns we're about to pull from data model_list.pop(i) else: vars = vars + model_list[i] + "," - vars = vars[0:len(vars) - 1] - uri = "/projects/" + project[0].toPython().split("/")[-1] + "?fields=" + vars + vars = vars[0 : len(vars) - 1] + uri = ( + "/projects/" + project[0].toPython().split("/")[-1] + "?fields=" + vars + ) # get fields output from each file and concatenate df_list_holder[count].append(pd.DataFrame(restParser.run([nidm_file], uri))) df = pd.concat(df_list_holder[count]) - with tempfile.NamedTemporaryFile(delete=False) as temp: # turns the dataframe into a temporary csv - df.to_csv(temp.name + '.csv') + with tempfile.NamedTemporaryFile( + delete=False + ) as temp: # turns the dataframe into a temporary csv + df.to_csv(temp.name + ".csv") temp.close() - data = list(csv.reader(open(temp.name + '.csv'))) # makes the csv a 2D list to make it easier to call the contents of certain cells - numcols = (len(data) - 1) // (len(model_list)) # Finds the number of columns in the original dataframe + data = list( + csv.reader(open(temp.name + ".csv")) + ) # makes the csv a 2D list to make it easier to call the contents of certain cells + numcols = (len(data) - 1) // ( + len(model_list) + ) # Finds the number of columns in the original dataframe global condensed_data # also used in linreg() - 
condensed_data_holder[count] = [[0] * (len(model_list))] # makes an array 1 row by the number of necessary columns - for i in range(numcols): # makes the 2D array big enough to store all of the necessary values in the edited dataset + condensed_data_holder[count] = [ + [0] * (len(model_list)) + ] # makes an array 1 row by the number of necessary columns + for i in range( + numcols + ): # makes the 2D array big enough to store all of the necessary values in the edited dataset condensed_data_holder[count].append([0] * (len(model_list))) - for i in range(len(model_list)): # stores the independent variable names in the first row + for i in range( + len(model_list) + ): # stores the independent variable names in the first row condensed_data_holder[count][0][i] = model_list[i] numrows = 1 # begins at the first row to add data - fieldcolumn = 0 # the column the variable name is in in the original dataset + fieldcolumn = ( + 0 # the column the variable name is in in the original dataset + ) valuecolumn = 0 # the column the value is in in the original dataset datacolumn = 0 # if it is identified by the dataElement name instead of the field's name not_found_list = [] for i in range(len(data[0])): - if data[0][i] == 'sourceVariable': # finds the column where the variable names are + if ( + data[0][i] == "sourceVariable" + ): # finds the column where the variable names are fieldcolumn = i - elif data[0][i] == 'source_variable': # finds the column where the variable names are + elif ( + data[0][i] == "source_variable" + ): # finds the column where the variable names are fieldcolumn = i - elif data[0][i] == 'isAbout': + elif data[0][i] == "isAbout": aboutcolumn = i - elif data[0][i] == 'label': + elif data[0][i] == "label": namecolumn = i # finds the column where the variable names are - elif data[0][i] == 'value': + elif data[0][i] == "value": valuecolumn = i # finds the column where the values are - elif data[0][i] == 'dataElement': # finds the column where the data element is if necessary + elif ( + data[0][i] == "dataElement" + ): # finds the column where the data element is if necessary datacolumn = i for i in range( - len(condensed_data_holder[count][ - 0])): # starts iterating through the dataset, looking for the name in that - for j in range(1, len(data)): # column, so it can append the values under the proper variables + len(condensed_data_holder[count][0]) + ): # starts iterating through the dataset, looking for the name in that + for j in range( + 1, len(data) + ): # column, so it can append the values under the proper variables try: split_url = condensed_data_holder[count][0][i].split("/") - if data[j][fieldcolumn] == condensed_data_holder[count][0][ - i]: # in the dataframe, the name is in column 3 + if ( + data[j][fieldcolumn] == condensed_data_holder[count][0][i] + ): # in the dataframe, the name is in column 3 condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values - elif data[j][aboutcolumn] == condensed_data_holder[count][0][ - i]: + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values + elif data[j][aboutcolumn] == condensed_data_holder[count][0][i]: condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values - elif data[j][aboutcolumn] == 
split_url[ - len(split_url) - 1]: # this is in case the uri only works by querying the part after the last backslash + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values + elif ( + data[j][aboutcolumn] == split_url[len(split_url) - 1] + ): # this is in case the uri only works by querying the part after the last backslash condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values - elif data[j][namecolumn] == condensed_data_holder[count][0][ - i]: # in the dataframe, the name is in column 12 + elif ( + data[j][namecolumn] == condensed_data_holder[count][0][i] + ): # in the dataframe, the name is in column 12 condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values - elif condensed_data_holder[count][0][i] == data[j][ - datacolumn]: # in the dataframe, the name is in column 9 + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values + elif ( + condensed_data_holder[count][0][i] == data[j][datacolumn] + ): # in the dataframe, the name is in column 9 condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values except IndexError: numrows = numrows + 1 numrows = 1 # resets to the first row for the next variable temp_list = condensed_data_holder[count] - for j in range(len(temp_list[0]) - 1, 0,-1): # if the software appends a column with 0 as the heading, it removes this null column + for j in range( + len(temp_list[0]) - 1, 0, -1 + ): # if the software appends a column with 0 as the heading, it removes this null column if temp_list[0][j] == "0" or temp_list[0][j] == "NaN": for row in condensed_data_holder[count]: row.pop(j) @@ -175,28 +239,37 @@ def data_aggregation(): # all data from all the files is collected count1 = 0 for i in range(len(condensed_data_holder[count][0])): if " " in condensed_data_holder[count][0][i]: - condensed_data_holder[count][0][i] = condensed_data_holder[count][0][i].replace(" ", "_") + condensed_data_holder[count][0][i] = condensed_data_holder[count][ + 0 + ][i].replace(" ", "_") for i in range(len(vars)): if " " in vars[i]: vars[i] = vars[i].replace(" ", "_") count = count + 1 if len(not_found_list) > 0: print( - "***********************************************************************************************************") + "***********************************************************************************************************" + ) print() print("Your variables were " + v) print() print( - "The following variables were not found in " + nidm_file + ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables.") - if (o is not None): + "The following variables were not found in " + + nidm_file + + ". 
The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables." + ) + if o is not None: f = open(o, "a") f.write("Your variables were " + v) f.write( - "The following variables were not found in " + nidm_file + ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables.") + "The following variables were not found in " + + nidm_file + + ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables." + ) f.close() for i in range(0, len(not_found_list)): print(str(i + 1) + ". " + not_found_list[i]) - if (o is not None): + if o is not None: f = open(o, "a") f.write(str(i + 1) + ". " + not_found_list[i]) f.close() @@ -207,30 +280,36 @@ def data_aggregation(): # all data from all the files is collected if not_found_count > 0: exit(1) - else: print("ERROR: No query parameter provided. See help:") print() os.system("pynidm query --help") exit(1) -def dataparsing(): #The data is changed to a format that is usable by the linear regression method + +def dataparsing(): # The data is changed to a format that is usable by the linear regression method global condensed_data condensed_data = [] for i in range(0, len(file_list)): condensed_data = condensed_data + condensed_data_holder[i] - x = pd.read_csv(opencsv(condensed_data)) # changes the dataframe to a csv to make it easier to work with + x = pd.read_csv( + opencsv(condensed_data) + ) # changes the dataframe to a csv to make it easier to work with x.head() # prints what the csv looks like x.dtypes # checks data format obj_df = x.select_dtypes # puts all the variables in a dataset x.shape # says number of rows and columns in form of tuple x.describe() # says dataset statistics obj_df = x.select_dtypes( - include=['object']).copy() # takes everything that is an object (not float or int) and puts it in a new dataset + include=["object"] + ).copy() # takes everything that is an object (not float or int) and puts it in a new dataset obj_df.head() # prints the new dataset - int_df = x.select_dtypes(include=['int64']).copy() # takes everything that is an int and puts it in a new dataset + int_df = x.select_dtypes( + include=["int64"] + ).copy() # takes everything that is an int and puts it in a new dataset float_df = x.select_dtypes( - include=['float64']).copy() # takes everything that is a float and puts it in a new dataset + include=["float64"] + ).copy() # takes everything that is a float and puts it in a new dataset df_int_float = pd.concat([float_df, int_df], axis=1) stringvars = [] # starts a list that will store all variables that are not numbers for i in range(1, len(condensed_data)): # goes through each variable @@ -238,30 +317,42 @@ def dataparsing(): #The data is changed to a format that is usable by the linear try: # if the value of the field can be turned into a float (is numerical) float(condensed_data[i][j]) # this means it's a number except ValueError: # if it can't be (is a string) - if condensed_data[0][ - j] not in stringvars: # adds the variable name to the list if it isn't there already + if ( + condensed_data[0][j] not in stringvars + ): # adds the variable name to the list if it isn't there already stringvars.append(condensed_data[0][j]) - le = preprocessing.LabelEncoder() # anything involving le shows the encoding of categorical variables + le = ( + preprocessing.LabelEncoder() + ) # anything involving le shows 
the encoding of categorical variables for i in range(len(stringvars)): le.fit(obj_df[stringvars[i]].astype(str)) - obj_df_trf = obj_df.astype(str).apply(le.fit_transform) # transforms the categorical variables into numbers. + obj_df_trf = obj_df.astype(str).apply( + le.fit_transform + ) # transforms the categorical variables into numbers. global df_final # also used in linreg() if not obj_df_trf.empty: - df_final = pd.concat([df_int_float, obj_df_trf], axis=1) # join_axes=[df_int_float.index]) + df_final = pd.concat( + [df_int_float, obj_df_trf], axis=1 + ) # join_axes=[df_int_float.index]) else: df_final = df_int_float df_final.head() # shows the final dataset with all the encoding print(df_final) # prints the final dataset print() - print("***********************************************************************************************************") + print( + "***********************************************************************************************************" + ) print() - if (o is not None): + if o is not None: f = open(o, "a") f.write(df_final.to_string(header=True, index=True)) f.write( - "\n\n***********************************************************************************************************") + "\n\n***********************************************************************************************************" + ) f.write("\n\nModel Results: ") f.close() + + def ap(): index = 0 global levels # also used in contrasting() @@ -274,8 +365,8 @@ def ap(): # Beginning of the linear regression global X - #global y - #Unsure on how to proceed here with interacting variables, since I'm sure dmatrices won't work + # global y + # Unsure on how to proceed here with interacting variables, since I'm sure dmatrices won't work scaler = MinMaxScaler() @@ -286,41 +377,50 @@ def ap(): X = df_final[model_list] af = AffinityPropagation(preference=-50).fit(X) - cluster_center_indices=af.cluster_centers_indices_ + cluster_center_indices = af.cluster_centers_indices_ labels = af.labels_ n_clusters_ = len(cluster_center_indices) - print('Estimated number of clusters: %d' % n_clusters_) - #print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels)) - #print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels)) - #print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels)) - #print("Adjusted Rand Index: %0.3f" % metrics.adjusted_rand_score(labels_true, labels)) - #print("Adjusted Mutual Information: %0.3f" % metrics.adjusted_mutual_info_score(labels_true, labels)) - print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels, metric='sqeuclidean')) + print("Estimated number of clusters: %d" % n_clusters_) + # print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels)) + # print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels)) + # print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels)) + # print("Adjusted Rand Index: %0.3f" % metrics.adjusted_rand_score(labels_true, labels)) + # print("Adjusted Mutual Information: %0.3f" % metrics.adjusted_mutual_info_score(labels_true, labels)) + print( + "Silhouette Coefficient: %0.3f" + % metrics.silhouette_score(X, labels, metric="sqeuclidean") + ) - sns.scatterplot(data=X, x=model_list[0], y=model_list[1], hue=af, palette = "gnuplot") + sns.scatterplot(data=X, x=model_list[0], y=model_list[1], hue=af, palette="gnuplot") plt.xlabel(model_list[1]) plt.ylabel(model_list[0]) title = "Clustering results of " for i in range(len(model_list)): title = 
title + model_list[i] + "," - title = title[0:len(title)-1] + title = title[0 : len(title) - 1] plt.title(title) plt.show() - if (o is not None): + if o is not None: f = open(o, "a") f.close() + + def opencsv(data): """saves a list of lists as a csv and opens""" - import tempfile - import os import csv - handle, fn = tempfile.mkstemp(suffix='.csv') - with os.fdopen(handle,"w", encoding='utf8',errors='surrogateescape',newline='') as f: + import os + import tempfile + + handle, fn = tempfile.mkstemp(suffix=".csv") + with os.fdopen( + handle, "w", encoding="utf8", errors="surrogateescape", newline="" + ) as f: writer = csv.writer(f) writer.writerows(data) return fn + # it can be used calling the script `python nidm_query.py -nl ... -q .. if __name__ == "__main__": full_ap() diff --git a/nidm/experiment/tools/nidm_agglomerative_clustering.py b/nidm/experiment/tools/nidm_agglomerative_clustering.py index 094bfe68..865f82c0 100644 --- a/nidm/experiment/tools/nidm_agglomerative_clustering.py +++ b/nidm/experiment/tools/nidm_agglomerative_clustering.py @@ -1,36 +1,47 @@ +import csv import os import tempfile -import pandas as pd -import csv -from patsy.highlevel import dmatrices -from nidm.experiment.Query import GetProjectsUUID import click +import matplotlib.pyplot as plt +from nidm.experiment.Query import GetProjectsUUID from nidm.experiment.tools.click_base import cli from nidm.experiment.tools.rest import RestParser import numpy as np -import matplotlib.pyplot as plt -import seaborn as sns -from sklearn.cluster import AgglomerativeClustering +import pandas as pd +from patsy.highlevel import dmatrices import scipy.cluster.hierarchy as sch -from sklearn.cluster import KMeans +import seaborn as sns +from sklearn import metrics, preprocessing +from sklearn.cluster import AffinityPropagation, AgglomerativeClustering, KMeans from sklearn.decomposition import PCA -from sklearn.metrics import silhouette_score, adjusted_rand_score +from sklearn.metrics import adjusted_rand_score, silhouette_score from sklearn.pipeline import Pipeline from sklearn.preprocessing import LabelEncoder, MinMaxScaler -from sklearn.cluster import AffinityPropagation -from sklearn import metrics -from sklearn import preprocessing + @cli.command() -@click.option("--nidm_file_list", "-nl", required=True, - help="A comma separated list of NIDM files with full path") -@click.option("-variables", required=False, - help="This parameter is for the variables the user would like to complete the k-means algorithm on.\nThe way this looks in the command is python3 nidm_kmeans.py -nl MTdemog_aseg_v2.ttl -v \"fs_003343,age*sex,sex,age,group,age*group,bmi\"") -@click.option("--output_file", "-o", required=False, - help="Optional output file (TXT) to store results of the linear regression, contrast, and regularization") +@click.option( + "--nidm_file_list", + "-nl", + required=True, + help="A comma separated list of NIDM files with full path", +) +@click.option( + "-variables", + required=False, + help='This parameter is for the variables the user would like to complete the k-means algorithm on.\nThe way this looks in the command is python3 nidm_kmeans.py -nl MTdemog_aseg_v2.ttl -v "fs_003343,age*sex,sex,age,group,age*group,bmi"', +) +@click.option( + "--output_file", + "-o", + required=False, + help="Optional output file (TXT) to store results of the linear regression, contrast, and regularization", +) def full_ac(nidm_file_list, output_file, variables): global v # Needed to do this because the code only used the parameters in the first 
method, meaning I had to move it all to method 1. - v = variables.strip() # used in data_aggregation, linreg(), spaces stripped from left and right + v = ( + variables.strip() + ) # used in data_aggregation, linreg(), spaces stripped from left and right global o # used in dataparsing() o = output_file global n # used in data_aggregation() @@ -41,16 +52,18 @@ def full_ac(nidm_file_list, output_file, variables): def data_aggregation(): # all data from all the files is collected - """ This function provides query support for NIDM graphs. """ + """This function provides query support for NIDM graphs.""" # query result list results = [] # if there is a CDE file list, seed the CDE cache if v: # ex: fs_00343 ~ age + sex + group - print("***********************************************************************************************************") - command = "python nidm_kmeans.py -nl " + n + " -variables \"" + v + "\" " + print( + "***********************************************************************************************************" + ) + command = "python nidm_kmeans.py -nl " + n + ' -variables "' + v + '" ' print("Your command was: " + command) - if (o is not None): + if o is not None: f = open(o, "w") f.write("Your command was " + command) f.close() @@ -79,90 +92,140 @@ def data_aggregation(): # all data from all the files is collected project = GetProjectsUUID([nidm_file]) # split the model into its constituent variables global full_model_variable_list - full_model_variable_list=[] + full_model_variable_list = [] global model_list model_list = v.split(",") - for i in range(len(model_list)): # here, we remove any leading or trailing spaces + for i in range( + len(model_list) + ): # here, we remove any leading or trailing spaces model_list[i] = model_list[i].strip() global vars # used in dataparsing() vars = "" for i in range(len(model_list) - 1, -1, -1): - full_model_variable_list.append(model_list[i]) # will be used in the regularization, but we need the full list - if "*" in model_list[i]: # removing the star term from the columns we're about to pull from data + full_model_variable_list.append( + model_list[i] + ) # will be used in the regularization, but we need the full list + if ( + "*" in model_list[i] + ): # removing the star term from the columns we're about to pull from data model_list.pop(i) else: vars = vars + model_list[i] + "," - vars = vars[0:len(vars) - 1] - uri = "/projects/" + project[0].toPython().split("/")[-1] + "?fields=" + vars + vars = vars[0 : len(vars) - 1] + uri = ( + "/projects/" + project[0].toPython().split("/")[-1] + "?fields=" + vars + ) # get fields output from each file and concatenate df_list_holder[count].append(pd.DataFrame(restParser.run([nidm_file], uri))) df = pd.concat(df_list_holder[count]) - with tempfile.NamedTemporaryFile(delete=False) as temp: # turns the dataframe into a temporary csv - df.to_csv(temp.name + '.csv') + with tempfile.NamedTemporaryFile( + delete=False + ) as temp: # turns the dataframe into a temporary csv + df.to_csv(temp.name + ".csv") temp.close() - data = list(csv.reader(open(temp.name + '.csv'))) # makes the csv a 2D list to make it easier to call the contents of certain cells - numcols = (len(data) - 1) // (len(model_list)) # Finds the number of columns in the original dataframe + data = list( + csv.reader(open(temp.name + ".csv")) + ) # makes the csv a 2D list to make it easier to call the contents of certain cells + numcols = (len(data) - 1) // ( + len(model_list) + ) # Finds the number of columns in the original 
dataframe global condensed_data # also used in linreg() - condensed_data_holder[count] = [[0] * (len(model_list))] # makes an array 1 row by the number of necessary columns - for i in range(numcols): # makes the 2D array big enough to store all of the necessary values in the edited dataset + condensed_data_holder[count] = [ + [0] * (len(model_list)) + ] # makes an array 1 row by the number of necessary columns + for i in range( + numcols + ): # makes the 2D array big enough to store all of the necessary values in the edited dataset condensed_data_holder[count].append([0] * (len(model_list))) - for i in range(len(model_list)): # stores the independent variable names in the first row + for i in range( + len(model_list) + ): # stores the independent variable names in the first row condensed_data_holder[count][0][i] = model_list[i] numrows = 1 # begins at the first row to add data - fieldcolumn = 0 # the column the variable name is in in the original dataset + fieldcolumn = ( + 0 # the column the variable name is in in the original dataset + ) valuecolumn = 0 # the column the value is in in the original dataset datacolumn = 0 # if it is identified by the dataElement name instead of the field's name not_found_list = [] for i in range(len(data[0])): - if data[0][i] == 'sourceVariable': # finds the column where the variable names are + if ( + data[0][i] == "sourceVariable" + ): # finds the column where the variable names are fieldcolumn = i - elif data[0][i] == 'source_variable': # finds the column where the variable names are + elif ( + data[0][i] == "source_variable" + ): # finds the column where the variable names are fieldcolumn = i - elif data[0][i] == 'isAbout': + elif data[0][i] == "isAbout": aboutcolumn = i - elif data[0][i] == 'label': + elif data[0][i] == "label": namecolumn = i # finds the column where the variable names are - elif data[0][i] == 'value': + elif data[0][i] == "value": valuecolumn = i # finds the column where the values are - elif data[0][i] == 'dataElement': # finds the column where the data element is if necessary + elif ( + data[0][i] == "dataElement" + ): # finds the column where the data element is if necessary datacolumn = i for i in range( - len(condensed_data_holder[count][ - 0])): # starts iterating through the dataset, looking for the name in that - for j in range(1, len(data)): # column, so it can append the values under the proper variables + len(condensed_data_holder[count][0]) + ): # starts iterating through the dataset, looking for the name in that + for j in range( + 1, len(data) + ): # column, so it can append the values under the proper variables try: split_url = condensed_data_holder[count][0][i].split("/") - if data[j][fieldcolumn] == condensed_data_holder[count][0][ - i]: # in the dataframe, the name is in column 3 + if ( + data[j][fieldcolumn] == condensed_data_holder[count][0][i] + ): # in the dataframe, the name is in column 3 condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values - elif data[j][aboutcolumn] == condensed_data_holder[count][0][ - i]: + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values + elif data[j][aboutcolumn] == condensed_data_holder[count][0][i]: condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next 
row to add the proper values - elif data[j][aboutcolumn] == split_url[ - len(split_url) - 1]: # this is in case the uri only works by querying the part after the last backslash + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values + elif ( + data[j][aboutcolumn] == split_url[len(split_url) - 1] + ): # this is in case the uri only works by querying the part after the last backslash condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values - elif data[j][namecolumn] == condensed_data_holder[count][0][ - i]: # in the dataframe, the name is in column 12 + elif ( + data[j][namecolumn] == condensed_data_holder[count][0][i] + ): # in the dataframe, the name is in column 12 condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values - elif condensed_data_holder[count][0][i] == data[j][ - datacolumn]: # in the dataframe, the name is in column 9 + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values + elif ( + condensed_data_holder[count][0][i] == data[j][datacolumn] + ): # in the dataframe, the name is in column 9 condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values except IndexError: numrows = numrows + 1 numrows = 1 # resets to the first row for the next variable temp_list = condensed_data_holder[count] - for j in range(len(temp_list[0]) - 1, 0,-1): # if the software appends a column with 0 as the heading, it removes this null column + for j in range( + len(temp_list[0]) - 1, 0, -1 + ): # if the software appends a column with 0 as the heading, it removes this null column if temp_list[0][j] == "0" or temp_list[0][j] == "NaN": for row in condensed_data_holder[count]: row.pop(j) @@ -177,28 +240,37 @@ def data_aggregation(): # all data from all the files is collected count1 = 0 for i in range(len(condensed_data_holder[count][0])): if " " in condensed_data_holder[count][0][i]: - condensed_data_holder[count][0][i] = condensed_data_holder[count][0][i].replace(" ", "_") + condensed_data_holder[count][0][i] = condensed_data_holder[count][ + 0 + ][i].replace(" ", "_") for i in range(len(vars)): if " " in vars[i]: vars[i] = vars[i].replace(" ", "_") count = count + 1 if len(not_found_list) > 0: print( - "***********************************************************************************************************") + "***********************************************************************************************************" + ) print() print("Your variables were " + v) print() print( - "The following variables were not found in " + nidm_file + ". The model cannot run because this will skew the data. 
Try checking your spelling or use nidm_query.py to see other possible variables.") - if (o is not None): + "The following variables were not found in " + + nidm_file + + ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables." + ) + if o is not None: f = open(o, "a") f.write("Your variables were " + v) f.write( - "The following variables were not found in " + nidm_file + ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables.") + "The following variables were not found in " + + nidm_file + + ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables." + ) f.close() for i in range(0, len(not_found_list)): print(str(i + 1) + ". " + not_found_list[i]) - if (o is not None): + if o is not None: f = open(o, "a") f.write(str(i + 1) + ". " + not_found_list[i]) f.close() @@ -209,30 +281,36 @@ def data_aggregation(): # all data from all the files is collected if not_found_count > 0: exit(1) - else: print("ERROR: No query parameter provided. See help:") print() os.system("pynidm query --help") exit(1) -def dataparsing(): #The data is changed to a format that is usable by the linear regression method + +def dataparsing(): # The data is changed to a format that is usable by the linear regression method global condensed_data condensed_data = [] for i in range(0, len(file_list)): condensed_data = condensed_data + condensed_data_holder[i] - x = pd.read_csv(opencsv(condensed_data)) # changes the dataframe to a csv to make it easier to work with + x = pd.read_csv( + opencsv(condensed_data) + ) # changes the dataframe to a csv to make it easier to work with x.head() # prints what the csv looks like x.dtypes # checks data format obj_df = x.select_dtypes # puts all the variables in a dataset x.shape # says number of rows and columns in form of tuple x.describe() # says dataset statistics obj_df = x.select_dtypes( - include=['object']).copy() # takes everything that is an object (not float or int) and puts it in a new dataset + include=["object"] + ).copy() # takes everything that is an object (not float or int) and puts it in a new dataset obj_df.head() # prints the new dataset - int_df = x.select_dtypes(include=['int64']).copy() # takes everything that is an int and puts it in a new dataset + int_df = x.select_dtypes( + include=["int64"] + ).copy() # takes everything that is an int and puts it in a new dataset float_df = x.select_dtypes( - include=['float64']).copy() # takes everything that is a float and puts it in a new dataset + include=["float64"] + ).copy() # takes everything that is a float and puts it in a new dataset df_int_float = pd.concat([float_df, int_df], axis=1) stringvars = [] # starts a list that will store all variables that are not numbers for i in range(1, len(condensed_data)): # goes through each variable @@ -240,30 +318,42 @@ def dataparsing(): #The data is changed to a format that is usable by the linear try: # if the value of the field can be turned into a float (is numerical) float(condensed_data[i][j]) # this means it's a number except ValueError: # if it can't be (is a string) - if condensed_data[0][ - j] not in stringvars: # adds the variable name to the list if it isn't there already + if ( + condensed_data[0][j] not in stringvars + ): # adds the variable name to the list if it isn't there already stringvars.append(condensed_data[0][j]) - le = 
preprocessing.LabelEncoder() # anything involving le shows the encoding of categorical variables + le = ( + preprocessing.LabelEncoder() + ) # anything involving le shows the encoding of categorical variables for i in range(len(stringvars)): le.fit(obj_df[stringvars[i]].astype(str)) - obj_df_trf = obj_df.astype(str).apply(le.fit_transform) # transforms the categorical variables into numbers. + obj_df_trf = obj_df.astype(str).apply( + le.fit_transform + ) # transforms the categorical variables into numbers. global df_final # also used in linreg() if not obj_df_trf.empty: - df_final = pd.concat([df_int_float, obj_df_trf], axis=1) # join_axes=[df_int_float.index]) + df_final = pd.concat( + [df_int_float, obj_df_trf], axis=1 + ) # join_axes=[df_int_float.index]) else: df_final = df_int_float df_final.head() # shows the final dataset with all the encoding print(df_final) # prints the final dataset print() - print("***********************************************************************************************************") + print( + "***********************************************************************************************************" + ) print() - if (o is not None): + if o is not None: f = open(o, "a") f.write(df_final.to_string(header=True, index=True)) f.write( - "\n\n***********************************************************************************************************") + "\n\n***********************************************************************************************************" + ) f.write("\n\nModel Results: ") f.close() + + def ac(): index = 0 global levels # also used in contrasting() @@ -276,8 +366,8 @@ def ac(): # Beginning of the linear regression global X - #global y - #Unsure on how to proceed here with interacting variables, since I'm sure dmatrices won't work + # global y + # Unsure on how to proceed here with interacting variables, since I'm sure dmatrices won't work scaler = MinMaxScaler() @@ -285,34 +375,41 @@ def ac(): scaler.fit(df_final[[model_list[i]]]) df_final[[model_list[i]]] = scaler.transform(df_final[[model_list[i]]]) - X = df_final[model_list] #going to need to find out how to do the correct number of clusters - dendrogram = sch.dendrogram(sch.linkage(X, method='ward')) - model = AgglomerativeClustering(n_clusters=5, affinity='euclidean', linkage='ward') + X = df_final[ + model_list + ] # going to need to find out how to do the correct number of clusters + dendrogram = sch.dendrogram(sch.linkage(X, method="ward")) + model = AgglomerativeClustering(n_clusters=5, affinity="euclidean", linkage="ward") model.fit(X) labels = model.labels_ - plt.scatter(X[labels == 0, 0], X[labels == 0, 1], s=50, marker='o', color='red') - plt.scatter(X[labels == 1, 0], X[labels == 1, 1], s=50, marker='o', color='blue') - plt.scatter(X[labels == 2, 0], X[labels == 2, 1], s=50, marker='o', color='green') - plt.scatter(X[labels == 3, 0], X[labels == 3, 1], s=50, marker='o', color='purple') - plt.scatter(X[labels == 4, 0], X[labels == 4, 1], s=50, marker='o', color='orange') + plt.scatter(X[labels == 0, 0], X[labels == 0, 1], s=50, marker="o", color="red") + plt.scatter(X[labels == 1, 0], X[labels == 1, 1], s=50, marker="o", color="blue") + plt.scatter(X[labels == 2, 0], X[labels == 2, 1], s=50, marker="o", color="green") + plt.scatter(X[labels == 3, 0], X[labels == 3, 1], s=50, marker="o", color="purple") + plt.scatter(X[labels == 4, 0], X[labels == 4, 1], s=50, marker="o", color="orange") plt.show() - - if (o is not None): + if o is not None: f = open(o, "a") f.close() + + 
def opencsv(data): """saves a list of lists as a csv and opens""" - import tempfile - import os import csv - handle, fn = tempfile.mkstemp(suffix='.csv') - with os.fdopen(handle,"w", encoding='utf8',errors='surrogateescape',newline='') as f: + import os + import tempfile + + handle, fn = tempfile.mkstemp(suffix=".csv") + with os.fdopen( + handle, "w", encoding="utf8", errors="surrogateescape", newline="" + ) as f: writer = csv.writer(f) writer.writerows(data) return fn + # it can be used calling the script `python nidm_query.py -nl ... -q .. if __name__ == "__main__": full_ac() diff --git a/nidm/experiment/tools/nidm_concat.py b/nidm/experiment/tools/nidm_concat.py index 3f13b430..ee00d17d 100644 --- a/nidm/experiment/tools/nidm_concat.py +++ b/nidm/experiment/tools/nidm_concat.py @@ -1,69 +1,72 @@ #!/usr/bin/env python -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** # nidm_utils.py # License: Apache License, Version 2.0 -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** # Date: 11-28-18 Coded by: David Keator (dbkeator@gmail.com) # Filename: nidm_utils.py # # Program description: Tools for working with NIDM-Experiment files # -#************************************************************************************** +# ************************************************************************************** # Development environment: Python - PyCharm IDE # -#************************************************************************************** +# ************************************************************************************** # System requirements: Python 3.X # Libraries: pybids, numpy, matplotlib, pandas, scipy, math, dateutil, datetime,argparse, # os,sys,getopt,csv -#************************************************************************************** +# ************************************************************************************** # Start date: 11-28-18 # Update history: # DATE MODIFICATION Who # # -#************************************************************************************** +# ************************************************************************************** # Programmer comments: # # -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** -import os,sys from argparse import ArgumentParser -from rdflib import Graph,util -from rdflib.tools import rdf2dot -from nidm.experiment.Utils import read_nidm -from nidm.experiment.Query import GetMergedGraph from io import StringIO -from os.path import basename,splitext +import os +from os.path import basename, splitext import subprocess -from graphviz import Source +import sys import tempfile - import click +from 
graphviz import Source +from nidm.experiment.Query import GetMergedGraph +from nidm.experiment.Utils import read_nidm from nidm.experiment.tools.click_base import cli +from rdflib import Graph, util +from rdflib.tools import rdf2dot + # adding click argument parsing @cli.command() -@click.option("--nidm_file_list", "-nl", required=True, - help="A comma separated list of NIDM files with full path") -@click.option("--out_file", "-o", required=True, - help="File to write concatenated NIDM files") - - +@click.option( + "--nidm_file_list", + "-nl", + required=True, + help="A comma separated list of NIDM files with full path", +) +@click.option( + "--out_file", "-o", required=True, help="File to write concatenated NIDM files" +) def concat(nidm_file_list, out_file): """ This function will concatenate NIDM files. Warning, no merging will be done so you may end up with multiple prov:agents with the same subject id if you're concatenating NIDM files from multiple visits of the same study. If you want to merge NIDM files on subject ID see pynidm merge """ - #create empty graph - graph = GetMergedGraph(nidm_file_list.split(',')) - graph.serialize(out_file, format='turtle') - + # create empty graph + graph = GetMergedGraph(nidm_file_list.split(",")) + graph.serialize(out_file, format="turtle") if __name__ == "__main__": - concat() + concat() diff --git a/nidm/experiment/tools/nidm_convert.py b/nidm/experiment/tools/nidm_convert.py index b34b89f9..d073c03e 100644 --- a/nidm/experiment/tools/nidm_convert.py +++ b/nidm/experiment/tools/nidm_convert.py @@ -1,104 +1,117 @@ #!/usr/bin/env python -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** # nidm_utils.py # License: Apache License, Version 2.0 -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** # Date: 11-28-18 Coded by: David Keator (dbkeator@gmail.com) # Filename: nidm_utils.py # # Program description: Tools for working with NIDM-Experiment files # -#************************************************************************************** +# ************************************************************************************** # Development environment: Python - PyCharm IDE # -#************************************************************************************** +# ************************************************************************************** # System requirements: Python 3.X # Libraries: pybids, numpy, matplotlib, pandas, scipy, math, dateutil, datetime,argparse, # os,sys,getopt,csv -#************************************************************************************** +# ************************************************************************************** # Start date: 11-28-18 # Update history: # DATE MODIFICATION Who # # -#************************************************************************************** +# ************************************************************************************** # Programmer comments: # # 
-#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** -import os,sys from argparse import ArgumentParser -from rdflib import Graph,util -from rdflib.tools import rdf2dot -from nidm.experiment.Utils import read_nidm -from nidm.experiment.Query import GetMergedGraph from io import StringIO -from os.path import basename,splitext,join +import os +from os.path import basename, join, splitext import subprocess -from graphviz import Source +import sys import tempfile - import click +from graphviz import Source +from nidm.experiment.Query import GetMergedGraph +from nidm.experiment.Utils import read_nidm from nidm.experiment.tools.click_base import cli +from rdflib import Graph, util +from rdflib.tools import rdf2dot + # adding click argument parsing @cli.command() -@click.option("--nidm_file_list", "-nl", required=True, - help="A comma separated list of NIDM files with full path") -@click.option("--type", "-t", required=True,type=click.Choice(['turtle', 'jsonld', 'xml-rdf','n3','trig'], case_sensitive=False), - help="If parameter set then NIDM file will be exported as JSONLD") -@click.option("--outdir", "-out", required=False, - help="Optional directory to save converted NIDM file") - - - -def convert(nidm_file_list, type,outdir): +@click.option( + "--nidm_file_list", + "-nl", + required=True, + help="A comma separated list of NIDM files with full path", +) +@click.option( + "--type", + "-t", + required=True, + type=click.Choice( + ["turtle", "jsonld", "xml-rdf", "n3", "trig"], case_sensitive=False + ), + help="If parameter set then NIDM file will be exported as JSONLD", +) +@click.option( + "--outdir", + "-out", + required=False, + help="Optional directory to save converted NIDM file", +) +def convert(nidm_file_list, type, outdir): """ This function will convert NIDM files to various RDF-supported formats and name then / put them in the same place as the input file. """ - for nidm_file in nidm_file_list.split(','): + for nidm_file in nidm_file_list.split(","): # WIP: for now we use pynidm for jsonld exports to make more human readable and rdflib for everything # else. 
if outdir: - outfile = join(outdir,splitext(basename(nidm_file))[0]) + outfile = join(outdir, splitext(basename(nidm_file))[0]) else: outfile = join(splitext(nidm_file)[0]) - if type == 'jsonld': + if type == "jsonld": # read in nidm file project = read_nidm(nidm_file) - #write jsonld file with same name - with open(outfile + ".json", 'w') as f: + # write jsonld file with same name + with open(outfile + ".json", "w") as f: f.write(project.serializeJSONLD()) - elif type == 'turtle': - #graph = Graph() - #graph.parse(nidm_file, format=util.guess_format(nidm_file)) - #graph.serialize(splitext(nidm_file)[0] + ".ttl", format='turtle') + elif type == "turtle": + # graph = Graph() + # graph.parse(nidm_file, format=util.guess_format(nidm_file)) + # graph.serialize(splitext(nidm_file)[0] + ".ttl", format='turtle') project = read_nidm(nidm_file) - with open(outfile + ".ttl", 'w') as f: + with open(outfile + ".ttl", "w") as f: f.write(project.serializeTurtle()) - elif type == 'xml-rdf': + elif type == "xml-rdf": graph = Graph() graph.parse(nidm_file, format=util.guess_format(nidm_file)) - graph.serialize(outfile + ".xml", format='pretty-xml') - elif type == 'n3': + graph.serialize(outfile + ".xml", format="pretty-xml") + elif type == "n3": graph = Graph() graph.parse(nidm_file, format=util.guess_format(nidm_file)) - graph.serialize(outfile + ".n3", format='n3') - elif type == 'trig': + graph.serialize(outfile + ".n3", format="n3") + elif type == "trig": # read in nidm file project = read_nidm(nidm_file) - with open(outfile + ".trig", 'w') as f: + with open(outfile + ".trig", "w") as f: f.write(project.serializeTrig()) else: print("Error, type is not supported at this time") if __name__ == "__main__": - convert() + convert() diff --git a/nidm/experiment/tools/nidm_gmm.py b/nidm/experiment/tools/nidm_gmm.py index a96ba2b0..89356b8d 100644 --- a/nidm/experiment/tools/nidm_gmm.py +++ b/nidm/experiment/tools/nidm_gmm.py @@ -1,35 +1,58 @@ +import csv import os import tempfile -import pandas as pd -import csv -from nidm.experiment.Query import GetProjectsUUID import click +import matplotlib.pyplot as plt +from nidm.experiment.Query import GetProjectsUUID from nidm.experiment.tools.click_base import cli from nidm.experiment.tools.rest import RestParser import numpy as np -import matplotlib.pyplot as plt +import pandas as pd +from sklearn import preprocessing from sklearn.metrics import silhouette_score from sklearn.mixture import GaussianMixture from sklearn.preprocessing import LabelEncoder, MinMaxScaler -from sklearn import preprocessing + @cli.command() -@click.option("--nidm_file_list", "-nl", required=True, - help="A comma separated list of NIDM files with full path") -@click.option("--var","-variables", required=True, - help="This parameter is for the variables the user would like to complete the k-means algorithm on.\nThe way this looks in the command is python3 nidm_kmeans.py -nl MTdemog_aseg_v2.ttl -v \"fs_003343,age*sex,sex,age,group,age*group,bmi\"") -@click.option("--k_range", "-k", required=True, - help="The maximum number of clusters to try. 
The algorithm will go from 2 to this number to determine the optimal number of clusters.") -@click.option("--optimal_cluster_method", "-m", required=True, - help="The criterion used to select the optimal partitioning (either Silhouette Score, AIC, or BIC).") -@click.option("--output_file", "-o", required=False, - help="Optional output file (TXT) to store results of the linear regression, contrast, and regularization") +@click.option( + "--nidm_file_list", + "-nl", + required=True, + help="A comma separated list of NIDM files with full path", +) +@click.option( + "--var", + "-variables", + required=True, + help='This parameter is for the variables the user would like to complete the k-means algorithm on.\nThe way this looks in the command is python3 nidm_kmeans.py -nl MTdemog_aseg_v2.ttl -v "fs_003343,age*sex,sex,age,group,age*group,bmi"', +) +@click.option( + "--k_range", + "-k", + required=True, + help="The maximum number of clusters to try. The algorithm will go from 2 to this number to determine the optimal number of clusters.", +) +@click.option( + "--optimal_cluster_method", + "-m", + required=True, + help="The criterion used to select the optimal partitioning (either Silhouette Score, AIC, or BIC).", +) +@click.option( + "--output_file", + "-o", + required=False, + help="Optional output file (TXT) to store results of the linear regression, contrast, and regularization", +) def gmm(nidm_file_list, output_file, var, k_range, optimal_cluster_method): """ - This function provides a tool to complete k-means clustering on NIDM data. - """ + This function provides a tool to complete k-means clustering on NIDM data. + """ global v # Needed to do this because the code only used the parameters in the first method, meaning I had to move it all to method 1. - v = var.strip() # used in data_aggregation, kmenas(), spaces stripped from left and right + v = ( + var.strip() + ) # used in data_aggregation, kmenas(), spaces stripped from left and right global o # used in dataparsing() o = output_file global n # used in data_aggregation() @@ -44,20 +67,32 @@ def gmm(nidm_file_list, output_file, var, k_range, optimal_cluster_method): def data_aggregation(): # all data from all the files is collected - """ This function provides query support for NIDM graphs. """ + """This function provides query support for NIDM graphs.""" # query result list results = [] # if there is a CDE file list, seed the CDE cache - if v: #ex: age,sex,DX_GROUP - print("***********************************************************************************************************") - command = "pynidm k-means -nl " + n + " -variables \"" + v + "\" " + "-k " + str(k_num) + " -m " + cm + if v: # ex: age,sex,DX_GROUP + print( + "***********************************************************************************************************" + ) + command = ( + "pynidm k-means -nl " + + n + + ' -variables "' + + v + + '" ' + + "-k " + + str(k_num) + + " -m " + + cm + ) print("Your command was: " + command) - if (o is not None): + if o is not None: f = open(o, "w") f.write("Your command was " + command) f.close() - verbosity=0 + verbosity = 0 restParser = RestParser(verbosity_level=int(verbosity)) restParser.setOutputFormat(RestParser.OBJECT_FORMAT) global df_list # used in dataparsing() @@ -86,98 +121,142 @@ def data_aggregation(): # all data from all the files is collected # below, we edit the model so it splits by +,~, or =. However, to help it out in catching everything # we replaced ~ and = with a + so that we can still use split. 
Regex wasn't working. var_list = v.split(",") - for i in range(len(var_list)): # here, we remove any leading or trailing spaces + for i in range( + len(var_list) + ): # here, we remove any leading or trailing spaces var_list[i] = var_list[i].strip() # set the dependent variable to the one dependent variable in the model global vars # used in dataparsing() vars = "" for i in range(len(var_list) - 1, -1, -1): - if not "*" in var_list[i]: # removing the star term from the columns we're about to pull from data + if ( + not "*" in var_list[i] + ): # removing the star term from the columns we're about to pull from data vars = vars + var_list[i] + "," else: - print("Interacting variables are not present in clustering models. They will be removed.") - vars = vars[0:len(vars) - 1] - uri = "/projects/" + project[0].toPython().split("/")[-1] + "?fields=" + vars + print( + "Interacting variables are not present in clustering models. They will be removed." + ) + vars = vars[0 : len(vars) - 1] + uri = ( + "/projects/" + project[0].toPython().split("/")[-1] + "?fields=" + vars + ) # get fields output from each file and concatenate df_list_holder[count].append(pd.DataFrame(restParser.run([nidm_file], uri))) # global dep_var df = pd.concat(df_list_holder[count]) - with tempfile.NamedTemporaryFile(delete=False) as temp: # turns the dataframe into a temporary csv - df.to_csv(temp.name + '.csv') + with tempfile.NamedTemporaryFile( + delete=False + ) as temp: # turns the dataframe into a temporary csv + df.to_csv(temp.name + ".csv") temp.close() - data = list(csv.reader(open( - temp.name + '.csv'))) # makes the csv a 2D list to make it easier to call the contents of certain cells + data = list( + csv.reader(open(temp.name + ".csv")) + ) # makes the csv a 2D list to make it easier to call the contents of certain cells var_list = vars.split(",") # makes a list of the independent variables numcols = (len(data) - 1) // ( - len(var_list)) # Finds the number of columns in the original dataframe + len(var_list) + ) # Finds the number of columns in the original dataframe global condensed_data # also used in linreg() condensed_data_holder[count] = [ - [0] * (len(var_list))] # makes an array 1 row by the number of necessary columns + [0] * (len(var_list)) + ] # makes an array 1 row by the number of necessary columns for i in range( - numcols): # makes the 2D array big enough to store all of the necessary values in the edited dataset + numcols + ): # makes the 2D array big enough to store all of the necessary values in the edited dataset condensed_data_holder[count].append([0] * (len(var_list))) for m in range(0, len(var_list)): end_url = var_list[m].split("/") if "/" in var_list[m]: var_list[m] = end_url[len(end_url) - 1] - for i in range(len(var_list)): # stores the independent variable names in the first row + for i in range( + len(var_list) + ): # stores the independent variable names in the first row condensed_data_holder[count][0][i] = var_list[i] numrows = 1 # begins at the first row to add data - fieldcolumn = 0 # the column the variable name is in in the original dataset + fieldcolumn = ( + 0 # the column the variable name is in in the original dataset + ) valuecolumn = 0 # the column the value is in in the original dataset datacolumn = 0 # if it is identified by the dataElement name instead of the field's name not_found_list = [] for i in range(len(data[0])): - if data[0][i] == 'sourceVariable': # finds the column where the variable names are + if ( + data[0][i] == "sourceVariable" + ): # finds the column where 
the variable names are fieldcolumn = i - elif data[0][i] == 'source_variable': # finds the column where the variable names are + elif ( + data[0][i] == "source_variable" + ): # finds the column where the variable names are fieldcolumn = i - elif data[0][i] == 'isAbout': + elif data[0][i] == "isAbout": aboutcolumn = i - elif data[0][i] == 'label': + elif data[0][i] == "label": namecolumn = i # finds the column where the variable names are - elif data[0][i] == 'value': + elif data[0][i] == "value": valuecolumn = i # finds the column where the values are - elif data[0][i] == 'dataElement': # finds the column where the data element is if necessary + elif ( + data[0][i] == "dataElement" + ): # finds the column where the data element is if necessary datacolumn = i for i in range( - len(condensed_data_holder[count][ - 0])): # starts iterating through the dataset, looking for the name in that - for j in range(1, len(data)): # column, so it can append the values under the proper variables + len(condensed_data_holder[count][0]) + ): # starts iterating through the dataset, looking for the name in that + for j in range( + 1, len(data) + ): # column, so it can append the values under the proper variables try: - if data[j][fieldcolumn] == condensed_data_holder[count][0][ - i]: # in the dataframe, the name is in column 3 + if ( + data[j][fieldcolumn] == condensed_data_holder[count][0][i] + ): # in the dataframe, the name is in column 3 condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values - elif data[j][aboutcolumn] == condensed_data_holder[count][0][ - i]: + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values + elif data[j][aboutcolumn] == condensed_data_holder[count][0][i]: condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values - elif condensed_data_holder[count][0][ - i] in data[j][ - aboutcolumn]: # this is in case the uri only works by querying the part after the last backslash + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values + elif ( + condensed_data_holder[count][0][i] in data[j][aboutcolumn] + ): # this is in case the uri only works by querying the part after the last backslash condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values - elif data[j][namecolumn] == condensed_data_holder[count][0][ - i]: # in the dataframe, the name is in column 12 + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values + elif ( + data[j][namecolumn] == condensed_data_holder[count][0][i] + ): # in the dataframe, the name is in column 12 condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values - elif condensed_data_holder[count][0][i] == data[j][ - datacolumn]: # in the dataframe, the name is in column 9 + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next 
row to add the proper values + elif ( + condensed_data_holder[count][0][i] == data[j][datacolumn] + ): # in the dataframe, the name is in column 9 condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values except IndexError: numrows = numrows + 1 numrows = 1 # resets to the first row for the next variable temp_list = condensed_data_holder[count] - for j in range(len(temp_list[0]) - 1, 0, - -1): # if the software appends a column with 0 as the heading, it removes this null column + for j in range( + len(temp_list[0]) - 1, 0, -1 + ): # if the software appends a column with 0 as the heading, it removes this null column if temp_list[0][j] == "0" or temp_list[0][j] == "NaN": for row in condensed_data_holder[count]: row.pop(j) @@ -192,7 +271,9 @@ def data_aggregation(): # all data from all the files is collected count1 = 0 for i in range(len(condensed_data_holder[count][0])): if " " in condensed_data_holder[count][0][i]: - condensed_data_holder[count][0][i] = condensed_data_holder[count][0][i].replace(" ", "_") + condensed_data_holder[count][0][i] = condensed_data_holder[count][ + 0 + ][i].replace(" ", "_") for i in range(len(var_list)): if "/" in var_list[i]: split = var_list[i].split("/") @@ -202,21 +283,28 @@ def data_aggregation(): # all data from all the files is collected count = count + 1 if len(not_found_list) > 0: print( - "***********************************************************************************************************") + "***********************************************************************************************************" + ) print() print("Your variables were " + v) print() print( - "The following variables were not found in " + nidm_file + ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables.") - if (o is not None): + "The following variables were not found in " + + nidm_file + + ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables." + ) + if o is not None: f = open(o, "a") f.write("Your variables were " + v) f.write( - "The following variables were not found in " + nidm_file + ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables.") + "The following variables were not found in " + + nidm_file + + ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables." + ) f.close() for i in range(0, len(not_found_list)): print(str(i + 1) + ". " + not_found_list[i]) - if (o is not None): + if o is not None: f = open(o, "a") f.write(str(i + 1) + ". " + not_found_list[i]) f.close() @@ -227,36 +315,46 @@ def data_aggregation(): # all data from all the files is collected if not_found_count > 0: exit(1) - else: print("ERROR: No query parameter provided. 
See help:") print() os.system("pynidm k-means --help") exit(1) -def dataparsing(): #The data is changed to a format that is usable by the linear regression method + +def dataparsing(): # The data is changed to a format that is usable by the linear regression method global condensed_data condensed_data = [] for i in range(0, len(file_list)): condensed_data = condensed_data + condensed_data_holder[i] global k_num - if len(condensed_data[0])<=k_num: - print("\nThe maximum number of clusters specified is greater than the amount of data present.") - print("The algorithm cannot run with this, so k_num will be reduced to 1 less than the length of the dataset.") - k_num = len(condensed_data) -1 + if len(condensed_data[0]) <= k_num: + print( + "\nThe maximum number of clusters specified is greater than the amount of data present." + ) + print( + "The algorithm cannot run with this, so k_num will be reduced to 1 less than the length of the dataset." + ) + k_num = len(condensed_data) - 1 print("The k_num value is now: " + str(k_num)) - x = pd.read_csv(opencsv(condensed_data)) # changes the dataframe to a csv to make it easier to work with + x = pd.read_csv( + opencsv(condensed_data) + ) # changes the dataframe to a csv to make it easier to work with x.head() # prints what the csv looks like x.dtypes # checks data format obj_df = x.select_dtypes # puts all the variables in a dataset x.shape # says number of rows and columns in form of tuple x.describe() # says dataset statistics obj_df = x.select_dtypes( - include=['object']).copy() # takes everything that is an object (not float or int) and puts it in a new dataset + include=["object"] + ).copy() # takes everything that is an object (not float or int) and puts it in a new dataset obj_df.head() # prints the new dataset - int_df = x.select_dtypes(include=['int64']).copy() # takes everything that is an int and puts it in a new dataset + int_df = x.select_dtypes( + include=["int64"] + ).copy() # takes everything that is an int and puts it in a new dataset float_df = x.select_dtypes( - include=['float64']).copy() # takes everything that is a float and puts it in a new dataset + include=["float64"] + ).copy() # takes everything that is a float and puts it in a new dataset df_int_float = pd.concat([float_df, int_df], axis=1) stringvars = [] # starts a list that will store all variables that are not numbers for i in range(1, len(condensed_data)): # goes through each variable @@ -264,30 +362,42 @@ def dataparsing(): #The data is changed to a format that is usable by the linear try: # if the value of the field can be turned into a float (is numerical) float(condensed_data[i][j]) # this means it's a number except ValueError: # if it can't be (is a string) - if condensed_data[0][ - j] not in stringvars: # adds the variable name to the list if it isn't there already + if ( + condensed_data[0][j] not in stringvars + ): # adds the variable name to the list if it isn't there already stringvars.append(condensed_data[0][j]) - le = preprocessing.LabelEncoder() # anything involving le shows the encoding of categorical variables + le = ( + preprocessing.LabelEncoder() + ) # anything involving le shows the encoding of categorical variables for i in range(len(stringvars)): le.fit(obj_df[stringvars[i]].astype(str)) - obj_df_trf = obj_df.astype(str).apply(le.fit_transform) # transforms the categorical variables into numbers. + obj_df_trf = obj_df.astype(str).apply( + le.fit_transform + ) # transforms the categorical variables into numbers. 
global df_final # also used in linreg() if not obj_df_trf.empty: - df_final = pd.concat([df_int_float, obj_df_trf], axis=1) # join_axes=[df_int_float.index]) + df_final = pd.concat( + [df_int_float, obj_df_trf], axis=1 + ) # join_axes=[df_int_float.index]) else: df_final = df_int_float df_final.head() # shows the final dataset with all the encoding print(df_final) # prints the final dataset print() - print("***********************************************************************************************************") + print( + "***********************************************************************************************************" + ) print() - if (o is not None): + if o is not None: f = open(o, "a") f.write(df_final.to_string(header=True, index=True)) f.write( - "\n\n***********************************************************************************************************") + "\n\n***********************************************************************************************************" + ) f.write("\n\nModel Results: ") f.close() + + def cluster_number(): index = 0 global levels # also used in contrasting() @@ -300,8 +410,8 @@ def cluster_number(): # Beginning of the linear regression global X - #global y - #Unsure on how to proceed here with interacting variables, since I'm sure dmatrices won't work + # global y + # Unsure on how to proceed here with interacting variables, since I'm sure dmatrices won't work """scaler = MinMaxScaler() @@ -315,32 +425,34 @@ def cluster_number(): ss = [] for i in range(2, k_num): - model = GaussianMixture(n_components=i, init_params='kmeans') + model = GaussianMixture(n_components=i, init_params="kmeans") cluster_labels = model.fit_predict(X) silhouette_avg = silhouette_score(X, cluster_labels) ss.append(silhouette_avg) optimal_i = 0 - distance_to_one = abs(1-ss[0]) - for i in range(0,len(ss)): - if abs(1-ss[i]) <= distance_to_one: + distance_to_one = abs(1 - ss[0]) + for i in range(0, len(ss)): + if abs(1 - ss[i]) <= distance_to_one: optimal_i = i - distance_to_one = abs(1-ss[i]) + distance_to_one = abs(1 - ss[i]) n_clusters = optimal_i + 2 - print("Optimal number of clusters: " + str(n_clusters)) #optimal number of clusters + print( + "Optimal number of clusters: " + str(n_clusters) + ) # optimal number of clusters gmm = GaussianMixture(n_components=n_clusters).fit(X) labels = gmm.fit(X).predict(X) ax = None or plt.gca() X = df_final[var_list].to_numpy() - ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis', zorder=2) - ax.axis('equal') + ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap="viridis", zorder=2) + ax.axis("equal") plt.show() if "a" in cm.lower(): print("AIC\n") aic = [] for i in range(2, k_num): - model = GaussianMixture(n_components=i, init_params='kmeans') + model = GaussianMixture(n_components=i, init_params="kmeans") model.fit(X) aic.append(model.bic(X)) min_aic = aic[0] @@ -349,8 +461,10 @@ def cluster_number(): if aic[i] <= min_aic: min_aic = aic[i] min_i = i - n_clusters = min_i +2 - print("Optimal number of clusters: " + str(n_clusters)) #optimal number of clusters, minimizing aic + n_clusters = min_i + 2 + print( + "Optimal number of clusters: " + str(n_clusters) + ) # optimal number of clusters, minimizing aic """min_aic = aic[0] max_aic = aic[0] max_i = 0 @@ -388,15 +502,15 @@ def cluster_number(): labels = gmm.fit(X).predict(X) ax = None or plt.gca() X = df_final[var_list].to_numpy() - ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis', zorder=2) - ax.axis('equal') + ax.scatter(X[:, 0], X[:, 1], c=labels, 
s=40, cmap="viridis", zorder=2) + ax.axis("equal") plt.show() if "b" in cm.lower(): print("\n\nBIC\n") bic = [] for i in range(2, k_num): - model = GaussianMixture(n_components=i, init_params='kmeans') + model = GaussianMixture(n_components=i, init_params="kmeans") model.fit(X) bic.append(model.bic(X)) min_bic = bic[0] @@ -437,28 +551,37 @@ def cluster_number(): n_clusters = x + 2 plt.plot(bic) plt.show()""" - print("Optimal number of clusters: " + str(n_clusters)) #optimal number of clusters + print( + "Optimal number of clusters: " + str(n_clusters) + ) # optimal number of clusters gmm = GaussianMixture(n_components=n_clusters).fit(X) labels = gmm.fit(X).predict(X) ax = None or plt.gca() X = df_final[var_list].to_numpy() - ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis', zorder=2) - ax.axis('equal') + ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap="viridis", zorder=2) + ax.axis("equal") plt.show() - if (o is not None): + if o is not None: f = open(o, "a") f.close() + + def opencsv(data): """saves a list of lists as a csv and opens""" - import tempfile - import os import csv - handle, fn = tempfile.mkstemp(suffix='.csv') - with os.fdopen(handle,"w", encoding='utf8',errors='surrogateescape',newline='') as f: + import os + import tempfile + + handle, fn = tempfile.mkstemp(suffix=".csv") + with os.fdopen( + handle, "w", encoding="utf8", errors="surrogateescape", newline="" + ) as f: writer = csv.writer(f) writer.writerows(data) return fn + + # it can be used calling the script `python nidm_query.py -nl ... -q .. if __name__ == "__main__": gmm() diff --git a/nidm/experiment/tools/nidm_kmeans.py b/nidm/experiment/tools/nidm_kmeans.py index e0721cdc..808fd612 100644 --- a/nidm/experiment/tools/nidm_kmeans.py +++ b/nidm/experiment/tools/nidm_kmeans.py @@ -1,39 +1,63 @@ +import csv import os import tempfile -import pandas as pd -import csv -from nidm.experiment.Query import GetProjectsUUID import click +import matplotlib.pyplot as plt +from nidm.experiment.Query import GetProjectsUUID from nidm.experiment.tools.click_base import cli from nidm.experiment.tools.rest import RestParser import numpy as np -import matplotlib.pyplot as plt +import pandas as pd +from sklearn import preprocessing from sklearn.cluster import KMeans from sklearn.decomposition import PCA -from sklearn.metrics import silhouette_score +from sklearn.metrics import ( + calinski_harabaz_score, + davies_bouldin_score, + silhouette_score, +) from sklearn.preprocessing import LabelEncoder, MinMaxScaler -from sklearn import preprocessing -from sklearn.metrics import davies_bouldin_score -from sklearn.metrics import calinski_harabaz_score @cli.command() -@click.option("--nidm_file_list", "-nl", required=True, - help="A comma separated list of NIDM files with full path") -@click.option("--var","-variables", required=True, - help="This parameter is for the variables the user would like to complete the k-means algorithm on.\nThe way this looks in the command is python3 nidm_kmeans.py -nl MTdemog_aseg_v2.ttl -v \"fs_003343,age*sex,sex,age,group,age*group,bmi\"") -@click.option("--k_range", "-k", required=True, - help="The maximum number of clusters to try. 
The algorithm will go from 2 to this number to determine the optimal number of clusters.") -@click.option("--optimal_cluster_method", "-m", required=True, - help="The criterion used to select the optimal partitioning (either Gap Statistic, Elbow Method, Silhouette Coefficient, Calinski-Harabasz Index, or Davies_Bouldin Index).") -@click.option("--output_file", "-o", required=False, - help="Optional output file (TXT) to store results of the linear regression, contrast, and regularization") +@click.option( + "--nidm_file_list", + "-nl", + required=True, + help="A comma separated list of NIDM files with full path", +) +@click.option( + "--var", + "-variables", + required=True, + help='This parameter is for the variables the user would like to complete the k-means algorithm on.\nThe way this looks in the command is python3 nidm_kmeans.py -nl MTdemog_aseg_v2.ttl -v "fs_003343,age*sex,sex,age,group,age*group,bmi"', +) +@click.option( + "--k_range", + "-k", + required=True, + help="The maximum number of clusters to try. The algorithm will go from 2 to this number to determine the optimal number of clusters.", +) +@click.option( + "--optimal_cluster_method", + "-m", + required=True, + help="The criterion used to select the optimal partitioning (either Gap Statistic, Elbow Method, Silhouette Coefficient, Calinski-Harabasz Index, or Davies_Bouldin Index).", +) +@click.option( + "--output_file", + "-o", + required=False, + help="Optional output file (TXT) to store results of the linear regression, contrast, and regularization", +) def k_means(nidm_file_list, output_file, var, k_range, optimal_cluster_method): """ - This function provides a tool to complete k-means clustering on NIDM data. - """ + This function provides a tool to complete k-means clustering on NIDM data. + """ global v # Needed to do this because the code only used the parameters in the first method, meaning I had to move it all to method 1. - v = var.strip() # used in data_aggregation, kmenas(), spaces stripped from left and right + v = ( + var.strip() + ) # used in data_aggregation, kmenas(), spaces stripped from left and right global o # used in dataparsing() o = output_file global n # used in data_aggregation() @@ -46,21 +70,34 @@ def k_means(nidm_file_list, output_file, var, k_range, optimal_cluster_method): dataparsing() cluster_number() + def data_aggregation(): # all data from all the files is collected - """ This function provides query support for NIDM graphs. 
""" + """This function provides query support for NIDM graphs.""" # query result list results = [] # if there is a CDE file list, seed the CDE cache - if v: #ex: age,sex,DX_GROUP - print("***********************************************************************************************************") - command = "pynidm k-means -nl " + n + " -variables \"" + v + "\" " + "-k " + str(k_num) + " -m " + cm + if v: # ex: age,sex,DX_GROUP + print( + "***********************************************************************************************************" + ) + command = ( + "pynidm k-means -nl " + + n + + ' -variables "' + + v + + '" ' + + "-k " + + str(k_num) + + " -m " + + cm + ) print("Your command was: " + command) - if (o is not None): + if o is not None: f = open(o, "w") f.write("Your command was " + command) f.close() - verbosity=0 + verbosity = 0 restParser = RestParser(verbosity_level=int(verbosity)) restParser.setOutputFormat(RestParser.OBJECT_FORMAT) global df_list # used in dataparsing() @@ -89,98 +126,142 @@ def data_aggregation(): # all data from all the files is collected # below, we edit the model so it splits by +,~, or =. However, to help it out in catching everything # we replaced ~ and = with a + so that we can still use split. Regex wasn't working. var_list = v.split(",") - for i in range(len(var_list)): # here, we remove any leading or trailing spaces + for i in range( + len(var_list) + ): # here, we remove any leading or trailing spaces var_list[i] = var_list[i].strip() # set the dependent variable to the one dependent variable in the model global vars # used in dataparsing() vars = "" for i in range(len(var_list) - 1, -1, -1): - if not "*" in var_list[i]: # removing the star term from the columns we're about to pull from data + if ( + not "*" in var_list[i] + ): # removing the star term from the columns we're about to pull from data vars = vars + var_list[i] + "," else: - print("Interacting variables are not present in clustering models. They will be removed.") - vars = vars[0:len(vars) - 1] - uri = "/projects/" + project[0].toPython().split("/")[-1] + "?fields=" + vars + print( + "Interacting variables are not present in clustering models. They will be removed." 
+ ) + vars = vars[0 : len(vars) - 1] + uri = ( + "/projects/" + project[0].toPython().split("/")[-1] + "?fields=" + vars + ) # get fields output from each file and concatenate df_list_holder[count].append(pd.DataFrame(restParser.run([nidm_file], uri))) # global dep_var df = pd.concat(df_list_holder[count]) - with tempfile.NamedTemporaryFile(delete=False) as temp: # turns the dataframe into a temporary csv - df.to_csv(temp.name + '.csv') + with tempfile.NamedTemporaryFile( + delete=False + ) as temp: # turns the dataframe into a temporary csv + df.to_csv(temp.name + ".csv") temp.close() - data = list(csv.reader(open( - temp.name + '.csv'))) # makes the csv a 2D list to make it easier to call the contents of certain cells + data = list( + csv.reader(open(temp.name + ".csv")) + ) # makes the csv a 2D list to make it easier to call the contents of certain cells var_list = vars.split(",") # makes a list of the independent variables numcols = (len(data) - 1) // ( - len(var_list)) # Finds the number of columns in the original dataframe + len(var_list) + ) # Finds the number of columns in the original dataframe global condensed_data # also used in linreg() condensed_data_holder[count] = [ - [0] * (len(var_list))] # makes an array 1 row by the number of necessary columns + [0] * (len(var_list)) + ] # makes an array 1 row by the number of necessary columns for i in range( - numcols): # makes the 2D array big enough to store all of the necessary values in the edited dataset + numcols + ): # makes the 2D array big enough to store all of the necessary values in the edited dataset condensed_data_holder[count].append([0] * (len(var_list))) for m in range(0, len(var_list)): end_url = var_list[m].split("/") if "/" in var_list[m]: var_list[m] = end_url[len(end_url) - 1] - for i in range(len(var_list)): # stores the independent variable names in the first row + for i in range( + len(var_list) + ): # stores the independent variable names in the first row condensed_data_holder[count][0][i] = var_list[i] numrows = 1 # begins at the first row to add data - fieldcolumn = 0 # the column the variable name is in in the original dataset + fieldcolumn = ( + 0 # the column the variable name is in in the original dataset + ) valuecolumn = 0 # the column the value is in in the original dataset datacolumn = 0 # if it is identified by the dataElement name instead of the field's name not_found_list = [] for i in range(len(data[0])): - if data[0][i] == 'sourceVariable': # finds the column where the variable names are + if ( + data[0][i] == "sourceVariable" + ): # finds the column where the variable names are fieldcolumn = i - elif data[0][i] == 'source_variable': # finds the column where the variable names are + elif ( + data[0][i] == "source_variable" + ): # finds the column where the variable names are fieldcolumn = i - elif data[0][i] == 'isAbout': + elif data[0][i] == "isAbout": aboutcolumn = i - elif data[0][i] == 'label': + elif data[0][i] == "label": namecolumn = i # finds the column where the variable names are - elif data[0][i] == 'value': + elif data[0][i] == "value": valuecolumn = i # finds the column where the values are - elif data[0][i] == 'dataElement': # finds the column where the data element is if necessary + elif ( + data[0][i] == "dataElement" + ): # finds the column where the data element is if necessary datacolumn = i for i in range( - len(condensed_data_holder[count][ - 0])): # starts iterating through the dataset, looking for the name in that - for j in range(1, len(data)): # column, so it can append 
the values under the proper variables + len(condensed_data_holder[count][0]) + ): # starts iterating through the dataset, looking for the name in that + for j in range( + 1, len(data) + ): # column, so it can append the values under the proper variables try: - if data[j][fieldcolumn] == condensed_data_holder[count][0][ - i]: # in the dataframe, the name is in column 3 + if ( + data[j][fieldcolumn] == condensed_data_holder[count][0][i] + ): # in the dataframe, the name is in column 3 condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values - elif data[j][aboutcolumn] == condensed_data_holder[count][0][ - i]: + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values + elif data[j][aboutcolumn] == condensed_data_holder[count][0][i]: condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values - elif condensed_data_holder[count][0][ - i] in data[j][ - aboutcolumn]: # this is in case the uri only works by querying the part after the last backslash + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values + elif ( + condensed_data_holder[count][0][i] in data[j][aboutcolumn] + ): # this is in case the uri only works by querying the part after the last backslash condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values - elif data[j][namecolumn] == condensed_data_holder[count][0][ - i]: # in the dataframe, the name is in column 12 + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values + elif ( + data[j][namecolumn] == condensed_data_holder[count][0][i] + ): # in the dataframe, the name is in column 12 condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values - elif condensed_data_holder[count][0][i] == data[j][ - datacolumn]: # in the dataframe, the name is in column 9 + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values + elif ( + condensed_data_holder[count][0][i] == data[j][datacolumn] + ): # in the dataframe, the name is in column 9 condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values except IndexError: numrows = numrows + 1 numrows = 1 # resets to the first row for the next variable temp_list = condensed_data_holder[count] - for j in range(len(temp_list[0]) - 1, 0, - -1): # if the software appends a column with 0 as the heading, it removes this null column + for j in range( + len(temp_list[0]) - 1, 0, -1 + ): # if the software appends a column with 0 as the heading, it removes this null column if temp_list[0][j] == "0" or temp_list[0][j] == "NaN": for 
row in condensed_data_holder[count]: row.pop(j) @@ -195,7 +276,9 @@ def data_aggregation(): # all data from all the files is collected count1 = 0 for i in range(len(condensed_data_holder[count][0])): if " " in condensed_data_holder[count][0][i]: - condensed_data_holder[count][0][i] = condensed_data_holder[count][0][i].replace(" ", "_") + condensed_data_holder[count][0][i] = condensed_data_holder[count][ + 0 + ][i].replace(" ", "_") for i in range(len(var_list)): if "/" in var_list[i]: split = var_list[i].split("/") @@ -205,21 +288,28 @@ def data_aggregation(): # all data from all the files is collected count = count + 1 if len(not_found_list) > 0: print( - "***********************************************************************************************************") + "***********************************************************************************************************" + ) print() print("Your variables were " + v) print() print( - "The following variables were not found in " + nidm_file + ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables.") - if (o is not None): + "The following variables were not found in " + + nidm_file + + ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables." + ) + if o is not None: f = open(o, "a") f.write("Your variables were " + v) f.write( - "The following variables were not found in " + nidm_file + ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables.") + "The following variables were not found in " + + nidm_file + + ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables." + ) f.close() for i in range(0, len(not_found_list)): print(str(i + 1) + ". " + not_found_list[i]) - if (o is not None): + if o is not None: f = open(o, "a") f.write(str(i + 1) + ". " + not_found_list[i]) f.close() @@ -230,36 +320,46 @@ def data_aggregation(): # all data from all the files is collected if not_found_count > 0: exit(1) - else: print("ERROR: No query parameter provided. See help:") print() os.system("pynidm k-means --help") exit(1) -def dataparsing(): #The data is changed to a format that is usable by the linear regression method + +def dataparsing(): # The data is changed to a format that is usable by the linear regression method global condensed_data condensed_data = [] for i in range(0, len(file_list)): condensed_data = condensed_data + condensed_data_holder[i] global k_num if len(condensed_data[0]) <= k_num: - print("\nThe maximum number of clusters specified is greater than the amount of data present.") - print("The algorithm cannot run with this, so k_num will be reduced to 1 less than the length of the dataset.") + print( + "\nThe maximum number of clusters specified is greater than the amount of data present." + ) + print( + "The algorithm cannot run with this, so k_num will be reduced to 1 less than the length of the dataset." 
+ ) k_num = len(condensed_data) - 1 print("The k_num value is now: " + str(k_num)) - x = pd.read_csv(opencsv(condensed_data)) # changes the dataframe to a csv to make it easier to work with + x = pd.read_csv( + opencsv(condensed_data) + ) # changes the dataframe to a csv to make it easier to work with x.head() # prints what the csv looks like x.dtypes # checks data format obj_df = x.select_dtypes # puts all the variables in a dataset x.shape # says number of rows and columns in form of tuple x.describe() # says dataset statistics obj_df = x.select_dtypes( - include=['object']).copy() # takes everything that is an object (not float or int) and puts it in a new dataset + include=["object"] + ).copy() # takes everything that is an object (not float or int) and puts it in a new dataset obj_df.head() # prints the new dataset - int_df = x.select_dtypes(include=['int64']).copy() # takes everything that is an int and puts it in a new dataset + int_df = x.select_dtypes( + include=["int64"] + ).copy() # takes everything that is an int and puts it in a new dataset float_df = x.select_dtypes( - include=['float64']).copy() # takes everything that is a float and puts it in a new dataset + include=["float64"] + ).copy() # takes everything that is a float and puts it in a new dataset df_int_float = pd.concat([float_df, int_df], axis=1) stringvars = [] # starts a list that will store all variables that are not numbers for i in range(1, len(condensed_data)): # goes through each variable @@ -267,30 +367,42 @@ def dataparsing(): #The data is changed to a format that is usable by the linear try: # if the value of the field can be turned into a float (is numerical) float(condensed_data[i][j]) # this means it's a number except ValueError: # if it can't be (is a string) - if condensed_data[0][ - j] not in stringvars: # adds the variable name to the list if it isn't there already + if ( + condensed_data[0][j] not in stringvars + ): # adds the variable name to the list if it isn't there already stringvars.append(condensed_data[0][j]) - le = preprocessing.LabelEncoder() # anything involving le shows the encoding of categorical variables + le = ( + preprocessing.LabelEncoder() + ) # anything involving le shows the encoding of categorical variables for i in range(len(stringvars)): le.fit(obj_df[stringvars[i]].astype(str)) - obj_df_trf = obj_df.astype(str).apply(le.fit_transform) # transforms the categorical variables into numbers. + obj_df_trf = obj_df.astype(str).apply( + le.fit_transform + ) # transforms the categorical variables into numbers. 
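# --- A small, self-contained sketch of the condensed_data -> CSV -> DataFrame
# --- hand-off used above (opencsv followed by pd.read_csv); the rows below are
# --- made up for illustration, with the header row first.
import csv
import os
import tempfile

import pandas as pd

rows = [["age", "group"], ["10.0", "ASD"], ["12.5", "TD"]]
handle, path = tempfile.mkstemp(suffix=".csv")
with os.fdopen(handle, "w", newline="") as f:
    csv.writer(f).writerows(rows)  # write the 2D list out as a throw-away CSV
frame = pd.read_csv(path)  # pandas re-infers dtypes, so "age" comes back as float64
os.remove(path)  # unlike the tool, clean up the temporary file here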
global df_final # also used in linreg() if not obj_df_trf.empty: - df_final = pd.concat([df_int_float, obj_df_trf], axis=1) # join_axes=[df_int_float.index]) + df_final = pd.concat( + [df_int_float, obj_df_trf], axis=1 + ) # join_axes=[df_int_float.index]) else: df_final = df_int_float df_final.head() # shows the final dataset with all the encoding print(df_final) # prints the final dataset print() - print("***********************************************************************************************************") + print( + "***********************************************************************************************************" + ) print() - if (o is not None): + if o is not None: f = open(o, "a") f.write(df_final.to_string(header=True, index=True)) f.write( - "\n\n***********************************************************************************************************") + "\n\n***********************************************************************************************************" + ) f.write("\n\nModel Results: ") f.close() + + def cluster_number(): index = 0 global levels # also used in contrasting() @@ -303,8 +415,8 @@ def cluster_number(): # Beginning of the linear regression global X - #global y - #Unsure on how to proceed here with interacting variables, since I'm sure dmatrices won't work + # global y + # Unsure on how to proceed here with interacting variables, since I'm sure dmatrices won't work """scaler = MinMaxScaler() @@ -314,12 +426,12 @@ def cluster_number(): X = df_final[var_list] if "ga" in cm.lower(): print("\n\nGap Statistic") - gaps = np.zeros((len(range(2,int(k_num))))) + gaps = np.zeros((len(range(2, int(k_num))))) global resulting_df - resulting_df = pd.DataFrame({'clusterCount':[],'gap':[]}) + resulting_df = pd.DataFrame({"clusterCount": [], "gap": []}) global gap_index, k - for gap_index, k in enumerate(range(2,int(k_num))): - dispersion_results = np.zeros(3) #make three random datasets + for gap_index, k in enumerate(range(2, int(k_num))): + dispersion_results = np.zeros(3) # make three random datasets for i in range(3): random_reference = np.random.random_sample(size=df_final.shape) km = KMeans(k) @@ -335,25 +447,35 @@ def cluster_number(): gaps[gap_index] = gap max_gap = gaps[0] optimal_i = 0 - for i in range(1,len(gaps)): + for i in range(1, len(gaps)): if gaps[i] > max_gap: optimal_i = i max_gap = gaps[i] optimal_cluster = optimal_i + 2 - print("Optimal number of clusters: " + str(optimal_cluster)) #the optimal number of clusters for gap statistic - km = KMeans(n_clusters=optimal_cluster, init='k-means++', max_iter=300, n_init=10, random_state=0) + print( + "Optimal number of clusters: " + str(optimal_cluster) + ) # the optimal number of clusters for gap statistic + km = KMeans( + n_clusters=optimal_cluster, + init="k-means++", + max_iter=300, + n_init=10, + random_state=0, + ) labels = km.fit(X).predict(X) ax = None or plt.gca() X = df_final[var_list].to_numpy() - ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis', zorder=2) - ax.axis('equal') + ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap="viridis", zorder=2) + ax.axis("equal") plt.show() if "el" in cm.lower(): print("\n\nElbow Method") sse = [] - for i in range(2,int(k_num)): - km = KMeans(n_clusters=i, init='k-means++', max_iter=300, n_init=10, random_state=0) + for i in range(2, int(k_num)): + km = KMeans( + n_clusters=i, init="k-means++", max_iter=300, n_init=10, random_state=0 + ) model = km.fit(X) sse.append(km.inertia_) min_sse = sse[0] @@ -369,36 +491,46 @@ def 
cluster_number(): min_i = i p1 = np.array([min_i, sse[min_i]]) p2 = np.array([max_i, sse[max_i]]) - #the way I am doing the elbow method is as follows: - #the different sse values form a curve like an L (like an exponential decay) - #The elbow is the point furthest from a line connecting max and min - #So I am calculating the distance, and the maximum distance from point to curve shows the optimal point - #AKA the number of clusters + # the way I am doing the elbow method is as follows: + # the different sse values form a curve like an L (like an exponential decay) + # The elbow is the point furthest from a line connecting max and min + # So I am calculating the distance, and the maximum distance from point to curve shows the optimal point + # AKA the number of clusters dist = [] - for n in range(0,len(sse)): + for n in range(0, len(sse)): norm = np.linalg.norm - p3 = np.array([n,sse[n]]) - dist.append(np.abs(norm(np.cross(p2-p1, p1-p3)))/norm(p2-p1)) + p3 = np.array([n, sse[n]]) + dist.append(np.abs(norm(np.cross(p2 - p1, p1 - p3))) / norm(p2 - p1)) max_dist = dist[0] optimal_cluster = 2 - for x in range(1,len(dist)): - if dist[x]>=max_dist: + for x in range(1, len(dist)): + if dist[x] >= max_dist: max_dist = dist[x] - optimal_cluster = x+2 - print("Optimal number of clusters: " + str(optimal_cluster)) #the optimal number of clusters for elbow method - km = KMeans(n_clusters=optimal_cluster, init='k-means++', max_iter=300, n_init=10, random_state=0) + optimal_cluster = x + 2 + print( + "Optimal number of clusters: " + str(optimal_cluster) + ) # the optimal number of clusters for elbow method + km = KMeans( + n_clusters=optimal_cluster, + init="k-means++", + max_iter=300, + n_init=10, + random_state=0, + ) labels = km.fit(X).predict(X) ax = None or plt.gca() X = df_final[var_list].to_numpy() - ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis', zorder=2) - ax.axis('equal') + ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap="viridis", zorder=2) + ax.axis("equal") plt.show() if "si" in cm.lower(): print("Silhouette Score\n") ss = [] - for i in range(2,int(k_num)): - km = KMeans(n_clusters=i, init='k-means++', max_iter=300, n_init=10, random_state=0) + for i in range(2, int(k_num)): + km = KMeans( + n_clusters=i, init="k-means++", max_iter=300, n_init=10, random_state=0 + ) cluster_labels = km.fit_predict(X) silhouette_avg = silhouette_score(X, cluster_labels) ss.append(silhouette_avg) @@ -409,18 +541,29 @@ def cluster_number(): optimal_i = i distance_to_one = abs(1 - ss[i]) n_clusters = optimal_i + 2 - print("Optimal number of clusters: " + str(n_clusters)) #the optimal number of clusters - km = KMeans(n_clusters=n_clusters, init='k-means++', max_iter=300, n_init=10, random_state=0) + print( + "Optimal number of clusters: " + str(n_clusters) + ) # the optimal number of clusters + km = KMeans( + n_clusters=n_clusters, + init="k-means++", + max_iter=300, + n_init=10, + random_state=0, + ) labels = km.fit(X).predict(X) ax = None or plt.gca() X = df_final[var_list].to_numpy() - ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis', zorder=2) - ax.axis('equal') + ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap="viridis", zorder=2) + ax.axis("equal") plt.show() if "ca" in cm.lower(): import warnings - warnings.filterwarnings("ignore", category=FutureWarning) #it is a function for 0.24 but says it is deprecated in 0.23 + + warnings.filterwarnings( + "ignore", category=FutureWarning + ) # it is a function for 0.24 but says it is deprecated in 0.23 print("Calinski-Harabasz Index\n") 
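# --- A worked example of the elbow heuristic described above, using made-up
# --- SSE (inertia) values for k = 2..6: the chosen k is the point farthest
# --- from the straight line joining the smallest and largest SSE.
import numpy as np

sse = [100.0, 40.0, 25.0, 20.0, 18.0]      # hypothetical inertia for k = 2..6
p1 = np.array([np.argmin(sse), min(sse)])  # endpoint at the minimum SSE
p2 = np.array([np.argmax(sse), max(sse)])  # endpoint at the maximum SSE
dist = [
    np.abs(np.cross(p2 - p1, p1 - np.array([n, s]))) / np.linalg.norm(p2 - p1)
    for n, s in enumerate(sse)
]
k_elbow = int(np.argmax(dist)) + 2         # index 1 is farthest here, so k = 3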
pca = PCA(n_components=2) impca = pca.fit_transform(X) @@ -428,8 +571,14 @@ def cluster_number(): centers = list(range(2, int(k_num))) for center in centers: - km = KMeans(n_clusters=center, init='k-means++', max_iter=300, n_init=10, random_state=0).fit(impca) - score = calinski_harabaz_score(impca,km.labels_) + km = KMeans( + n_clusters=center, + init="k-means++", + max_iter=300, + n_init=10, + random_state=0, + ).fit(impca) + score = calinski_harabaz_score(impca, km.labels_) scores.append(score) optimal_i = 0 max_score = scores[0] @@ -439,60 +588,88 @@ def cluster_number(): optimal_i = i max_score = scores[i] n_clusters = optimal_i + 2 - print("Optimal number of clusters: " + str(n_clusters)) #the optimal number of clusters - km = KMeans(n_clusters=n_clusters, init='k-means++', max_iter=300, n_init=10, random_state=0) + print( + "Optimal number of clusters: " + str(n_clusters) + ) # the optimal number of clusters + km = KMeans( + n_clusters=n_clusters, + init="k-means++", + max_iter=300, + n_init=10, + random_state=0, + ) labels = km.fit(X).predict(X) ax = None or plt.gca() X = df_final[var_list].to_numpy() - ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis', zorder=2) - ax.axis('equal') + ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap="viridis", zorder=2) + ax.axis("equal") plt.show() if "da" in cm.lower(): print("Davies-Bouldin Index\n") scores = [] - centers = list(range(2,int(k_num))) + centers = list(range(2, int(k_num))) for center in centers: - km = KMeans(n_clusters=center, init='k-means++', max_iter=300, n_init=10, random_state=0) + km = KMeans( + n_clusters=center, + init="k-means++", + max_iter=300, + n_init=10, + random_state=0, + ) model = km.fit_predict(X) score = davies_bouldin_score(X, model) scores.append(score) optimal_i = 0 min_score = scores[0] - for i in range(1,len(scores)): - if scores[i]<=min_score: + for i in range(1, len(scores)): + if scores[i] <= min_score: optimal_i = i min_score = scores[i] n_clusters = optimal_i + 2 - print("Optimal number of clusters: " + str(n_clusters)) #the optimal number of clusters - km = KMeans(n_clusters=n_clusters, init='k-means++', max_iter=300, n_init=10, random_state=0) + print( + "Optimal number of clusters: " + str(n_clusters) + ) # the optimal number of clusters + km = KMeans( + n_clusters=n_clusters, + init="k-means++", + max_iter=300, + n_init=10, + random_state=0, + ) labels = km.fit(X).predict(X) ax = None or plt.gca() X = df_final[var_list].to_numpy() - ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis', zorder=2) - ax.axis('equal') + ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap="viridis", zorder=2) + ax.axis("equal") plt.show() if "de" in cm.lower(): print("Dendrogram") - #ask for help: how does one do a dendrogram, also without graphing? + # ask for help: how does one do a dendrogram, also without graphing? - if (o is not None): + if o is not None: f = open(o, "a") f.close() + + def opencsv(data): """saves a list of lists as a csv and opens""" - import tempfile - import os import csv - handle, fn = tempfile.mkstemp(suffix='.csv') - with os.fdopen(handle,"w", encoding='utf8',errors='surrogateescape',newline='') as f: + import os + import tempfile + + handle, fn = tempfile.mkstemp(suffix=".csv") + with os.fdopen( + handle, "w", encoding="utf8", errors="surrogateescape", newline="" + ) as f: writer = csv.writer(f) writer.writerows(data) return fn + # it can be used calling the script `python nidm_query.py -nl ... -q .. 
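# --- A compact sketch of scoring candidate cluster counts with the two
# --- index-based criteria used above; note that recent scikit-learn releases
# --- spell the first metric calinski_harabasz_score. The data here is random
# --- and only meant to show the selection rule (maximize CH, minimize DB).
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import calinski_harabasz_score, davies_bouldin_score

rng = np.random.default_rng(0)
X_demo = rng.normal(size=(60, 2))
ch, db = {}, {}
for k in range(2, 6):
    labels = KMeans(n_clusters=k, n_init=10, random_state=0).fit_predict(X_demo)
    ch[k] = calinski_harabasz_score(X_demo, labels)  # higher is better
    db[k] = davies_bouldin_score(X_demo, labels)     # lower is better
best_ch = max(ch, key=ch.get)
best_db = min(db, key=db.get)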
if __name__ == "__main__": k_means() diff --git a/nidm/experiment/tools/nidm_linreg.py b/nidm/experiment/tools/nidm_linreg.py index de562810..adaba375 100644 --- a/nidm/experiment/tools/nidm_linreg.py +++ b/nidm/experiment/tools/nidm_linreg.py @@ -1,6 +1,8 @@ # coding=utf-8 # !/usr/bin/env python +import csv + # ******************************************************************************************************* # ******************************************************************************************************* # nidm_linreg.py @@ -32,96 +34,109 @@ # ******************************************************************************************************* # ******************************************************************************************************* import os -import sys from os import system +from statistics import mean +import sys import tempfile -import pandas as pd -import csv -from patsy.highlevel import dmatrices -from nidm.experiment.Query import GetProjectsUUID import click +from nidm.experiment.Query import GetProjectsUUID from nidm.experiment.tools.click_base import cli from nidm.experiment.tools.rest import RestParser import numpy as np -from sklearn.linear_model import LinearRegression +import pandas as pd +from patsy.contrasts import ContrastMatrix, Diff, Helmert, Sum, Treatment +from patsy.highlevel import dmatrices from sklearn import preprocessing -from sklearn.linear_model import Ridge -from sklearn.linear_model import Lasso +from sklearn.linear_model import Lasso, LinearRegression, Ridge from sklearn.model_selection import cross_val_score - import statsmodels.api as sm from statsmodels.formula.api import ols - -from statistics import mean - -from patsy.contrasts import Treatment -from patsy.contrasts import ContrastMatrix -from patsy.contrasts import Sum -from patsy.contrasts import Diff -from patsy.contrasts import Helmert +MAX_ALPHA = 700 -MAX_ALPHA = 700 -#Defining the parameters of the commands. +# Defining the parameters of the commands. @cli.command() -@click.option("--nidm_file_list", "-nl", required=True, - help="A comma separated list of NIDM files with full path") -@click.option("--ctr", "-contrast", required=False, - help="This parameter will show differences in relationship by group (e.g. -contrast age*sex,group). It can be one variable, interacting variables, or multiple") -@click.option("--ml", "-model", required=True, - help="This parameter will return the results of the linear regression from all nidm files supplied\nThe way this looks in the command is python3 nidm_linreg.py -nl MTdemog_aseg_v2.ttl -model \"fs_003343 = age*sex + sex + age + group + age*group + bmi\" -contrast group -r L1") -@click.option("--output_file", "-o", required=False, - help="Optional output file (TXT) to store results of the linear regression, contrast, and regularization") -@click.option("--regularization", "-r", required=False, - help="This parameter will return the results of the linear regression with L1 or L2 regularization depending on the type specified, and the weight with the maximum likelihood solution") - +@click.option( + "--nidm_file_list", + "-nl", + required=True, + help="A comma separated list of NIDM files with full path", +) +@click.option( + "--ctr", + "-contrast", + required=False, + help="This parameter will show differences in relationship by group (e.g. -contrast age*sex,group). 
It can be one variable, interacting variables, or multiple", +) +@click.option( + "--ml", + "-model", + required=True, + help='This parameter will return the results of the linear regression from all nidm files supplied\nThe way this looks in the command is python3 nidm_linreg.py -nl MTdemog_aseg_v2.ttl -model "fs_003343 = age*sex + sex + age + group + age*group + bmi" -contrast group -r L1', +) +@click.option( + "--output_file", + "-o", + required=False, + help="Optional output file (TXT) to store results of the linear regression, contrast, and regularization", +) +@click.option( + "--regularization", + "-r", + required=False, + help="This parameter will return the results of the linear regression with L1 or L2 regularization depending on the type specified, and the weight with the maximum likelihood solution", +) def linear_regression(nidm_file_list, output_file, ml, ctr, regularization): """ - This function provides a tool to complete a linear regression on NIDM data with optional contrast and regularization. - """ - + This function provides a tool to complete a linear regression on NIDM data with optional contrast and regularization. + """ - #NOTE: Every time I make a global variable, it is because I need it in at least one other method. - global c #used in linreg(), contrasting() - c = ctr #Storing all important parameters in global variables so they can be accessed in other methods - global m #Needed to do this because the code only used the parameters in the first method, meaning I had to move it all to method 1. - m = ml.strip() #used in data_aggregation, linreg(), spaces stripped from left and right - global o #used in dataparsing() + # NOTE: Every time I make a global variable, it is because I need it in at least one other method. + global c # used in linreg(), contrasting() + c = ctr # Storing all important parameters in global variables so they can be accessed in other methods + global m # Needed to do this because the code only used the parameters in the first method, meaning I had to move it all to method 1. + m = ( + ml.strip() + ) # used in data_aggregation, linreg(), spaces stripped from left and right + global o # used in dataparsing() o = output_file - global n #used in data_aggregation() + global n # used in data_aggregation() n = nidm_file_list global r r = regularization - data_aggregation() #collects data - dataparsing() #converts it to proper format - l = linreg() #performs linear regression - contrasting() #performs contrast - regularizing() #performs regularization + data_aggregation() # collects data + dataparsing() # converts it to proper format + l = linreg() # performs linear regression + contrasting() # performs contrast + regularizing() # performs regularization + -def data_aggregation(): #all data from all the files is collected +def data_aggregation(): # all data from all the files is collected + """ + This function provides query support for NIDM graphs. """ - This function provides query support for NIDM graphs. 
- """ # query result list results = [] # if there is a CDE file list, seed the CDE cache if m: # ex: fs_00343 ~ age + sex + group - print("***********************************************************************************************************") - command = "pynidm linear-regression -nl " + n + " -model \"" + m + "\" " + print( + "***********************************************************************************************************" + ) + command = "pynidm linear-regression -nl " + n + ' -model "' + m + '" ' if c: - command = command + "-contrast \"" + c + "\" " + command = command + '-contrast "' + c + '" ' if r: command = command + "-r " + r + " " print("Your command was: " + command) - if (o is not None): + if o is not None: f = open(o, "w") f.write("Your command was " + command) f.close() verbosity = 0 restParser = RestParser(verbosity_level=int(verbosity)) restParser.setOutputFormat(RestParser.OBJECT_FORMAT) - global df_list #used in dataparsing() + global df_list # used in dataparsing() df_list = [] # set up uri to do fields query for each nidm file global file_list @@ -144,123 +159,203 @@ def data_aggregation(): #all data from all the files is collected project = GetProjectsUUID([nidm_file]) # split the model into its constituent variables global full_model_variable_list - #below, we edit the model so it splits by +,~, or =. However, to help it out in catching everything - #we replaced ~ and = with a + so that we can still use split. Regex wasn't working. + # below, we edit the model so it splits by +,~, or =. However, to help it out in catching everything + # we replaced ~ and = with a + so that we can still use split. Regex wasn't working. plus_replace = m if "~" in m: - plus_replace = m.replace('~','+') + plus_replace = m.replace("~", "+") elif "=" in m: - plus_replace = m.replace('=', '+') + plus_replace = m.replace("=", "+") elif "," in m: - plus_replace = m.replace(",", '+') + plus_replace = m.replace(",", "+") model_list = plus_replace.split("+") - for i in range(len(model_list)): #here, we remove any leading or trailing spaces + for i in range( + len(model_list) + ): # here, we remove any leading or trailing spaces model_list[i] = model_list[i].strip() full_model_variable_list = [] # set the dependent variable to the one dependent variable in the model - global dep_var #used in dataparsing(), linreg(), and contrasting() + global dep_var # used in dataparsing(), linreg(), and contrasting() dep_var = model_list[0] # join the independent variables into a comma-separated list to make it easier to call from the uri - global ind_vars #used in dataparsing() + global ind_vars # used in dataparsing() ind_vars = "" - for i in range(len(model_list)-1, 0, -1): - full_model_variable_list.append(model_list[i]) #will be used in the regularization, but we need the full list - if "*" in model_list[i]: #removing the star term from the columns we're about to pull from data + for i in range(len(model_list) - 1, 0, -1): + full_model_variable_list.append( + model_list[i] + ) # will be used in the regularization, but we need the full list + if ( + "*" in model_list[i] + ): # removing the star term from the columns we're about to pull from data model_list.pop(i) elif model_list[i] == dep_var: model_list.pop(i) - print("\n\nAn independent variable cannot be the same as the dependent variable. 
This prevents the model from running accurately.") - print("Please try a different model removing \"" + dep_var + "\" from either the right or the left side of the equation.\n\n") - if (o is not None): + print( + "\n\nAn independent variable cannot be the same as the dependent variable. This prevents the model from running accurately." + ) + print( + 'Please try a different model removing "' + + dep_var + + '" from either the right or the left side of the equation.\n\n' + ) + if o is not None: f = open(o, "a") - f.write("\n\nAn independent variable cannot be the same as the dependent variable. This prevents the model from running accurately.") - f.write("Please try a different model removing \"" + dep_var + "\" from either the right or the left side of the equation.") + f.write( + "\n\nAn independent variable cannot be the same as the dependent variable. This prevents the model from running accurately." + ) + f.write( + 'Please try a different model removing "' + + dep_var + + '" from either the right or the left side of the equation.' + ) f.close() exit(1) else: ind_vars = ind_vars + model_list[i] + "," - ind_vars = ind_vars[0:len(ind_vars) - 1] - uri = "/projects/" + project[0].toPython().split("/")[-1] + "?fields=" + ind_vars + "," + dep_var + ind_vars = ind_vars[0 : len(ind_vars) - 1] + uri = ( + "/projects/" + + project[0].toPython().split("/")[-1] + + "?fields=" + + ind_vars + + "," + + dep_var + ) # get fields output from each file and concatenate df_list_holder[count].append(pd.DataFrame(restParser.run([nidm_file], uri))) - #global dep_var + # global dep_var df = pd.concat(df_list_holder[count]) - with tempfile.NamedTemporaryFile(delete=False) as temp: # turns the dataframe into a temporary csv - df.to_csv(temp.name + '.csv') + with tempfile.NamedTemporaryFile( + delete=False + ) as temp: # turns the dataframe into a temporary csv + df.to_csv(temp.name + ".csv") temp.close() - data = list(csv.reader(open( - temp.name + '.csv'))) # makes the csv a 2D list to make it easier to call the contents of certain cells + data = list( + csv.reader(open(temp.name + ".csv")) + ) # makes the csv a 2D list to make it easier to call the contents of certain cells global independentvariables # used in linreg - independentvariables = ind_vars.split(",") # makes a list of the independent variables + independentvariables = ind_vars.split( + "," + ) # makes a list of the independent variables numcols = (len(data) - 1) // ( - len(independentvariables) + 1) # Finds the number of columns in the original dataframe + len(independentvariables) + 1 + ) # Finds the number of columns in the original dataframe global condensed_data # also used in linreg() condensed_data_holder[count] = [ - [0] * (len(independentvariables) + 1)] # makes an array 1 row by the number of necessary columns + [0] * (len(independentvariables) + 1) + ] # makes an array 1 row by the number of necessary columns + for i in range( + numcols + ): # makes the 2D array big enough to store all of the necessary values in the edited dataset + condensed_data_holder[count].append( + [0] * (len(independentvariables) + 1) + ) for i in range( - numcols): # makes the 2D array big enough to store all of the necessary values in the edited dataset - condensed_data_holder[count].append([0] * (len(independentvariables) + 1)) - for i in range(len(independentvariables)): # stores the independent variable names in the first row + len(independentvariables) + ): # stores the independent variable names in the first row condensed_data_holder[count][0][i] = 
independentvariables[i] - condensed_data_holder[count][0][-1] = str(dep_var) # stores the dependent variable name in the first row + condensed_data_holder[count][0][-1] = str( + dep_var + ) # stores the dependent variable name in the first row numrows = 1 # begins at the first row to add data - fieldcolumn = 0 # the column the variable name is in in the original dataset + fieldcolumn = ( + 0 # the column the variable name is in in the original dataset + ) valuecolumn = 0 # the column the value is in in the original dataset datacolumn = 0 # if it is identified by the dataElement name instead of the field's name not_found_list = [] for i in range(len(data[0])): - if data[0][i] == 'sourceVariable': # finds the column where the variable names are + if ( + data[0][i] == "sourceVariable" + ): # finds the column where the variable names are fieldcolumn = i - elif data[0][i] == 'source_variable': # finds the column where the variable names are + elif ( + data[0][i] == "source_variable" + ): # finds the column where the variable names are fieldcolumn = i - elif data[0][i] == 'isAbout': + elif data[0][i] == "isAbout": aboutcolumn = i - elif data[0][i] == 'label': + elif data[0][i] == "label": namecolumn = i # finds the column where the variable names are - elif data[0][i] == 'value': + elif data[0][i] == "value": valuecolumn = i # finds the column where the values are - elif data[0][i] == 'dataElement': # finds the column where the data element is if necessary + elif ( + data[0][i] == "dataElement" + ): # finds the column where the data element is if necessary datacolumn = i for i in range( - len(condensed_data_holder[count][0])): # starts iterating through the dataset, looking for the name in that - for j in range(1, len(data)): # column, so it can append the values under the proper variables + len(condensed_data_holder[count][0]) + ): # starts iterating through the dataset, looking for the name in that + for j in range( + 1, len(data) + ): # column, so it can append the values under the proper variables try: split_url = condensed_data_holder[count][0][i].split("/") for k in range(0, len(full_model_variable_list)): if "/" in full_model_variable_list[k]: - full_model_variable_list[k] = split_url[len(split_url) - 1] - if data[j][fieldcolumn] == condensed_data_holder[count][0][i]: # in the dataframe, the name is in column 3 + full_model_variable_list[k] = split_url[ + len(split_url) - 1 + ] + if ( + data[j][fieldcolumn] == condensed_data_holder[count][0][i] + ): # in the dataframe, the name is in column 3 condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values - elif data[j][aboutcolumn] == condensed_data_holder[count][0][ - i]: + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values + elif data[j][aboutcolumn] == condensed_data_holder[count][0][i]: condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values - elif data[j][aboutcolumn] == split_url[len(split_url)-1]: #this is in case the uri only works by querying the part after the last backslash + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values + elif ( + data[j][aboutcolumn] == split_url[len(split_url) - 1] + 
): # this is in case the uri only works by querying the part after the last backslash condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values - elif condensed_data_holder[count][0][ - i] in data[j][aboutcolumn]: #this is in case the uri only works by querying the part after the last backslash + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values + elif ( + condensed_data_holder[count][0][i] in data[j][aboutcolumn] + ): # this is in case the uri only works by querying the part after the last backslash condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values - elif data[j][namecolumn] == condensed_data_holder[count][0][i]: # in the dataframe, the name is in column 12 + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values + elif ( + data[j][namecolumn] == condensed_data_holder[count][0][i] + ): # in the dataframe, the name is in column 12 condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values - elif condensed_data_holder[count][0][i] == data[j][datacolumn]: # in the dataframe, the name is in column 9 + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values + elif ( + condensed_data_holder[count][0][i] == data[j][datacolumn] + ): # in the dataframe, the name is in column 9 condensed_data_holder[count][numrows][i] = data[j][ - valuecolumn] # in the dataframe, the value is in column 2 - numrows = numrows + 1 # moves on to the next row to add the proper values + valuecolumn + ] # in the dataframe, the value is in column 2 + numrows = ( + numrows + 1 + ) # moves on to the next row to add the proper values except IndexError: numrows = numrows + 1 numrows = 1 # resets to the first row for the next variable temp_list = condensed_data_holder[count] - for j in range(len(temp_list[0])-1, 0,-1): # if the software appends a column with 0 as the heading, it removes this null column + for j in range( + len(temp_list[0]) - 1, 0, -1 + ): # if the software appends a column with 0 as the heading, it removes this null column if temp_list[0][j] == "0" or temp_list[0][j] == "NaN": for row in condensed_data_holder[count]: row.pop(j) @@ -275,11 +370,13 @@ def data_aggregation(): #all data from all the files is collected count1 = 0 for i in range(len(condensed_data_holder[count][0])): if " " in condensed_data_holder[count][0][i]: - condensed_data_holder[count][0][i] = condensed_data_holder[count][0][i].replace(" ", "_") + condensed_data_holder[count][0][i] = condensed_data_holder[count][ + 0 + ][i].replace(" ", "_") for i in range(len(independentvariables)): if "/" in independentvariables[i]: split = independentvariables[i].split("/") - independentvariables[i] = split[len(split)-1] + independentvariables[i] = split[len(split) - 1] if " " in independentvariables[i]: independentvariables[i] = independentvariables[i].replace(" ", "_") if " " in dep_var: @@ -287,45 +384,54 @@ def data_aggregation(): #all data from all the files is collected count = count + 1 if 
len(not_found_list) > 0: print( - "***********************************************************************************************************") + "***********************************************************************************************************" + ) print() print("Your model was " + m) print() print( - "The following variables were not found in " + nidm_file + ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables.") - if (o is not None): + "The following variables were not found in " + + nidm_file + + ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables." + ) + if o is not None: f = open(o, "a") f.write("Your model was " + m) f.write( - "The following variables were not found in " + nidm_file + ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables.") + "The following variables were not found in " + + nidm_file + + ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables." + ) f.close() for i in range(0, len(not_found_list)): print(str(i + 1) + ". " + not_found_list[i]) - if (o is not None): + if o is not None: f = open(o, "a") f.write(str(i + 1) + ". " + not_found_list[i]) f.close() - for j in range(len(not_found_list)-1, 0,-1): + for j in range(len(not_found_list) - 1, 0, -1): not_found_list.pop(j) not_found_count = not_found_count + 1 print() if not_found_count > 0: exit(1) - else: print("ERROR: No query parameter provided. See help:") print() os.system("pynidm linreg --help") exit(1) -def dataparsing(): #The data is changed to a format that is usable by the linear regression method + +def dataparsing(): # The data is changed to a format that is usable by the linear regression method global condensed_data condensed_data = [] for i in range(0, len(file_list)): condensed_data = condensed_data + condensed_data_holder[i] for i in range(len(condensed_data[0])): - if "/" in condensed_data[0][i]: #change any URLs to just the last part so contrasting works. + if ( + "/" in condensed_data[0][i] + ): # change any URLs to just the last part so contrasting works. split = condensed_data[0][i].split("/") condensed_data[0][i] = split[len(split) - 1] @@ -335,36 +441,50 @@ def dataparsing(): #The data is changed to a format that is usable by the linear If the user says Y instead, the code runs, but stops before doing the regularization.""" global answer answer = "?" - if(len(condensed_data)-1)<20: - print("\nYour data set has less than 20 points, which means the model calculated may not be accurate due to a lack of data. ") + if (len(condensed_data) - 1) < 20: + print( + "\nYour data set has less than 20 points, which means the model calculated may not be accurate due to a lack of data. " + ) print("This means you cannot regularize the data either.") import warnings + warnings.filterwarnings("ignore") answer = input("Continue anyways? Y or N: ") - if (o is not None): + if o is not None: f = open(o, "a") f.write("Your model was " + m) f.write( - "\n\nThere was a lack of data (<20 points) in your model, which may result in inaccuracies. In addition, a regularization cannot and will not be performed.\n") + "\n\nThere was a lack of data (<20 points) in your model, which may result in inaccuracies. 
In addition, a regularization cannot and will not be performed.\n" + ) f.close() if "n" in answer.lower(): print("\nModel halted.") - if (o is not None): + if o is not None: f = open(o, "a") f.write("Your model was " + m) - f.write("Due to a lack of data (<20 points), you stopped the model because the results may have been inaccurate.") + f.write( + "Due to a lack of data (<20 points), you stopped the model because the results may have been inaccurate." + ) f.close() exit(1) - x = pd.read_csv(opencsv(condensed_data)) # changes the dataframe to a csv to make it easier to work with + x = pd.read_csv( + opencsv(condensed_data) + ) # changes the dataframe to a csv to make it easier to work with x.head() # prints what the csv looks like x.dtypes # checks data format obj_df = x.select_dtypes # puts all the variables in a dataset x.shape # says number of rows and columns in form of tuple x.describe() # says dataset statistics - obj_df = x.select_dtypes(include=['object']).copy() # takes everything that is an object (not float or int) and puts it in a new dataset + obj_df = x.select_dtypes( + include=["object"] + ).copy() # takes everything that is an object (not float or int) and puts it in a new dataset obj_df.head() # prints the new dataset - int_df = x.select_dtypes(include=['int64']).copy() # takes everything that is an int and puts it in a new dataset - float_df = x.select_dtypes(include=['float64']).copy() # takes everything that is a float and puts it in a new dataset + int_df = x.select_dtypes( + include=["int64"] + ).copy() # takes everything that is an int and puts it in a new dataset + float_df = x.select_dtypes( + include=["float64"] + ).copy() # takes everything that is a float and puts it in a new dataset df_int_float = pd.concat([float_df, int_df], axis=1) variables = [] # starts a list that will store all variables that are not numbers for i in range(1, len(condensed_data)): # goes through each variable @@ -372,32 +492,45 @@ def dataparsing(): #The data is changed to a format that is usable by the linear try: # if the value of the field can be turned into a float (is numerical) float(condensed_data[i][j]) # this means it's a number except ValueError: # if it can't be (is a string) - if condensed_data[0][j] not in variables: # adds the variable name to the list if it isn't there already + if ( + condensed_data[0][j] not in variables + ): # adds the variable name to the list if it isn't there already variables.append(condensed_data[0][j]) - le = preprocessing.LabelEncoder() # anything involving le shows the encoding of categorical variables + le = ( + preprocessing.LabelEncoder() + ) # anything involving le shows the encoding of categorical variables for i in range(len(variables)): le.fit(obj_df[variables[i]].astype(str)) - obj_df_trf = obj_df.astype(str).apply(le.fit_transform) # transforms the categorical variables into numbers. - global df_final #also used in linreg() + obj_df_trf = obj_df.astype(str).apply( + le.fit_transform + ) # transforms the categorical variables into numbers. 
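# ---------------------------------------------------------------------------
# Editorial aside (illustration only, not part of the patch): a minimal,
# hypothetical sketch of the column-wise LabelEncoder idiom used just above.
# Object-typed (categorical) columns are mapped to integer codes before being
# joined back to the numeric columns; the column names and values here are
# invented for illustration.
# ---------------------------------------------------------------------------
import pandas as pd
from sklearn import preprocessing

toy = pd.DataFrame(
    {"DX_GROUP": ["control", "autism", "control"], "sex": ["F", "M", "M"]}
)
le = preprocessing.LabelEncoder()
# apply() hands each object column (a Series) to fit_transform, producing
# one integer code per category, analogous to obj_df_trf above
encoded = toy.astype(str).apply(le.fit_transform)
# `encoded` can then be joined to the numeric columns, mirroring
# pd.concat([df_int_float, obj_df_trf], axis=1) in the function above.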
+ global df_final # also used in linreg() if not obj_df_trf.empty: - df_final = pd.concat([df_int_float, obj_df_trf], axis=1) # join_axes=[df_int_float.index]) + df_final = pd.concat( + [df_int_float, obj_df_trf], axis=1 + ) # join_axes=[df_int_float.index]) else: df_final = df_int_float df_final.head() # shows the final dataset with all the encoding print(df_final) # prints the final dataset print() - print("***********************************************************************************************************") + print( + "***********************************************************************************************************" + ) print() - if (o is not None): - f = open(o,"a") + if o is not None: + f = open(o, "a") f.write(df_final.to_string(header=True, index=True)) - f.write("\n\n***********************************************************************************************************") + f.write( + "\n\n***********************************************************************************************************" + ) f.write("\n\nModel Results: ") f.close() -def linreg(): #actual linear regression + +def linreg(): # actual linear regression print("Model Results: ") - #printing the corrected model_string + # printing the corrected model_string model_string = [] model_string.append(dep_var) model_string.append(" ~ ") @@ -406,56 +539,60 @@ def linreg(): #actual linear regression model_string.append(" + ") model_string.pop(-1) global full_model - full_model = ''.join(model_string) - print(full_model) #prints model + full_model = "".join(model_string) + print(full_model) # prints model print() - print("***********************************************************************************************************") + print( + "***********************************************************************************************************" + ) print() index = 0 - global levels #also used in contrasting() + global levels # also used in contrasting() levels = [] for i in range(len(condensed_data[0])): if c == condensed_data[0][i]: index = i - for i in range(1,len(condensed_data)): + for i in range(1, len(condensed_data)): if condensed_data[i][index] not in levels: levels.append(condensed_data[i][index]) for i in range(len(levels)): levels[i] = i - #Beginning of the linear regression + # Beginning of the linear regression global X global y if "*" in m: - #correcting the format of the model string + # correcting the format of the model string model_string = [] model_string.append(dep_var) model_string.append(" ~ ") - for i in range(0,len(full_model_variable_list)): + for i in range(0, len(full_model_variable_list)): model_string.append(full_model_variable_list[i]) model_string.append(" + ") model_string.pop(-1) - for i in range(0,len(model_string)): + for i in range(0, len(model_string)): if "*" in model_string[i]: replacement = model_string[i].split("*") model_string[i] = replacement[0] + ":" + replacement[1] - #makes sure the model is in the right format. - string = ''.join(model_string) + # makes sure the model is in the right format. 
+ string = "".join(model_string) y, X = dmatrices(string, df_final) else: - X = df_final[independentvariables] # gets the modified values of the independent variables - y = df_final[dep_var] # gets the modified values of the dependent variable + X = df_final[ + independentvariables + ] # gets the modified values of the independent variables + y = df_final[dep_var] # gets the modified values of the dependent variable if not c: - #The linear regression + # The linear regression regressor = LinearRegression() regressor.fit(X, y) regression = regressor.fit(X, y) - #Data about the linear regression, starting without contrast + # Data about the linear regression, starting without contrast X2 = sm.add_constant(X) statistics = sm.OLS(y, X2) finalstats = statistics.fit() print(finalstats.summary()) - if (o is not None): + if o is not None: # concatenate data frames """f = open(o,"a") f.write(full_model) @@ -464,67 +601,87 @@ def linreg(): #actual linear regression f.close()""" sys.stdout = open(o, "a") print(full_model) - print("\n*************************************************************************************\n") + print( + "\n*************************************************************************************\n" + ) print(finalstats.summary()) sys.stdout.close() return finalstats + + def contrasting(): global c if c: - #to account for multiple contrast variables + # to account for multiple contrast variables contrastvars = [] if "," in c: contrastvars = c.split(",") for i in range(len(contrastvars)): contrastvars[i] = contrastvars[i].strip() if " " in contrastvars[i]: - contrastvars[i]=contrastvars[i].replace(" ","_") - if "/" in contrastvars[i]: #to account for URLs + contrastvars[i] = contrastvars[i].replace(" ", "_") + if "/" in contrastvars[i]: # to account for URLs split = contrastvars[i].split("/") contrastvars[i] = split[len(split) - 1] else: - split = c.split("/") #to account for URLs + split = c.split("/") # to account for URLs c = split[len(split) - 1] - ind_vars_no_contrast_var = '' + ind_vars_no_contrast_var = "" index = 1 for i in range(len(full_model_variable_list)): if "/" in full_model_variable_list[i]: split = full_model_variable_list[i].split("/") full_model_variable_list[i] = split[len(split) - 1] if " " in full_model_variable_list[i]: - full_model_variable_list[i]=full_model_variable_list[i].replace(" ","_") + full_model_variable_list[i] = full_model_variable_list[i].replace( + " ", "_" + ) for var in full_model_variable_list: - if var != c and not(var in contrastvars): + if var != c and not (var in contrastvars): if index == 1: ind_vars_no_contrast_var = var index += 1 else: ind_vars_no_contrast_var = ind_vars_no_contrast_var + " + " + var - if len(contrastvars)>0: - contraststring = ' + '.join(contrastvars) + if len(contrastvars) > 0: + contraststring = " + ".join(contrastvars) else: if " " in c: c = c.replace(" ", "_") - contraststring=c + contraststring = c # With contrast (treatment coding) - print("\n\nTreatment (Dummy) Coding: Dummy coding compares each level of the categorical variable to a base reference level. The base reference level is the value of the intercept.") + print( + "\n\nTreatment (Dummy) Coding: Dummy coding compares each level of the categorical variable to a base reference level. The base reference level is the value of the intercept." 
+ ) ctrst = Treatment(reference=0).code_without_intercept(levels) - mod = ols(dep_var + " ~ " + ind_vars_no_contrast_var + " + C(" + contraststring + ", Treatment)", data=df_final) + mod = ols( + dep_var + + " ~ " + + ind_vars_no_contrast_var + + " + C(" + + contraststring + + ", Treatment)", + data=df_final, + ) res = mod.fit() print("With contrast (treatment coding)") print(res.summary()) - if (o is not None): + if o is not None: # concatenate data frames f = open(o, "a") f.write("\n" + full_model) f.write( - "\n\n***********************************************************************************************************") + "\n\n***********************************************************************************************************" + ) - f.write("\n\n\n\nTreatment (Dummy) Coding: Dummy coding compares each level of the categorical variable to a base reference level. The base reference level is the value of the intercept.") + f.write( + "\n\n\n\nTreatment (Dummy) Coding: Dummy coding compares each level of the categorical variable to a base reference level. The base reference level is the value of the intercept." + ) f.write("With contrast (treatment coding)") f.write(res.summary().as_text()) f.close() + # Defining the Simple class def _name_levels(prefix, levels): return ["[%s%s]" % (prefix, level) for level in levels] @@ -532,12 +689,14 @@ def _name_levels(prefix, levels): class Simple(object): def _simple_contrast(self, levels): nlevels = len(levels) - contr = -1. / nlevels * np.ones((nlevels, nlevels - 1)) - contr[1:][np.diag_indices(nlevels - 1)] = (nlevels - 1.) / nlevels + contr = -1.0 / nlevels * np.ones((nlevels, nlevels - 1)) + contr[1:][np.diag_indices(nlevels - 1)] = (nlevels - 1.0) / nlevels return contr def code_with_intercept(self, levels): - c = np.column_stack((np.ones(len(levels)), self._simple_contrast(levels))) + c = np.column_stack( + (np.ones(len(levels)), self._simple_contrast(levels)) + ) return ContrastMatrix(c, _name_levels("Simp.", levels)) def code_without_intercept(self, levels): @@ -545,67 +704,120 @@ def code_without_intercept(self, levels): return ContrastMatrix(c, _name_levels("Simp.", levels[:-1])) ctrst = Simple().code_without_intercept(levels) - mod = ols(dep_var + " ~ " + ind_vars_no_contrast_var + " + C(" + contraststring + ", Simple)", data=df_final) + mod = ols( + dep_var + + " ~ " + + ind_vars_no_contrast_var + + " + C(" + + contraststring + + ", Simple)", + data=df_final, + ) res = mod.fit() - print("\n\nSimple Coding: Like Treatment Coding, Simple Coding compares each level to a fixed reference level. However, with simple coding, the intercept is the grand mean of all the levels of the factors.") + print( + "\n\nSimple Coding: Like Treatment Coding, Simple Coding compares each level to a fixed reference level. However, with simple coding, the intercept is the grand mean of all the levels of the factors." + ) print(res.summary()) - if (o is not None): + if o is not None: # concatenate data frames f = open(o, "a") - f.write("\n\n\nSimple Coding: Like Treatment Coding, Simple Coding compares each level to a fixed reference level. However, with simple coding, the intercept is the grand mean of all the levels of the factors.") + f.write( + "\n\n\nSimple Coding: Like Treatment Coding, Simple Coding compares each level to a fixed reference level. However, with simple coding, the intercept is the grand mean of all the levels of the factors." 
+ ) f.write(res.summary().as_text()) f.close() - #With contrast (sum/deviation coding) + # With contrast (sum/deviation coding) ctrst = Sum().code_without_intercept(levels) - mod = ols(dep_var + " ~ " + ind_vars_no_contrast_var + " + C(" + contraststring + ", Sum)", data=df_final) + mod = ols( + dep_var + + " ~ " + + ind_vars_no_contrast_var + + " + C(" + + contraststring + + ", Sum)", + data=df_final, + ) res = mod.fit() - print("\n\nSum (Deviation) Coding: Sum coding compares the mean of the dependent variable for a given level to the overall mean of the dependent variable over all the levels.") + print( + "\n\nSum (Deviation) Coding: Sum coding compares the mean of the dependent variable for a given level to the overall mean of the dependent variable over all the levels." + ) print(res.summary()) - if (o is not None): + if o is not None: # concatenate data frames f = open(o, "a") - f.write("\n\n\nSum (Deviation) Coding: Sum coding compares the mean of the dependent variable for a given level to the overall mean of the dependent variable over all the levels.") + f.write( + "\n\n\nSum (Deviation) Coding: Sum coding compares the mean of the dependent variable for a given level to the overall mean of the dependent variable over all the levels." + ) f.write(res.summary().as_text()) f.close() - #With contrast (backward difference coding) + # With contrast (backward difference coding) ctrst = Diff().code_without_intercept(levels) - mod = ols(dep_var + " ~ " + ind_vars_no_contrast_var + " + C(" + contraststring + ", Diff)", data=df_final) + mod = ols( + dep_var + + " ~ " + + ind_vars_no_contrast_var + + " + C(" + + contraststring + + ", Diff)", + data=df_final, + ) res = mod.fit() - print("\n\nBackward Difference Coding: In backward difference coding, the mean of the dependent variable for a level is compared with the mean of the dependent variable for the prior level.") + print( + "\n\nBackward Difference Coding: In backward difference coding, the mean of the dependent variable for a level is compared with the mean of the dependent variable for the prior level." + ) print(res.summary()) - if (o is not None): + if o is not None: # concatenate data frames f = open(o, "a") - f.write("\n\n\nBackward Difference Coding: In backward difference coding, the mean of the dependent variable for a level is compared with the mean of the dependent variable for the prior level.") + f.write( + "\n\n\nBackward Difference Coding: In backward difference coding, the mean of the dependent variable for a level is compared with the mean of the dependent variable for the prior level." + ) f.write(res.summary().as_text()) f.close() - #With contrast (Helmert coding) + # With contrast (Helmert coding) ctrst = Helmert().code_without_intercept(levels) - mod = ols(dep_var + " ~ " + ind_vars_no_contrast_var + " + C(" + contraststring + ", Helmert)", data=df_final) + mod = ols( + dep_var + + " ~ " + + ind_vars_no_contrast_var + + " + C(" + + contraststring + + ", Helmert)", + data=df_final, + ) res = mod.fit() - print("\n\nHelmert Coding: Our version of Helmert coding is sometimes referred to as Reverse Helmert Coding. The mean of the dependent variable for a level is compared to the mean of the dependent variable over all previous levels. Hence, the name ‘reverse’ being sometimes applied to differentiate from forward Helmert coding.") + print( + "\n\nHelmert Coding: Our version of Helmert coding is sometimes referred to as Reverse Helmert Coding. 
The mean of the dependent variable for a level is compared to the mean of the dependent variable over all previous levels. Hence, the name ‘reverse’ being sometimes applied to differentiate from forward Helmert coding." + ) print(res.summary()) - if (o is not None): + if o is not None: # concatenate data frames f = open(o, "a") - f.write("\n\n\nHelmert Coding: Our version of Helmert coding is sometimes referred to as Reverse Helmert Coding. The mean of the dependent variable for a level is compared to the mean of the dependent variable over all previous levels. Hence, the name ‘reverse’ being sometimes applied to differentiate from forward Helmert coding.") + f.write( + "\n\n\nHelmert Coding: Our version of Helmert coding is sometimes referred to as Reverse Helmert Coding. The mean of the dependent variable for a level is compared to the mean of the dependent variable over all previous levels. Hence, the name ‘reverse’ being sometimes applied to differentiate from forward Helmert coding." + ) f.write(res.summary().as_text()) f.close() + def regularizing(): - if (r== ("L1" or "Lasso" or "l1" or "lasso") and not("y" in answer.lower())): #does it say L1, and has the user chosen to go ahead with running the code? + if r == ("L1" or "Lasso" or "l1" or "lasso") and not ( + "y" in answer.lower() + ): # does it say L1, and has the user chosen to go ahead with running the code? # Loop to compute the cross-validation scores max_cross_val_alpha = 1 - max_cross_val_score = -1000000000.000 #making it a super negative number initially + max_cross_val_score = ( + -1000000000.000 + ) # making it a super negative number initially for x in range(1, MAX_ALPHA): lassoModel = Lasso(alpha=x, tol=0.0925) lassoModel.fit(X, y) scores = cross_val_score(lassoModel, X, y, cv=10) avg_cross_val_score = mean(scores) * 100 - #figure out which setting of the regularization parameter results in the max likelihood score + # figure out which setting of the regularization parameter results in the max likelihood score if avg_cross_val_score > max_cross_val_score: max_cross_val_alpha = x max_cross_val_score = avg_cross_val_score @@ -614,36 +826,46 @@ def regularizing(): lassoModelChosen = Lasso(alpha=max_cross_val_alpha, tol=0.0925) lassoModelChosen.fit(X, y) print("\nLasso regression model:") - print("Alpha with maximum likelihood (range: 1 to %d) = %f" %(MAX_ALPHA, max_cross_val_alpha)) - print("Current Model Score = %f" %(lassoModelChosen.score(X, y))) + print( + "Alpha with maximum likelihood (range: 1 to %d) = %f" + % (MAX_ALPHA, max_cross_val_alpha) + ) + print("Current Model Score = %f" % (lassoModelChosen.score(X, y))) index = 0 print("\nCoefficients:") - if (o is not None): + if o is not None: # concatenate data frames f = open(o, "a") f.write("\n\nLasso regression model:") - f.write("\nAlpha with maximum likelihood (range: 1 to %d) = %f" %(MAX_ALPHA, max_cross_val_alpha)) - f.write("\nCurrent Model Score = %f" %(lassoModelChosen.score(X, y))) + f.write( + "\nAlpha with maximum likelihood (range: 1 to %d) = %f" + % (MAX_ALPHA, max_cross_val_alpha) + ) + f.write("\nCurrent Model Score = %f" % (lassoModelChosen.score(X, y))) f.write("\n\nCoefficients:") f.close() for var in full_model_variable_list: - print("%s \t %f" %(var,lassoModelChosen.coef_[index])) - if (o is not None): + print("%s \t %f" % (var, lassoModelChosen.coef_[index])) + if o is not None: with open(o, "a") as f: - f.write("\n%s \t %f" %(var, lassoModelChosen.coef_[index])) + f.write("\n%s \t %f" % (var, lassoModelChosen.coef_[index])) f.close() index = 
index + 1 - print("Intercept: %f" %(lassoModelChosen.intercept_)) - if (o is not None): + print("Intercept: %f" % (lassoModelChosen.intercept_)) + if o is not None: with open(o, "a") as f: - f.write("\nIntercept: %f" %(lassoModelChosen.intercept_)) + f.write("\nIntercept: %f" % (lassoModelChosen.intercept_)) f.close() print() - if (r== ("L2" or "Ridge" or "l2" or "Ridge") and not("y" in answer.lower())): #does it say L2, and has the user chosen to go ahead with running the code? + if r == ("L2" or "Ridge" or "l2" or "Ridge") and not ( + "y" in answer.lower() + ): # does it say L2, and has the user chosen to go ahead with running the code? # Loop to compute the different values of cross-validation scores max_cross_val_alpha = 1 - max_cross_val_score = -1000000000.000 # making it a super negative number initially + max_cross_val_score = ( + -1000000000.000 + ) # making it a super negative number initially for x in range(1, MAX_ALPHA): ridgeModel = Ridge(alpha=x, tol=0.0925) ridgeModel.fit(X, y) @@ -658,7 +880,10 @@ def regularizing(): ridgeModelChosen = Ridge(alpha=max_cross_val_alpha, tol=0.0925) ridgeModelChosen.fit(X, y) print("\nRidge regression model:") - print("Alpha with maximum likelihood (range: 1 to %d) = %f" % (MAX_ALPHA, max_cross_val_alpha)) + print( + "Alpha with maximum likelihood (range: 1 to %d) = %f" + % (MAX_ALPHA, max_cross_val_alpha) + ) print("Current Model Score = %f" % (ridgeModelChosen.score(X, y))) index = 0 """This numpy_conversion part was necessary because for the ridge model, all the coefficients get stored in a @@ -668,12 +893,15 @@ def regularizing(): for var in full_model_variable_list: if ("*" in var) or (":" in var): numpy_conversion = True - if (o is not None): + if o is not None: # concatenate data frames f = open(o, "a") f.write("\n\nRidge regression model:") - f.write("\nAlpha with maximum likelihood (range: 1 to %d) = %f" %(MAX_ALPHA, max_cross_val_alpha)) - f.write("\nCurrent Model Score = %f" %(ridgeModelChosen.score(X, y))) + f.write( + "\nAlpha with maximum likelihood (range: 1 to %d) = %f" + % (MAX_ALPHA, max_cross_val_alpha) + ) + f.write("\nCurrent Model Score = %f" % (ridgeModelChosen.score(X, y))) f.write("\n\nCoefficients:") f.close() print("\nCoefficients:") @@ -681,13 +909,13 @@ def regularizing(): coeff_list = ridgeModelChosen.coef_[index].tolist() coeff_list.pop(0) for var in full_model_variable_list: - print("%s \t %f" %(var, coeff_list[index])) - if (o is not None): - with open(o,"a") as f: + print("%s \t %f" % (var, coeff_list[index])) + if o is not None: + with open(o, "a") as f: f.write("\n%s \t %f" % (var, coeff_list[index])) index = index + 1 print("Intercept: %f" % (ridgeModelChosen.intercept_)) - if (o is not None): + if o is not None: with open(o, "a") as f: f.write("\nIntercept: %f" % (ridgeModelChosen.intercept_)) f.close() @@ -695,29 +923,33 @@ def regularizing(): else: for var in full_model_variable_list: print("%s \t %f" % (var, ridgeModelChosen.coef_[index])) - if (o is not None): - with open(o,"a") as f: + if o is not None: + with open(o, "a") as f: f.write("\n%s \t %f" % (var, ridgeModelChosen.coef_[index])) index = index + 1 print("Intercept: %f" % (ridgeModelChosen.intercept_)) - if (o is not None): + if o is not None: with open(o, "a") as f: f.write("\nIntercept: %f" % (ridgeModelChosen.intercept_)) f.close() print() + + def opencsv(data): """saves a list of lists as a csv and opens""" - import tempfile - import os import csv - handle, fn = tempfile.mkstemp(suffix='.csv') - with os.fdopen(handle,"w", 
encoding='utf8',errors='surrogateescape',newline='') as f: + import os + import tempfile + + handle, fn = tempfile.mkstemp(suffix=".csv") + with os.fdopen( + handle, "w", encoding="utf8", errors="surrogateescape", newline="" + ) as f: writer = csv.writer(f) writer.writerows(data) return fn + # it can be used calling the script `python nidm_query.py -nl ... -q .. if __name__ == "__main__": - linear_regression() - diff --git a/nidm/experiment/tools/nidm_merge.py b/nidm/experiment/tools/nidm_merge.py index 92806b4e..9b8ca1d8 100644 --- a/nidm/experiment/tools/nidm_merge.py +++ b/nidm/experiment/tools/nidm_merge.py @@ -1,74 +1,84 @@ #!/usr/bin/env python -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** # nidm_utils.py # License: Apache License, Version 2.0 -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** # Date: 11-28-18 Coded by: David Keator (dbkeator@gmail.com) # Filename: nidm_utils.py # # Program description: Tools for working with NIDM-Experiment files # -#************************************************************************************** +# ************************************************************************************** # Development environment: Python - PyCharm IDE # -#************************************************************************************** +# ************************************************************************************** # System requirements: Python 3.X # Libraries: pybids, numpy, matplotlib, pandas, scipy, math, dateutil, datetime,argparse, # os,sys,getopt,csv -#************************************************************************************** +# ************************************************************************************** # Start date: 11-28-18 # Update history: # DATE MODIFICATION Who # # -#************************************************************************************** +# ************************************************************************************** # Programmer comments: # # -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** -import os,sys from argparse import ArgumentParser -from rdflib import Graph,util -from rdflib.tools import rdf2dot -from nidm.experiment.Utils import read_nidm -from nidm.experiment.Query import GetParticipantIDs -from nidm.core import Constants from io import StringIO -from os.path import basename,splitext +import os +from os.path import basename, splitext import subprocess -from graphviz import Source +import sys import tempfile import click +from graphviz import Source +from nidm.core import Constants +from nidm.experiment.Query import GetParticipantIDs +from nidm.experiment.Utils import 
read_nidm from nidm.experiment.tools.click_base import cli +from rdflib import Graph, util +from rdflib.tools import rdf2dot + # adding click argument parsing @cli.command() -@click.option("--nidm_file_list", "-nl", required=True, - help="A comma separated list of NIDM files with full path") -@click.option("--s", "-s", required=False,is_flag=True, - help="If parameter set then files will be merged by ndar:src_subjec_id of prov:agents") -@click.option("--out_file", "-o", required=True, - help="File to write concatenated NIDM files") - - -def merge(nidm_file_list, s,out_file): +@click.option( + "--nidm_file_list", + "-nl", + required=True, + help="A comma separated list of NIDM files with full path", +) +@click.option( + "--s", + "-s", + required=False, + is_flag=True, + help="If parameter set then files will be merged by ndar:src_subjec_id of prov:agents", +) +@click.option( + "--out_file", "-o", required=True, help="File to write concatenated NIDM files" +) +def merge(nidm_file_list, s, out_file): """ This function will merge NIDM files. See command line parameters for supported merge operations. """ - #graph = Graph() - #for nidm_file in nidm_file_list.split(','): + # graph = Graph() + # for nidm_file in nidm_file_list.split(','): # graph.parse(nidm_file,format=util.guess_format(nidm_file)) # create empty graph - graph=Graph() + graph = Graph() # start with the first NIDM file and merge the rest into the first - first=True - for nidm_file in nidm_file_list.split(','): + first = True + for nidm_file in nidm_file_list.split(","): # if merging by subject: if s: if first: @@ -76,17 +86,18 @@ def merge(nidm_file_list, s,out_file): first_file_subjids = GetParticipantIDs([nidm_file]) first = False first_graph = Graph() - first_graph.parse(nidm_file,format=util.guess_format(nidm_file)) + first_graph.parse(nidm_file, format=util.guess_format(nidm_file)) else: # load second graph - graph.parse(nidm_file,format=util.guess_format(nidm_file)) + graph.parse(nidm_file, format=util.guess_format(nidm_file)) # get list of second file subject IDs subj = GetParticipantIDs([nidm_file]) # for each UUID / subject ID look in graph and see if you can find the same ID. If so get the UUID of # that prov:agent and change all the UUIDs in nidm_file to match then concatenate the two graphs. - query = ''' + query = ( + """ PREFIX prov: PREFIX sio: @@ -100,16 +111,18 @@ def merge(nidm_file_list, s,out_file): ?uuid a prov:Agent ; %s ?ID . FILTER(?ID = - ''' % Constants.NIDM_SUBJECTID + """ + % Constants.NIDM_SUBJECTID + ) # add filters to above query to only look for subject IDs which are in the first file to merge into - temp=True - for ID in first_file_subjids['ID']: + temp = True + for ID in first_file_subjids["ID"]: if temp: - query = query + "\"" + ID + "\"" + query = query + '"' + ID + '"' temp = False else: - query = query + "|| ?ID= \"" + ID + "\"" + query = query + '|| ?ID= "' + ID + '"' query = query + ") }" @@ -118,37 +131,38 @@ def merge(nidm_file_list, s,out_file): # if len(qres) > 0 then we have matches so load the nidm_file into a temporary graph so we can # make changes to it then concatenate it. 
if len(qres) > 0: - #tmp = Graph() - #tmp.parse(nidm_file,format=util.guess_format(nidm_file)) + # tmp = Graph() + # tmp.parse(nidm_file,format=util.guess_format(nidm_file)) # for each ID in the merged graph that matches an ID in the nidm_file graph for row in qres: # find ID from first file that matches ID in this file - t=first_file_subjids['ID'].str.match(row['ID']) + t = first_file_subjids["ID"].str.match(row["ID"]) # then get uuid for that match from first file - uuid_replacement = first_file_subjids.iloc[ [*filter(t.get,t.index)][0],0] - - for s,p,o in graph.triples((None,None,None)): - if (s == row['uuid']): - #print("replacing subject in triple %s %s %s with %s" %(s,p,o,uuid_to_replace)) - graph.add((uuid_replacement,p,o)) - graph.remove((row['uuid'],p,o)) - elif (o == row['uuid']): - #print("replacing object in triple %s %s %s with %s" %(s,p,o,uuid_to_replace)) - graph.add((s,p,uuid_replacement)) - graph.remove((s,p,row['uuid'])) - elif (p == row['uuid']): - #print("replacing predicate in triple %s %s %s with %s" %(s,p,o,uuid_to_replace)) - graph.add((s,uuid_replacement,o)) - graph.remove((s,row['uuid'],o)) + uuid_replacement = first_file_subjids.iloc[ + [*filter(t.get, t.index)][0], 0 + ] + + for s, p, o in graph.triples((None, None, None)): + if s == row["uuid"]: + # print("replacing subject in triple %s %s %s with %s" %(s,p,o,uuid_to_replace)) + graph.add((uuid_replacement, p, o)) + graph.remove((row["uuid"], p, o)) + elif o == row["uuid"]: + # print("replacing object in triple %s %s %s with %s" %(s,p,o,uuid_to_replace)) + graph.add((s, p, uuid_replacement)) + graph.remove((s, p, row["uuid"])) + elif p == row["uuid"]: + # print("replacing predicate in triple %s %s %s with %s" %(s,p,o,uuid_to_replace)) + graph.add((s, uuid_replacement, o)) + graph.remove((s, row["uuid"], o)) # merge updated graph graph = first_graph + graph - graph.serialize(out_file, format='turtle') - + graph.serialize(out_file, format="turtle") if __name__ == "__main__": - merge() + merge() diff --git a/nidm/experiment/tools/nidm_query.py b/nidm/experiment/tools/nidm_query.py index 688f20cd..acda2030 100644 --- a/nidm/experiment/tools/nidm_query.py +++ b/nidm/experiment/tools/nidm_query.py @@ -1,88 +1,173 @@ #!/usr/bin/env python -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** # nidm_query.py # License: Apache License, Version 2.0 -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** # Date: 8-1-18 Coded by: David Keator (dbkeator@gmail.com) # Filename: nidm_query.py # # Program description: This program provides query functionality for NIDM-Experiment files # # -#************************************************************************************** +# ************************************************************************************** # Development environment: Python - PyCharm IDE # -#************************************************************************************** +# 
************************************************************************************** # System requirements: Python 3.X # Libraries: os, sys, rdflib, pandas, argparse, logging -#************************************************************************************** +# ************************************************************************************** # Start date: 8-1-18 # Update history: # DATE MODIFICATION Who # # -#************************************************************************************** +# ************************************************************************************** # Programmer comments: # # -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** -import os, sys -from rdflib import Graph, util -import pandas as pd from argparse import ArgumentParser -import logging import csv -from nidm.experiment.Query import sparql_query_nidm, GetParticipantIDs,GetProjectInstruments,GetProjectsUUID,GetInstrumentVariables,GetDataElements,GetBrainVolumes,GetBrainVolumeDataElements -from nidm.experiment.CDE import getCDEs +from json import dumps, loads +import logging +import os +import sys import click -from click_option_group import optgroup, RequiredMutuallyExclusiveOptionGroup +from click_option_group import RequiredMutuallyExclusiveOptionGroup, optgroup +from nidm.experiment.CDE import getCDEs +from nidm.experiment.Query import ( + GetBrainVolumeDataElements, + GetBrainVolumes, + GetDataElements, + GetInstrumentVariables, + GetParticipantIDs, + GetProjectInstruments, + GetProjectsUUID, + sparql_query_nidm, +) from nidm.experiment.tools.click_base import cli from nidm.experiment.tools.rest import RestParser -from json import dumps, loads +import pandas as pd +from rdflib import Graph, util @cli.command() -@click.option("--nidm_file_list", "-nl", required=True, - help="A comma separated list of NIDM files with full path") -@click.option("--cde_file_list", "-nc", required=False, - help="A comma separated list of NIDM CDE files with full path. 
Can also be set in the CDE_DIR environment variable") -@optgroup.group('Query Type',help='Pick among the following query type selections',cls=RequiredMutuallyExclusiveOptionGroup) -@optgroup.option("--query_file", "-q", type=click.File('r'), - help="Text file containing a SPARQL query to execute") -@optgroup.option("--get_participants", "-p", is_flag=True, - help="Parameter, if set, query will return participant IDs and prov:agent entity IDs") -@optgroup.option("--get_instruments", "-i", is_flag=True, - help="Parameter, if set, query will return list of onli:assessment-instrument:") -@optgroup.option("--get_instrument_vars", "-iv", is_flag=True, - help="Parameter, if set, query will return list of onli:assessment-instrument: variables") -@optgroup.option("--get_dataelements", "-de", is_flag=True, - help="Parameter, if set, will return all DataElements in NIDM file") -@optgroup.option("--get_dataelements_brainvols", "-debv", is_flag=True, - help="Parameter, if set, will return all brain volume DataElements in NIDM file along with details") -@optgroup.option("--get_brainvols", "-bv", is_flag=True, - help="Parameter, if set, will return all brain volume data elements and values along with participant IDs in NIDM file") -@optgroup.option("--get_fields", "-gf", - help="This parameter will return data for only the field names in the comma separated list (e.g. -gf age,fs_00003) from all nidm files supplied") -@optgroup.option("--uri", "-u", - help="A REST API URI query") -@click.option("--output_file", "-o", required=False, - help="Optional output file (CSV) to store results of query") -@click.option("-j/-no_j", required=False, default=False, - help="Return result of a uri query as JSON") -@click.option("--blaze", "-bg", required=False, - help="Base URL for Blazegraph. Ex: http://172.19.0.2:9999/blazegraph/sparql") -@click.option('-v', '--verbosity', required=False, help="Verbosity level 0-5, 0 is default", default="0") - -def query(nidm_file_list, cde_file_list, query_file, output_file, get_participants, get_instruments, get_instrument_vars, get_dataelements, get_brainvols,get_dataelements_brainvols, get_fields, uri, blaze, j, verbosity): +@click.option( + "--nidm_file_list", + "-nl", + required=True, + help="A comma separated list of NIDM files with full path", +) +@click.option( + "--cde_file_list", + "-nc", + required=False, + help="A comma separated list of NIDM CDE files with full path. 
Can also be set in the CDE_DIR environment variable", +) +@optgroup.group( + "Query Type", + help="Pick among the following query type selections", + cls=RequiredMutuallyExclusiveOptionGroup, +) +@optgroup.option( + "--query_file", + "-q", + type=click.File("r"), + help="Text file containing a SPARQL query to execute", +) +@optgroup.option( + "--get_participants", + "-p", + is_flag=True, + help="Parameter, if set, query will return participant IDs and prov:agent entity IDs", +) +@optgroup.option( + "--get_instruments", + "-i", + is_flag=True, + help="Parameter, if set, query will return list of onli:assessment-instrument:", +) +@optgroup.option( + "--get_instrument_vars", + "-iv", + is_flag=True, + help="Parameter, if set, query will return list of onli:assessment-instrument: variables", +) +@optgroup.option( + "--get_dataelements", + "-de", + is_flag=True, + help="Parameter, if set, will return all DataElements in NIDM file", +) +@optgroup.option( + "--get_dataelements_brainvols", + "-debv", + is_flag=True, + help="Parameter, if set, will return all brain volume DataElements in NIDM file along with details", +) +@optgroup.option( + "--get_brainvols", + "-bv", + is_flag=True, + help="Parameter, if set, will return all brain volume data elements and values along with participant IDs in NIDM file", +) +@optgroup.option( + "--get_fields", + "-gf", + help="This parameter will return data for only the field names in the comma separated list (e.g. -gf age,fs_00003) from all nidm files supplied", +) +@optgroup.option("--uri", "-u", help="A REST API URI query") +@click.option( + "--output_file", + "-o", + required=False, + help="Optional output file (CSV) to store results of query", +) +@click.option( + "-j/-no_j", + required=False, + default=False, + help="Return result of a uri query as JSON", +) +@click.option( + "--blaze", + "-bg", + required=False, + help="Base URL for Blazegraph. Ex: http://172.19.0.2:9999/blazegraph/sparql", +) +@click.option( + "-v", + "--verbosity", + required=False, + help="Verbosity level 0-5, 0 is default", + default="0", +) +def query( + nidm_file_list, + cde_file_list, + query_file, + output_file, + get_participants, + get_instruments, + get_instrument_vars, + get_dataelements, + get_brainvols, + get_dataelements_brainvols, + get_fields, + uri, + blaze, + j, + verbosity, +): """ This function provides query support for NIDM graphs. 
""" - #query result list + # query result list results = [] # if there is a CDE file list, seed the CDE cache @@ -94,66 +179,71 @@ def query(nidm_file_list, cde_file_list, query_file, output_file, get_participan print("setting BLAZEGRAPH_URL to {}".format(blaze)) if get_participants: - df = GetParticipantIDs(nidm_file_list.split(','),output_file=output_file) - if ((output_file) is None): - + df = GetParticipantIDs(nidm_file_list.split(","), output_file=output_file) + if (output_file) is None: print(df.to_string()) - return df elif get_instruments: - #first get all project UUIDs then iterate and get instruments adding to output dataframe - project_list = GetProjectsUUID(nidm_file_list.split(',')) - count=1 + # first get all project UUIDs then iterate and get instruments adding to output dataframe + project_list = GetProjectsUUID(nidm_file_list.split(",")) + count = 1 for project in project_list: if count == 1: - df = GetProjectInstruments(nidm_file_list.split(','),project_id=project) - count+=1 + df = GetProjectInstruments( + nidm_file_list.split(","), project_id=project + ) + count += 1 else: - df = df.append(GetProjectInstruments(nidm_file_list.split(','),project_id=project)) - - #write dataframe - #if output file parameter specified - if (output_file is not None): + df = df.append( + GetProjectInstruments(nidm_file_list.split(","), project_id=project) + ) + # write dataframe + # if output file parameter specified + if output_file is not None: df.to_csv(output_file) - #with open(output_file,'w') as myfile: + # with open(output_file,'w') as myfile: # wr=csv.writer(myfile,quoting=csv.QUOTE_ALL) # wr.writerow(df) - #pd.DataFrame.from_records(df,columns=["Instruments"]).to_csv(output_file) + # pd.DataFrame.from_records(df,columns=["Instruments"]).to_csv(output_file) else: print(df.to_string()) elif get_instrument_vars: - #first get all project UUIDs then iterate and get instruments adding to output dataframe - project_list = GetProjectsUUID(nidm_file_list.split(',')) - count=1 + # first get all project UUIDs then iterate and get instruments adding to output dataframe + project_list = GetProjectsUUID(nidm_file_list.split(",")) + count = 1 for project in project_list: if count == 1: - df = GetInstrumentVariables(nidm_file_list.split(','),project_id=project) - count+=1 + df = GetInstrumentVariables( + nidm_file_list.split(","), project_id=project + ) + count += 1 else: - df = df.append(GetInstrumentVariables(nidm_file_list.split(','),project_id=project)) - - #write dataframe - #if output file parameter specified - if (output_file is not None): + df = df.append( + GetInstrumentVariables( + nidm_file_list.split(","), project_id=project + ) + ) + # write dataframe + # if output file parameter specified + if output_file is not None: df.to_csv(output_file) else: print(df.to_string()) elif get_dataelements: datael = GetDataElements(nidm_file_list=nidm_file_list) - #if output file parameter specified - if (output_file is not None): - + # if output file parameter specified + if output_file is not None: datael.to_csv(output_file) else: print(datael.to_string()) elif get_fields: # fields only query. 
We'll do it with the rest api restParser = RestParser(verbosity_level=int(verbosity)) - if (output_file is not None): + if output_file is not None: restParser.setOutputFormat(RestParser.OBJECT_FORMAT) df_list = [] else: @@ -162,61 +252,62 @@ def query(nidm_file_list, cde_file_list, query_file, output_file, get_participan for nidm_file in nidm_file_list.split(","): # get project UUID project = GetProjectsUUID([nidm_file]) - uri = "/projects/" + project[0].toPython().split("/")[-1] + "?fields=" + get_fields + uri = ( + "/projects/" + + project[0].toPython().split("/")[-1] + + "?fields=" + + get_fields + ) # get fields output from each file and concatenate - if (output_file is None): + if output_file is None: # just print results print(restParser.run([nidm_file], uri)) else: df_list.append(pd.DataFrame(restParser.run([nidm_file], uri))) - if (output_file is not None): + if output_file is not None: # concatenate data frames df = pd.concat(df_list) # output to csv file df.to_csv(output_file) elif uri: - restParser = RestParser(verbosity_level = int(verbosity)) + restParser = RestParser(verbosity_level=int(verbosity)) if j: restParser.setOutputFormat(RestParser.JSON_FORMAT) - elif (output_file is not None): + elif output_file is not None: restParser.setOutputFormat(RestParser.OBJECT_FORMAT) else: restParser.setOutputFormat(RestParser.CLI_FORMAT) - df = restParser.run(nidm_file_list.split(','), uri) - if (output_file is not None): + df = restParser.run(nidm_file_list.split(","), uri) + if output_file is not None: if j: - with open(output_file,"w+") as f: + with open(output_file, "w+") as f: f.write(dumps(df)) else: # convert object df to dataframe and output pd.DataFrame(df).to_csv(output_file) else: - print (df) + print(df) elif get_dataelements_brainvols: brainvol = GetBrainVolumeDataElements(nidm_file_list=nidm_file_list) - #if output file parameter specified - if (output_file is not None): - + # if output file parameter specified + if output_file is not None: brainvol.to_csv(output_file) else: print(brainvol.to_string()) elif get_brainvols: brainvol = GetBrainVolumes(nidm_file_list=nidm_file_list) - #if output file parameter specified - if (output_file is not None): - + # if output file parameter specified + if output_file is not None: brainvol.to_csv(output_file) else: print(brainvol.to_string()) elif query_file: + df = sparql_query_nidm(nidm_file_list.split(","), query_file, output_file) - df = sparql_query_nidm(nidm_file_list.split(','),query_file,output_file) - - if ((output_file) is None): - + if (output_file) is None: print(df.to_string()) return df diff --git a/nidm/experiment/tools/nidm_utils.py b/nidm/experiment/tools/nidm_utils.py index 1f06dde0..0463bc07 100644 --- a/nidm/experiment/tools/nidm_utils.py +++ b/nidm/experiment/tools/nidm_utils.py @@ -1,95 +1,118 @@ #!/usr/bin/env python -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** # nidm_utils.py # License: Apache License, Version 2.0 -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# 
************************************************************************************** # Date: 11-28-18 Coded by: David Keator (dbkeator@gmail.com) # Filename: nidm_utils.py # # Program description: Tools for working with NIDM-Experiment files # -#************************************************************************************** +# ************************************************************************************** # Development environment: Python - PyCharm IDE # -#************************************************************************************** +# ************************************************************************************** # System requirements: Python 3.X # Libraries: pybids, numpy, matplotlib, pandas, scipy, math, dateutil, datetime,argparse, # os,sys,getopt,csv -#************************************************************************************** +# ************************************************************************************** # Start date: 11-28-18 # Update history: # DATE MODIFICATION Who # # -#************************************************************************************** +# ************************************************************************************** # Programmer comments: # # -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** -import os,sys from argparse import ArgumentParser -from rdflib import Graph,util -from rdflib.tools import rdf2dot -from nidm.experiment.Utils import read_nidm from io import StringIO -from os.path import basename,splitext +import os +from os.path import basename, splitext import subprocess -from graphviz import Source +import sys import tempfile +from graphviz import Source +from nidm.experiment.Utils import read_nidm +from rdflib import Graph, util +from rdflib.tools import rdf2dot -def main(): - - parser = ArgumentParser(description='This program contains various NIDM-Experiment utilities') - sub = parser.add_subparsers(dest='command') - concat = sub.add_parser('concat', description="This command will simply concatenate the supplied NIDM files into a single output") - visualize = sub.add_parser('visualize', description="This command will produce a visualization(pdf) of the supplied NIDM files") - jsonld = sub.add_parser('jsonld', description="This command will save NIDM files as jsonld") - - for arg in [concat,visualize,jsonld]: - arg.add_argument('-nl', '--nl', dest="nidm_files", nargs="+", required=True, help="A comma separated list of NIDM files with full path") - concat.add_argument('-o', '--o', dest='output_file', required=True, help="Merged NIDM output file name + path") +def main(): + parser = ArgumentParser( + description="This program contains various NIDM-Experiment utilities" + ) + sub = parser.add_subparsers(dest="command") + concat = sub.add_parser( + "concat", + description="This command will simply concatenate the supplied NIDM files into a single output", + ) + visualize = sub.add_parser( + "visualize", + description="This command will produce a visualization(pdf) of the supplied NIDM files", + ) + jsonld = sub.add_parser( + "jsonld", description="This command will save NIDM files as jsonld" + ) + + for arg in [concat, visualize, jsonld]: + arg.add_argument( + "-nl", + "--nl", + 
dest="nidm_files", + nargs="+", + required=True, + help="A comma separated list of NIDM files with full path", + ) + + concat.add_argument( + "-o", + "--o", + dest="output_file", + required=True, + help="Merged NIDM output file name + path", + ) # visualize.add_argument('-o', '--o', dest='output_file', required=True, help="Output file name+path of dot graph") + args = parser.parse_args() - args=parser.parse_args() - - #concatenate nidm files - if args.command == 'concat': - - #create empty graph - graph=Graph() + # concatenate nidm files + if args.command == "concat": + # create empty graph + graph = Graph() for nidm_file in args.nidm_files: - tmp = Graph() - graph = graph + tmp.parse(nidm_file,format=util.guess_format(nidm_file)) - - graph.serialize(args.output_file, format='turtle') - - + tmp = Graph() + graph = graph + tmp.parse(nidm_file, format=util.guess_format(nidm_file)) - elif args.command == 'visualize': + graph.serialize(args.output_file, format="turtle") + elif args.command == "visualize": for nidm_file in args.nidm_files: # read in nidm file - project=read_nidm(nidm_file) + project = read_nidm(nidm_file) # split path and filename for output file writing file_parts = os.path.split(nidm_file) # write graph as nidm filename + .pdf - project.save_DotGraph(filename=os.path.join(file_parts[0], os.path.splitext(file_parts[1])[0] + '.pdf'), format='pdf' ) - - #create empty graph - #graph=Graph() - #for nidm_file in args.nidm_files: + project.save_DotGraph( + filename=os.path.join( + file_parts[0], os.path.splitext(file_parts[1])[0] + ".pdf" + ), + format="pdf", + ) + + # create empty graph + # graph=Graph() + # for nidm_file in args.nidm_files: # tmp = Graph() # graph = graph + tmp.parse(nidm_file,format=util.guess_format(nidm_file)) - # project=read_nidm(StringIO.write(graph.serialize(format='turtle'))) # project.save_DotGraph(filename=args.output_file+'.pdf',format='pdf') # WIP: Workaround because not all NIDM files only contain NIDM-E objects and so read_nidm function needs to be @@ -98,26 +121,24 @@ def main(): # result is the standard output dot graph stream # write temporary file to disk and use for stats - #temp = tempfile.NamedTemporaryFile(delete=False) - #temp.write(graph.serialize(format='turtle')) - #temp.close() - #uber_nidm_file = temp.name - #result = subprocess.run(['rdf2dot',uber_nidm_file], stdout=subprocess.PIPE) + # temp = tempfile.NamedTemporaryFile(delete=False) + # temp.write(graph.serialize(format='turtle')) + # temp.close() + # uber_nidm_file = temp.name + # result = subprocess.run(['rdf2dot',uber_nidm_file], stdout=subprocess.PIPE) # now use graphviz Source to create dot graph object - #src=Source(result) - #src.render(args.output_file+'.pdf',view=False,format='pdf') - - + # src=Source(result) + # src.render(args.output_file+'.pdf',view=False,format='pdf') - elif args.command == 'jsonld': - #create empty graph + elif args.command == "jsonld": + # create empty graph for nidm_file in args.nidm_files: - project=read_nidm(nidm_file) - #serialize to jsonld - with open(splitext(nidm_file)[0]+".json",'w') as f: + project = read_nidm(nidm_file) + # serialize to jsonld + with open(splitext(nidm_file)[0] + ".json", "w") as f: f.write(project.serializeJSONLD()) if __name__ == "__main__": - main() + main() diff --git a/nidm/experiment/tools/nidm_version.py b/nidm/experiment/tools/nidm_version.py index 88561afd..9b6718e8 100644 --- a/nidm/experiment/tools/nidm_version.py +++ b/nidm/experiment/tools/nidm_version.py @@ -1,18 +1,18 @@ -import os,sys - +import os +import sys 
import click from nidm.experiment.tools.click_base import cli from nidm.version import __version__ + # adding click argument parsing @cli.command() - def version(): - ''' + """ This function will print the version of pynidm. - ''' - print("PyNIDM Version: %s" %__version__) + """ + print("PyNIDM Version: %s" % __version__) if __name__ == "__main__": - version() \ No newline at end of file + version() diff --git a/nidm/experiment/tools/nidm_visualize.py b/nidm/experiment/tools/nidm_visualize.py index f29c9ca9..067c53a4 100644 --- a/nidm/experiment/tools/nidm_visualize.py +++ b/nidm/experiment/tools/nidm_visualize.py @@ -1,72 +1,79 @@ #!/usr/bin/env python -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** # nidm_utils.py # License: Apache License, Version 2.0 -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** # Date: 11-28-18 Coded by: David Keator (dbkeator@gmail.com) # Filename: nidm_utils.py # # Program description: Tools for working with NIDM-Experiment files # -#************************************************************************************** +# ************************************************************************************** # Development environment: Python - PyCharm IDE # -#************************************************************************************** +# ************************************************************************************** # System requirements: Python 3.X # Libraries: pybids, numpy, matplotlib, pandas, scipy, math, dateutil, datetime,argparse, # os,sys,getopt,csv -#************************************************************************************** +# ************************************************************************************** # Start date: 11-28-18 # Update history: # DATE MODIFICATION Who # # -#************************************************************************************** +# ************************************************************************************** # Programmer comments: # # -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** -import os,sys from argparse import ArgumentParser -from rdflib import Graph,util -from rdflib.tools import rdf2dot -from nidm.experiment.Utils import read_nidm from io import StringIO -from os.path import basename,splitext +import os +from os.path import basename, splitext import subprocess -from graphviz import Source +import sys import tempfile import click +from graphviz import Source +from nidm.experiment.Utils import read_nidm from nidm.experiment.tools.click_base import cli +from rdflib import Graph, util +from rdflib.tools import rdf2dot + # adding click argument parsing 
@cli.command() -@click.option("--nidm_file_list", "-nl", required=True, - help="A comma separated list of NIDM files with full path") - - - +@click.option( + "--nidm_file_list", + "-nl", + required=True, + help="A comma separated list of NIDM files with full path", +) def visualize(nidm_file_list): - ''' + """ This command will produce a visualization(pdf) of the supplied NIDM files named the same as the input files and stored in the same directories. - ''' + """ - for nidm_file in nidm_file_list.split(','): + for nidm_file in nidm_file_list.split(","): # read in nidm file - project=read_nidm(nidm_file) + project = read_nidm(nidm_file) # split path and filename for output file writing file_parts = os.path.split(nidm_file) # write graph as nidm filename + .pdf - project.save_DotGraph(filename=os.path.join(file_parts[0], os.path.splitext(file_parts[1])[0] + '.pdf'), format='pdf' ) - + project.save_DotGraph( + filename=os.path.join( + file_parts[0], os.path.splitext(file_parts[1])[0] + ".pdf" + ), + format="pdf", + ) if __name__ == "__main__": - visualize() + visualize() diff --git a/nidm/experiment/tools/repronim_simple2_brainvolumes.py b/nidm/experiment/tools/repronim_simple2_brainvolumes.py index e54a0c93..28da5e6e 100644 --- a/nidm/experiment/tools/repronim_simple2_brainvolumes.py +++ b/nidm/experiment/tools/repronim_simple2_brainvolumes.py @@ -1,71 +1,84 @@ #!/usr/bin/env python -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** # repronim_simple2_brainvolumes.py # License: Apache License, Version 2.0 -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** # Date: 03-22-19 Coded by: David Keator (dbkeator@gmail.com) # Filename: repronim_simple2_brainvolumes.py # # Program description: This program will load in a CSV file made during simple-2 # brain volumes experiment which has the following organization: # -#source FSL FSL FSL +# source FSL FSL FSL # participant_id left nucleus accumbens volume left amygdala volume left caudate nucleus volume -#sub-0050002 796.4723293 1255.574283 4449.579039 -#sub-0050003 268.9688215 878.7860634 3838.602449 -#sub-0050004 539.0969914 1195.288168 3561.518188 +# sub-0050002 796.4723293 1255.574283 4449.579039 +# sub-0050003 268.9688215 878.7860634 3838.602449 +# sub-0050004 539.0969914 1195.288168 3561.518188 # # If will use the first row to determine the software used for the segmentations and the # second row for the variable names. 
Then it does a simple NIDM conversion using # example model in: https://docs.google.com/document/d/1PyBoM7J0TuzTC1TIIFPDqd05nomcCM5Pvst8yCoqLng/edit -#************************************************************************************** +# ************************************************************************************** # Development environment: Python - PyCharm IDE # -#************************************************************************************** +# ************************************************************************************** # System requirements: Python 3.X # Libraries: pybids, numpy, matplotlib, pandas, scipy, math, dateutil, datetime,argparse, # os,sys,getopt,csv -#************************************************************************************** +# ************************************************************************************** # Start date: 03-22-18 # Update history: # DATE MODIFICATION Who # # -#************************************************************************************** +# ************************************************************************************** # Programmer comments: # # -#************************************************************************************** -#************************************************************************************** +# ************************************************************************************** +# ************************************************************************************** -import os,sys -from nidm.experiment import Project,Session,AssessmentAcquisition,AssessmentObject -from nidm.core import Constants -from nidm.experiment.Utils import read_nidm, map_variables_to_terms, getSubjIDColumn -from nidm.experiment.Core import getUUID -from nidm.experiment.Core import Core -from prov.model import QualifiedName,PROV_ROLE, ProvDocument, PROV_ATTR_USED_ENTITY -from prov.model import Namespace as provNamespace -import prov as pm from argparse import ArgumentParser -from os.path import dirname, join, splitext,basename -import json -import pandas as pd -from rdflib import Graph,URIRef,RDF -import numpy as np from io import StringIO +import json +import os +from os.path import basename, dirname, join, splitext +import sys from urllib.parse import urlparse +from nidm.core import Constants +from nidm.experiment import AssessmentAcquisition, AssessmentObject, Project, Session +from nidm.experiment.Core import Core, getUUID +from nidm.experiment.Utils import getSubjIDColumn, map_variables_to_terms, read_nidm +import numpy as np +import pandas as pd +import prov as pm +from prov.model import Namespace as provNamespace +from prov.model import PROV_ATTR_USED_ENTITY, PROV_ROLE +from prov.model import ProvDocument, QualifiedName +from rdflib import RDF, Graph, URIRef + def column_index(df, query_cols): cols = df.columns.values sidx = np.argsort(cols) - return sidx[np.searchsorted(cols,query_cols,sorter=sidx)] - -def add_brainvolume_data(nidmdoc, df, id_field, source_row, column_to_terms, png_file=None, output_file=None, root_act=None, nidm_graph=None): - ''' + return sidx[np.searchsorted(cols, query_cols, sorter=sidx)] + + +def add_brainvolume_data( + nidmdoc, + df, + id_field, + source_row, + column_to_terms, + png_file=None, + output_file=None, + root_act=None, + nidm_graph=None, +): + """ :param nidmdoc: :param df: @@ -75,95 +88,199 @@ def add_brainvolume_data(nidmdoc, df, id_field, source_row, column_to_terms, png :param png_file: :param root_act: :return: - ''' - #dictionary to store 
activities for each software agent - software_agent={} - software_activity={} - participant_agent={} - entity={} + """ + # dictionary to store activities for each software agent + software_agent = {} + software_activity = {} + participant_agent = {} + entity = {} - #this function can be used for both creating a brainvolumes NIDM file from scratch or adding brain volumes to - #existing NIDM file. The following logic basically determines which route to take... + # this function can be used for both creating a brainvolumes NIDM file from scratch or adding brain volumes to + # existing NIDM file. The following logic basically determines which route to take... - #if an existing NIDM graph is passed as a parameter then add to existing file + # if an existing NIDM graph is passed as a parameter then add to existing file if nidm_graph is None: - first_row=True - #iterate over rows and store in NIDM file + first_row = True + # iterate over rows and store in NIDM file for csv_index, csv_row in df.iterrows(): - - #store other data from row with columns_to_term mappings - for row_variable,row_data in csv_row.iteritems(): - - #check if row_variable is subject id, if so check whether we have an agent for this participant - if row_variable==id_field: - #store participant id for later use in processing the data for this row + # store other data from row with columns_to_term mappings + for row_variable, row_data in csv_row.iteritems(): + # check if row_variable is subject id, if so check whether we have an agent for this participant + if row_variable == id_field: + # store participant id for later use in processing the data for this row participant_id = row_data - #if there is no agent for the participant then add one + # if there is no agent for the participant then add one if row_data not in participant_agent.keys(): - #add an agent for this person - participant_agent[row_data] = nidmdoc.graph.agent(QualifiedName(provNamespace("nidm",Constants.NIDM),getUUID()),other_attributes=({Constants.NIDM_SUBJECTID:row_data})) + # add an agent for this person + participant_agent[row_data] = nidmdoc.graph.agent( + QualifiedName( + provNamespace("nidm", Constants.NIDM), getUUID() + ), + other_attributes=({Constants.NIDM_SUBJECTID: row_data}), + ) continue else: - - #get source software matching this column deal with duplicate variables in source_row and pandas changing duplicate names - software_key = source_row.columns[[column_index(df,row_variable)]]._values[0].split(".")[0] - - #see if we already have a software_activity for this agent + # get source software matching this column deal with duplicate variables in source_row and pandas changing duplicate names + software_key = ( + source_row.columns[[column_index(df, row_variable)]] + ._values[0] + .split(".")[0] + ) + + # see if we already have a software_activity for this agent if software_key not in software_activity.keys(): - - #create an activity for the computation...simply a placeholder for more extensive provenance - software_activity[software_key] = nidmdoc.graph.activity(QualifiedName(provNamespace("nidm",Constants.NIDM),getUUID()),other_attributes={Constants.NIDM_PROJECT_DESCRIPTION:"brain volume computation"}) + # create an activity for the computation...simply a placeholder for more extensive provenance + software_activity[software_key] = nidmdoc.graph.activity( + QualifiedName( + provNamespace("nidm", Constants.NIDM), getUUID() + ), + other_attributes={ + Constants.NIDM_PROJECT_DESCRIPTION: "brain volume computation" + }, + ) if root_act is not None: - 
#associate activity with activity of brain volumes creation (root-level activity) - software_activity[software_key].add_attributes({QualifiedName(provNamespace("dct",Constants.DCT),'isPartOf'):root_act}) - - #associate this activity with the participant - nidmdoc.graph.association(activity=software_activity[software_key],agent=participant_agent[participant_id],other_attributes={PROV_ROLE:Constants.NIDM_PARTICIPANT}) - nidmdoc.graph.wasAssociatedWith(activity=software_activity[software_key],agent=participant_agent[participant_id]) - - #check if there's an associated software agent and if not, create one + # associate activity with activity of brain volumes creation (root-level activity) + software_activity[software_key].add_attributes( + { + QualifiedName( + provNamespace("dct", Constants.DCT), "isPartOf" + ): root_act + } + ) + + # associate this activity with the participant + nidmdoc.graph.association( + activity=software_activity[software_key], + agent=participant_agent[participant_id], + other_attributes={PROV_ROLE: Constants.NIDM_PARTICIPANT}, + ) + nidmdoc.graph.wasAssociatedWith( + activity=software_activity[software_key], + agent=participant_agent[participant_id], + ) + + # check if there's an associated software agent and if not, create one if software_key not in software_agent.keys(): - #create an agent - software_agent[software_key] = nidmdoc.graph.agent(QualifiedName(provNamespace("nidm",Constants.NIDM),getUUID()),other_attributes={'prov:type':QualifiedName(provNamespace(Core.safe_string(None,string=str("Neuroimaging Analysis Software")),Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE),""), - QualifiedName(provNamespace(Core.safe_string(None,string=str("Neuroimaging Analysis Software")),Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE),""):software_key } ) - #create qualified association with brain volume computation activity - nidmdoc.graph.association(activity=software_activity[software_key],agent=software_agent[software_key],other_attributes={PROV_ROLE:QualifiedName(provNamespace(Core.safe_string(None,string=str("Neuroimaging Analysis Software")),Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE),"")}) - nidmdoc.graph.wasAssociatedWith(activity=software_activity[software_key],agent=software_agent[software_key]) - - #check if we have an entity for storing this particular variable for this subject and software else create one - if software_activity[software_key].identifier.localpart + participant_agent[participant_id].identifier.localpart not in entity.keys(): - #create an entity to store brain volume data for this participant - entity[software_activity[software_key].identifier.localpart + participant_agent[participant_id].identifier.localpart] = nidmdoc.graph.entity( QualifiedName(provNamespace("nidm",Constants.NIDM),getUUID())) - #add wasGeneratedBy association to activity - nidmdoc.graph.wasGeneratedBy(entity=entity[software_activity[software_key].identifier.localpart + participant_agent[participant_id].identifier.localpart], activity=software_activity[software_key]) - - #get column_to_term mapping uri and add as namespace in NIDM document - entity[software_activity[software_key].identifier.localpart + participant_agent[participant_id].identifier.localpart].add_attributes({QualifiedName(provNamespace(Core.safe_string(None,string=str(row_variable)), column_to_terms[row_variable.split(".")[0]]["url"]),""):row_data}) - #print(project.serializeTurtle()) - - - #just for debugging. 
resulting graph is too big right now for DOT graph creation so here I'm simply creating - #a DOT graph for the processing of 1 row of the brain volumes CSV file so we can at least visually see the - #model + # create an agent + software_agent[software_key] = nidmdoc.graph.agent( + QualifiedName( + provNamespace("nidm", Constants.NIDM), getUUID() + ), + other_attributes={ + "prov:type": QualifiedName( + provNamespace( + Core.safe_string( + None, + string=str( + "Neuroimaging Analysis Software" + ), + ), + Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE, + ), + "", + ), + QualifiedName( + provNamespace( + Core.safe_string( + None, + string=str( + "Neuroimaging Analysis Software" + ), + ), + Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE, + ), + "", + ): software_key, + }, + ) + # create qualified association with brain volume computation activity + nidmdoc.graph.association( + activity=software_activity[software_key], + agent=software_agent[software_key], + other_attributes={ + PROV_ROLE: QualifiedName( + provNamespace( + Core.safe_string( + None, + string=str( + "Neuroimaging Analysis Software" + ), + ), + Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE, + ), + "", + ) + }, + ) + nidmdoc.graph.wasAssociatedWith( + activity=software_activity[software_key], + agent=software_agent[software_key], + ) + + # check if we have an entity for storing this particular variable for this subject and software else create one + if ( + software_activity[software_key].identifier.localpart + + participant_agent[participant_id].identifier.localpart + not in entity.keys() + ): + # create an entity to store brain volume data for this participant + entity[ + software_activity[software_key].identifier.localpart + + participant_agent[participant_id].identifier.localpart + ] = nidmdoc.graph.entity( + QualifiedName( + provNamespace("nidm", Constants.NIDM), getUUID() + ) + ) + # add wasGeneratedBy association to activity + nidmdoc.graph.wasGeneratedBy( + entity=entity[ + software_activity[software_key].identifier.localpart + + participant_agent[participant_id].identifier.localpart + ], + activity=software_activity[software_key], + ) + + # get column_to_term mapping uri and add as namespace in NIDM document + entity[ + software_activity[software_key].identifier.localpart + + participant_agent[participant_id].identifier.localpart + ].add_attributes( + { + QualifiedName( + provNamespace( + Core.safe_string(None, string=str(row_variable)), + column_to_terms[row_variable.split(".")[0]]["url"], + ), + "", + ): row_data + } + ) + # print(project.serializeTurtle()) + + # just for debugging. 
resulting graph is too big right now for DOT graph creation so here I'm simply creating + # a DOT graph for the processing of 1 row of the brain volumes CSV file so we can at least visually see the + # model if png_file is not None: if first_row: - #serialize NIDM file - #with open(args.output_file,'w') as f: + # serialize NIDM file + # with open(args.output_file,'w') as f: # print("Writing NIDM file...") # f.write(nidmdoc.serializeTurtle()) if png_file: nidmdoc.save_DotGraph(str(output_file + ".pdf"), format="pdf") - first_row=False + first_row = False else: - first_row=True - #logic to add to existing graph - #use RDFLib here for temporary graph making query easier + first_row = True + # logic to add to existing graph + # use RDFLib here for temporary graph making query easier rdf_graph = Graph() - rdf_graph_parse = rdf_graph.parse(source=StringIO(nidmdoc.serializeTurtle()),format='turtle') - + rdf_graph_parse = rdf_graph.parse( + source=StringIO(nidmdoc.serializeTurtle()), format="turtle" + ) - #find subject ids and sessions in NIDM document + # find subject ids and sessions in NIDM document query = """SELECT DISTINCT ?session ?nidm_subj_id ?agent ?entity WHERE { ?activity prov:wasAssociatedWith ?agent ; @@ -174,118 +291,268 @@ def add_brainvolume_data(nidmdoc, df, id_field, source_row, column_to_terms, png ndar:src_subject_id ?nidm_subj_id . }""" - #print(query) + # print(query) qres = rdf_graph_parse.query(query) - - for row in qres: - print('%s \t %s' %(row[2],row[1])) - #find row in CSV file with subject id matching agent from NIDM file - - #csv_row = df.loc[df[id_field]==type(df[id_field][0])(row[1])] - #find row in CSV file with matching subject id to the agent in the NIDM file - #be careful about data types...simply type-change dataframe subject id column and query to strings. - #here we're removing the leading 0's from IDs because pandas.read_csv strips those unless you know ahead of - #time which column is the subject id.... - csv_row = df.loc[df[id_field].astype('str').str.contains(str(row[1]).lstrip("0"))] - - #if there was data about this subject in the NIDM file already (i.e. an agent already exists with this subject id) - #then add this brain volumes data to NIDM file, else skip it.... - if (not (len(csv_row.index)==0)): - print("found other data for participant %s" %row[1]) - - #Here we're sure we have an agent in the NIDM graph that corresponds to the participant in the - #brain volumes data. We don't know which AcquisitionObject (entity) describes the T1-weighted scans - #used for the project. Since we don't have the SHA512 sums in the brain volumes data (YET) we can't - #really verify that it's a particular T1-weighted scan that was used for the brain volumes but we're - #simply, for the moment, going to assume it's the activity/session returned by the above query - #where we've specifically asked for the entity which has a nidm:hasImageUsageType nidm:Anatomical - - - - #NIDM document entity uuid which has a nidm:hasImageUsageType nidm:Anatomical - #this is the entity that is associated with the brain volume report for this participant + print("%s \t %s" % (row[2], row[1])) + # find row in CSV file with subject id matching agent from NIDM file + + # csv_row = df.loc[df[id_field]==type(df[id_field][0])(row[1])] + # find row in CSV file with matching subject id to the agent in the NIDM file + # be careful about data types...simply type-change dataframe subject id column and query to strings. 
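# A small self-contained illustration of the subject-ID matching used in this hunk:
# pandas.read_csv() reads numeric-looking IDs as integers and drops leading zeros,
# so the dataframe column is coerced to strings and the leading zeros are stripped
# from the NIDM-side ID before matching. The column name and IDs are placeholders.
import pandas as pd

df = pd.DataFrame({"participant_id": [50002, 50003], "volume": [796.5, 268.9]})
nidm_subject_id = "0050002"  # ID as it appears in the NIDM graph, leading zeros intact

csv_row = df.loc[
    df["participant_id"].astype("str").str.contains(nidm_subject_id.lstrip("0"))
]
# csv_row now holds the single matching row for participant 50002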
+ # here we're removing the leading 0's from IDs because pandas.read_csv strips those unless you know ahead of + # time which column is the subject id.... + csv_row = df.loc[ + df[id_field].astype("str").str.contains(str(row[1]).lstrip("0")) + ] + + # if there was data about this subject in the NIDM file already (i.e. an agent already exists with this subject id) + # then add this brain volumes data to NIDM file, else skip it.... + if not (len(csv_row.index) == 0): + print("found other data for participant %s" % row[1]) + + # Here we're sure we have an agent in the NIDM graph that corresponds to the participant in the + # brain volumes data. We don't know which AcquisitionObject (entity) describes the T1-weighted scans + # used for the project. Since we don't have the SHA512 sums in the brain volumes data (YET) we can't + # really verify that it's a particular T1-weighted scan that was used for the brain volumes but we're + # simply, for the moment, going to assume it's the activity/session returned by the above query + # where we've specifically asked for the entity which has a nidm:hasImageUsageType nidm:Anatomical + + # NIDM document entity uuid which has a nidm:hasImageUsageType nidm:Anatomical + # this is the entity that is associated with the brain volume report for this participant anat_entity_uuid = row[3] - #Now we need to set up the entities/activities, etc. to add the brain volume data for this row of the - #CSV file and link it to the above entity and the agent for this participant which is row[0] - #store other data from row with columns_to_term mappings - for row_variable,row_data in csv_row.iteritems(): - - #check if row_variable is subject id, if so check whether we have an agent for this participant - if row_variable==id_field: - #store participant id for later use in processing the data for this row + # Now we need to set up the entities/activities, etc. 
to add the brain volume data for this row of the + # CSV file and link it to the above entity and the agent for this participant which is row[0] + # store other data from row with columns_to_term mappings + for row_variable, row_data in csv_row.iteritems(): + # check if row_variable is subject id, if so check whether we have an agent for this participant + if row_variable == id_field: + # store participant id for later use in processing the data for this row participant_id = row_data.values[0] - print("participant id: %s" %participant_id) + print("participant id: %s" % participant_id) continue else: - - #get source software matching this column deal with duplicate variables in source_row and pandas changing duplicate names - software_key = source_row.columns[[column_index(df,row_variable)]]._values[0].split(".")[0] - - #see if we already have a software_activity for this agent - if software_key+row[2] not in software_activity.keys(): - - #create an activity for the computation...simply a placeholder for more extensive provenance - software_activity[software_key+row[2]] = nidmdoc.graph.activity(QualifiedName(provNamespace("niiri",Constants.NIIRI),getUUID()),other_attributes={Constants.NIDM_PROJECT_DESCRIPTION:"brain volume computation", - PROV_ATTR_USED_ENTITY:anat_entity_uuid}) - - #associate the activity with the entity containing the original T1-weighted scan which is stored in anat_entity_uuid + # get source software matching this column deal with duplicate variables in source_row and pandas changing duplicate names + software_key = ( + source_row.columns[[column_index(df, row_variable)]] + ._values[0] + .split(".")[0] + ) + + # see if we already have a software_activity for this agent + if software_key + row[2] not in software_activity.keys(): + # create an activity for the computation...simply a placeholder for more extensive provenance + software_activity[ + software_key + row[2] + ] = nidmdoc.graph.activity( + QualifiedName( + provNamespace("niiri", Constants.NIIRI), getUUID() + ), + other_attributes={ + Constants.NIDM_PROJECT_DESCRIPTION: "brain volume computation", + PROV_ATTR_USED_ENTITY: anat_entity_uuid, + }, + ) + + # associate the activity with the entity containing the original T1-weighted scan which is stored in anat_entity_uuid if root_act is not None: - #associate activity with activity of brain volumes creation (root-level activity) - software_activity[software_key+row[2]].add_attributes({QualifiedName(provNamespace("dct",Constants.DCT),'isPartOf'):root_act}) - - - - #associate this activity with the participant..the participant's agent is row[2] in the query response - nidmdoc.graph.association(activity=software_activity[software_key+row[2]],agent=row[2],other_attributes={PROV_ROLE:Constants.NIDM_PARTICIPANT}) - nidmdoc.graph.wasAssociatedWith(activity=software_activity[software_key+row[2]],agent=row[2]) - - - #check if there's an associated software agent and if not, create one + # associate activity with activity of brain volumes creation (root-level activity) + software_activity[software_key + row[2]].add_attributes( + { + QualifiedName( + provNamespace("dct", Constants.DCT), + "isPartOf", + ): root_act + } + ) + + # associate this activity with the participant..the participant's agent is row[2] in the query response + nidmdoc.graph.association( + activity=software_activity[software_key + row[2]], + agent=row[2], + other_attributes={ + PROV_ROLE: Constants.NIDM_PARTICIPANT + }, + ) + nidmdoc.graph.wasAssociatedWith( + activity=software_activity[software_key + 
row[2]], + agent=row[2], + ) + + # check if there's an associated software agent and if not, create one if software_key not in software_agent.keys(): - #if we have a URL defined for this software in Constants.py then use it else simply use the string name of the software product + # if we have a URL defined for this software in Constants.py then use it else simply use the string name of the software product if software_key.lower() in Constants.namespaces: - #create an agent - software_agent[software_key] = nidmdoc.graph.agent(QualifiedName(provNamespace("niiri",Constants.NIIRI),getUUID()),other_attributes={'prov:type':QualifiedName(provNamespace(Core.safe_string(None,string=str("Neuroimaging Analysis Software")),Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE),""), - QualifiedName(provNamespace(Core.safe_string(None,string=str("Neuroimaging Analysis Software")),Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE),""):QualifiedName(provNamespace(software_key,Constants.namespaces[software_key.lower()]),"") } ) + # create an agent + software_agent[software_key] = nidmdoc.graph.agent( + QualifiedName( + provNamespace("niiri", Constants.NIIRI), + getUUID(), + ), + other_attributes={ + "prov:type": QualifiedName( + provNamespace( + Core.safe_string( + None, + string=str( + "Neuroimaging Analysis Software" + ), + ), + Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE, + ), + "", + ), + QualifiedName( + provNamespace( + Core.safe_string( + None, + string=str( + "Neuroimaging Analysis Software" + ), + ), + Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE, + ), + "", + ): QualifiedName( + provNamespace( + software_key, + Constants.namespaces[ + software_key.lower() + ], + ), + "", + ), + }, + ) else: - #create an agent - software_agent[software_key] = nidmdoc.graph.agent(QualifiedName(provNamespace("niiri",Constants.NIIRI),getUUID()),other_attributes={'prov:type':QualifiedName(provNamespace(Core.safe_string(None,string=str("Neuroimaging Analysis Software")),Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE),""), - QualifiedName(provNamespace(Core.safe_string(None,string=str("Neuroimaging Analysis Software")),Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE),""):software_key } ) - #create qualified association with brain volume computation activity - nidmdoc.graph.association(activity=software_activity[software_key+row[2]],agent=software_agent[software_key],other_attributes={PROV_ROLE:QualifiedName(provNamespace(Core.safe_string(None,string=str("Neuroimaging Analysis Software")),Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE),"")}) - nidmdoc.graph.wasAssociatedWith(activity=software_activity[software_key+row[2]],agent=software_agent[software_key]) - - #check if we have an entity for storing this particular variable for this subject and software else create one - if software_activity[software_key+row[2]].identifier.localpart + row[2] not in entity.keys(): - #create an entity to store brain volume data for this participant - entity[software_activity[software_key+row[2]].identifier.localpart + row[2]] = nidmdoc.graph.entity( QualifiedName(provNamespace("niiri",Constants.NIIRI),getUUID())) - #add wasGeneratedBy association to activity - nidmdoc.graph.wasGeneratedBy(entity=entity[software_activity[software_key+row[2]].identifier.localpart + row[2]], activity=software_activity[software_key+row[2]]) - - #get column_to_term mapping uri and add as namespace in NIDM document - entity[software_activity[software_key+row[2]].identifier.localpart + 
row[2]].add_attributes({QualifiedName(provNamespace(Core.safe_string(None,string=str(row_variable)), column_to_terms[row_variable.split(".")[0]]["url"]),""):row_data.values[0]}) - #print(project.serializeTurtle()) - - #just for debugging. resulting graph is too big right now for DOT graph creation so here I'm simply creating - #a DOT graph for the processing of 1 row of the brain volumes CSV file so we can at least visually see the - #model - #if png_file is not None: + # create an agent + software_agent[software_key] = nidmdoc.graph.agent( + QualifiedName( + provNamespace("niiri", Constants.NIIRI), + getUUID(), + ), + other_attributes={ + "prov:type": QualifiedName( + provNamespace( + Core.safe_string( + None, + string=str( + "Neuroimaging Analysis Software" + ), + ), + Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE, + ), + "", + ), + QualifiedName( + provNamespace( + Core.safe_string( + None, + string=str( + "Neuroimaging Analysis Software" + ), + ), + Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE, + ), + "", + ): software_key, + }, + ) + # create qualified association with brain volume computation activity + nidmdoc.graph.association( + activity=software_activity[software_key + row[2]], + agent=software_agent[software_key], + other_attributes={ + PROV_ROLE: QualifiedName( + provNamespace( + Core.safe_string( + None, + string=str( + "Neuroimaging Analysis Software" + ), + ), + Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE, + ), + "", + ) + }, + ) + nidmdoc.graph.wasAssociatedWith( + activity=software_activity[software_key + row[2]], + agent=software_agent[software_key], + ) + + # check if we have an entity for storing this particular variable for this subject and software else create one + if ( + software_activity[ + software_key + row[2] + ].identifier.localpart + + row[2] + not in entity.keys() + ): + # create an entity to store brain volume data for this participant + entity[ + software_activity[ + software_key + row[2] + ].identifier.localpart + + row[2] + ] = nidmdoc.graph.entity( + QualifiedName( + provNamespace("niiri", Constants.NIIRI), getUUID() + ) + ) + # add wasGeneratedBy association to activity + nidmdoc.graph.wasGeneratedBy( + entity=entity[ + software_activity[ + software_key + row[2] + ].identifier.localpart + + row[2] + ], + activity=software_activity[software_key + row[2]], + ) + + # get column_to_term mapping uri and add as namespace in NIDM document + entity[ + software_activity[ + software_key + row[2] + ].identifier.localpart + + row[2] + ].add_attributes( + { + QualifiedName( + provNamespace( + Core.safe_string( + None, string=str(row_variable) + ), + column_to_terms[row_variable.split(".")[0]][ + "url" + ], + ), + "", + ): row_data.values[0] + } + ) + # print(project.serializeTurtle()) + + # just for debugging. 
resulting graph is too big right now for DOT graph creation so here I'm simply creating + # a DOT graph for the processing of 1 row of the brain volumes CSV file so we can at least visually see the + # model + # if png_file is not None: # if first_row: - #serialize NIDM file - #with open(args.output_file,'w') as f: - # print("Writing NIDM file...") - # f.write(nidmdoc.serializeTurtle()) + # serialize NIDM file + # with open(args.output_file,'w') as f: + # print("Writing NIDM file...") + # f.write(nidmdoc.serializeTurtle()) # nidmdoc.save_DotGraph(str(output_file + ".pdf"), format="pdf") # first_row=False - - def main(argv): - parser = ArgumentParser(description="""This program will load in a CSV file made during simple-2 + parser = ArgumentParser( + description="""This program will load in a CSV file made during simple-2 brain volumes experiment which has the following organization: source FSL FSL FSL participant_id left nucleus accumbens volume left amygdala volume @@ -294,172 +561,227 @@ def main(argv): sub-0050004 539.0969914 1195.288168 3561.518188 If will use the first row to determine the software used for the segmentations and the second row for the variable names. Then it does a simple NIDM conversion using - example model in: https://docs.google.com/document/d/1PyBoM7J0TuzTC1TIIFPDqd05nomcCM5Pvst8yCoqLng/edit""") - - parser.add_argument('-csv', dest='csv_file', required=True, help="Path to CSV file to convert") - parser.add_argument('-ilxkey', dest='key', required=True, help="Interlex/SciCrunch API key to use for query") - parser.add_argument('-json_map', dest='json_map',required=False,help="User-suppled JSON file containing variable-term mappings.") - parser.add_argument('-nidm', dest='nidm_file', required=False, help="Optional NIDM file to add CSV->NIDM converted graph to") - parser.add_argument('-owl', action='store_true', required=False, help='Optionally searches NIDM OWL files...internet connection required') - parser.add_argument('-png', action='store_true', required=False, help='Optional flag, when set a PNG image file of RDF graph will be produced') - parser.add_argument('-out', dest='output_file', required=True, help="Filename to save NIDM file") + example model in: https://docs.google.com/document/d/1PyBoM7J0TuzTC1TIIFPDqd05nomcCM5Pvst8yCoqLng/edit""" + ) + + parser.add_argument( + "-csv", dest="csv_file", required=True, help="Path to CSV file to convert" + ) + parser.add_argument( + "-ilxkey", + dest="key", + required=True, + help="Interlex/SciCrunch API key to use for query", + ) + parser.add_argument( + "-json_map", + dest="json_map", + required=False, + help="User-suppled JSON file containing variable-term mappings.", + ) + parser.add_argument( + "-nidm", + dest="nidm_file", + required=False, + help="Optional NIDM file to add CSV->NIDM converted graph to", + ) + parser.add_argument( + "-owl", + action="store_true", + required=False, + help="Optionally searches NIDM OWL files...internet connection required", + ) + parser.add_argument( + "-png", + action="store_true", + required=False, + help="Optional flag, when set a PNG image file of RDF graph will be produced", + ) + parser.add_argument( + "-out", dest="output_file", required=True, help="Filename to save NIDM file" + ) args = parser.parse_args() - #open CSV file and read first line which is the source of the segmentations + # open CSV file and read first line which is the source of the segmentations source_row = pd.read_csv(args.csv_file, nrows=0) - #open CSV file and load into - df = pd.read_csv(args.csv_file, 
skiprows=0,header=1) - #account for duplicate column names + # open CSV file and load into + df = pd.read_csv(args.csv_file, skiprows=0, header=1) + # account for duplicate column names # df.columns = df.iloc[0] df = df.reindex(df.index.drop(0)).reset_index(drop=True) - - #get unique variable names from CSV data file - #note, duplicate variable names will be appended with a ".X" where X is the number of duplicates - unique_vars=[] + # get unique variable names from CSV data file + # note, duplicate variable names will be appended with a ".X" where X is the number of duplicates + unique_vars = [] for variable in list(df): - temp=variable.split(".")[0] + temp = variable.split(".")[0] if temp not in unique_vars: unique_vars.append(temp) - #do same as above for unique software agents - unique_software=[] + # do same as above for unique software agents + unique_software = [] for variable in list(source_row): - temp=variable.split(".")[0] + temp = variable.split(".")[0] if temp not in unique_software: unique_software.append(temp) - - #maps variables in CSV file to terms + # maps variables in CSV file to terms if args.owl: - column_to_terms = map_variables_to_terms(df=pd.DataFrame(columns=unique_vars), apikey=args.key, directory=dirname(args.output_file), output_file=join(dirname(args.output_file),"json_map.json"), json_file=args.json_map,owl_file=args.owl) + column_to_terms = map_variables_to_terms( + df=pd.DataFrame(columns=unique_vars), + apikey=args.key, + directory=dirname(args.output_file), + output_file=join(dirname(args.output_file), "json_map.json"), + json_file=args.json_map, + owl_file=args.owl, + ) else: - column_to_terms = map_variables_to_terms(df=pd.DataFrame(columns=unique_vars), apikey=args.key, directory=dirname(args.output_file), output_file=join(dirname(args.output_file),"json_map.json"), json_file=args.json_map) - - #get subjectID field from CSV - id_field = getSubjIDColumn(column_to_terms,df) + column_to_terms = map_variables_to_terms( + df=pd.DataFrame(columns=unique_vars), + apikey=args.key, + directory=dirname(args.output_file), + output_file=join(dirname(args.output_file), "json_map.json"), + json_file=args.json_map, + ) + # get subjectID field from CSV + id_field = getSubjIDColumn(column_to_terms, df) # WIP!!!######################################################################################### - #go line by line through CSV file creating NIDM structures - #If user has added an existing NIDM file as a command line parameter then add to existing file for subjects who exist in the NIDM file + # go line by line through CSV file creating NIDM structures + # If user has added an existing NIDM file as a command line parameter then add to existing file for subjects who exist in the NIDM file if args.nidm_file is not None: print("Adding to NIDM file...") - #read in NIDM file + # read in NIDM file project = read_nidm(args.nidm_file) - - root_act = project.graph.activity(QualifiedName(provNamespace("niiri",Constants.NIIRI),getUUID()),other_attributes={Constants.NIDM_PROJECT_DESCRIPTION:"Brain volumes provenance document"}) - - #this function sucks...more thought needed for version that works with adding to existing NIDM file versus creating a new NIDM file.... 
- add_brainvolume_data(nidmdoc=project,df=df,id_field=id_field,root_act=root_act,column_to_terms=column_to_terms,png_file=args.png,output_file=args.output_file,source_row=source_row,nidm_graph=True) - - #serialize NIDM file - with open(args.output_file,'w') as f: + root_act = project.graph.activity( + QualifiedName(provNamespace("niiri", Constants.NIIRI), getUUID()), + other_attributes={ + Constants.NIDM_PROJECT_DESCRIPTION: "Brain volumes provenance document" + }, + ) + + # this function sucks...more thought needed for version that works with adding to existing NIDM file versus creating a new NIDM file.... + add_brainvolume_data( + nidmdoc=project, + df=df, + id_field=id_field, + root_act=root_act, + column_to_terms=column_to_terms, + png_file=args.png, + output_file=args.output_file, + source_row=source_row, + nidm_graph=True, + ) + + # serialize NIDM file + with open(args.output_file, "w") as f: print("Writing NIDM file...") f.write(project.serializeTurtle()) - #if args.png: + # if args.png: # nidmdoc.save_DotGraph(str(args.output_file + ".png"), format="png") - - -# #find subject ids and sessions in NIDM document -# query = """SELECT DISTINCT ?session ?nidm_subj_id ?agent ?entity -# WHERE { -# ?activity prov:wasAssociatedWith ?agent ; -# dct:isPartOf ?session . -# ?entity prov:wasGeneratedBy ?activity ; -# nidm:hasImageUsageType nidm:Anatomical . -# ?agent rdf:type prov:Agent ; -# ndar:src_subject_id ?nidm_subj_id . -# -# }""" -# #print(query) -# qres = rdf_graph_parse.query(query) - - - -# for row in qres: -# print('%s \t %s' %(row[0],row[1])) -# #find row in CSV file with subject id matching agent from NIDM file - -# #csv_row = df.loc[df[id_field]==type(df[id_field][0])(row[1])] -# #find row in CSV file with matching subject id to the agent in the NIDM file -# #be careful about data types...simply type-change dataframe subject id column and query to strings. -# #here we're removing the leading 0's from IDs because pandas.read_csv strips those unless you know ahead of -# #time which column is the subject id.... -# csv_row = df.loc[df[id_field].astype('str').str.contains(str(row[1]).lstrip("0"))] - -# #if there was data about this subject in the NIDM file already (i.e. an agent already exists with this subject id) -# #then add this brain volumes data to NIDM file, else skip it.... -# if (not (len(csv_row.index)==0)): - - #Here we're sure we have an agent in the NIDM graph that corresponds to the participant in the - #brain volumes data. We don't know which AcquisitionObject (entity) describes the T1-weighted scans - #used for the project. Since we don't have the SHA512 sums in the brain volumes data (YET) we can't - #really verify that it's a particular T1-weighted scan that was used for the brain volumes but we're - #simply, for the moment, going to assume it's the activity/session returned by the above query - #where we've specifically asked for the entity which has a nidm:hasImageUsageType nidm:Anatomical - - - - #NIDM document entity uuid which has a nidm:hasImageUsageType nidm:Anatomical - #this is the entity that is associated with the brain volume report for this participant -# entity_uuid = row[3] - - #Now we need to set up the entities/activities, etc. 
to add the brain volume data for this row of the - #CSV file and link it to the above entity and the agent for this participant which is row[0] - - - - - - #add acquisition entity for assessment -# acq_entity = AssessmentObject(acquisition=acq) - #add qualified association with existing agent -# acq.add_qualified_association(person=row[2],role=Constants.NIDM_PARTICIPANT) - -# #store other data from row with columns_to_term mappings -# for row_variable in csv_row: - #check if row_variable is subject id, if so skip it -# if row_variable==id_field: -# continue -# else: - #get column_to_term mapping uri and add as namespace in NIDM document - #provNamespace(Core.safe_string(None,string=str(row_variable)), column_to_terms[row_variable]["url"]) -# acq_entity.add_attributes({QualifiedName(provNamespace(Core.safe_string(None,string=str(row_variable)), column_to_terms[row_variable]["url"]), ""):csv_row[row_variable].values[0]}) -# continue - -# #serialize NIDM file -# with open(args.nidm_file,'w') as f: -# print("Writing NIDM file...") -# f.write(project.serializeTurtle()) -# project.save_DotGraph(str(args.nidm_file + ".png"), format="png") + # #find subject ids and sessions in NIDM document + # query = """SELECT DISTINCT ?session ?nidm_subj_id ?agent ?entity + # WHERE { + # ?activity prov:wasAssociatedWith ?agent ; + # dct:isPartOf ?session . + # ?entity prov:wasGeneratedBy ?activity ; + # nidm:hasImageUsageType nidm:Anatomical . + # ?agent rdf:type prov:Agent ; + # ndar:src_subject_id ?nidm_subj_id . + # + # }""" + # #print(query) + # qres = rdf_graph_parse.query(query) + + # for row in qres: + # print('%s \t %s' %(row[0],row[1])) + # #find row in CSV file with subject id matching agent from NIDM file + + # #csv_row = df.loc[df[id_field]==type(df[id_field][0])(row[1])] + # #find row in CSV file with matching subject id to the agent in the NIDM file + # #be careful about data types...simply type-change dataframe subject id column and query to strings. + # #here we're removing the leading 0's from IDs because pandas.read_csv strips those unless you know ahead of + # #time which column is the subject id.... + # csv_row = df.loc[df[id_field].astype('str').str.contains(str(row[1]).lstrip("0"))] + + # #if there was data about this subject in the NIDM file already (i.e. an agent already exists with this subject id) + # #then add this brain volumes data to NIDM file, else skip it.... + # if (not (len(csv_row.index)==0)): + + # Here we're sure we have an agent in the NIDM graph that corresponds to the participant in the + # brain volumes data. We don't know which AcquisitionObject (entity) describes the T1-weighted scans + # used for the project. Since we don't have the SHA512 sums in the brain volumes data (YET) we can't + # really verify that it's a particular T1-weighted scan that was used for the brain volumes but we're + # simply, for the moment, going to assume it's the activity/session returned by the above query + # where we've specifically asked for the entity which has a nidm:hasImageUsageType nidm:Anatomical + + # NIDM document entity uuid which has a nidm:hasImageUsageType nidm:Anatomical + # this is the entity that is associated with the brain volume report for this participant + # entity_uuid = row[3] + + # Now we need to set up the entities/activities, etc. 
to add the brain volume data for this row of the + # CSV file and link it to the above entity and the agent for this participant which is row[0] + + # add acquisition entity for assessment + # acq_entity = AssessmentObject(acquisition=acq) + # add qualified association with existing agent + # acq.add_qualified_association(person=row[2],role=Constants.NIDM_PARTICIPANT) + + # #store other data from row with columns_to_term mappings + # for row_variable in csv_row: + # check if row_variable is subject id, if so skip it + # if row_variable==id_field: + # continue + # else: + # get column_to_term mapping uri and add as namespace in NIDM document + # provNamespace(Core.safe_string(None,string=str(row_variable)), column_to_terms[row_variable]["url"]) + # acq_entity.add_attributes({QualifiedName(provNamespace(Core.safe_string(None,string=str(row_variable)), column_to_terms[row_variable]["url"]), ""):csv_row[row_variable].values[0]}) + # continue + + # #serialize NIDM file + # with open(args.nidm_file,'w') as f: + # print("Writing NIDM file...") + # f.write(project.serializeTurtle()) + # project.save_DotGraph(str(args.nidm_file + ".png"), format="png") ############################################################################################################################## - else: print("Creating NIDM file...") - #If user did not choose to add this data to an existing NIDM file then create a new one for the CSV data + # If user did not choose to add this data to an existing NIDM file then create a new one for the CSV data - #create an empty NIDM graph + # create an empty NIDM graph nidmdoc = Core() - root_act = nidmdoc.graph.activity(QualifiedName(provNamespace("niiri",Constants.NIIRI),getUUID()),other_attributes={Constants.NIDM_PROJECT_DESCRIPTION:"Brain volumes provenance document"}) - - #this function sucks...more thought needed for version that works with adding to existing NIDM file versus creating a new NIDM file.... - add_brainvolume_data(nidmdoc=nidmdoc,df=df,id_field=id_field,root_act=root_act,column_to_terms=column_to_terms,png_file=args.png,output_file=args.output_file,source_row=source_row) - - - - #serialize NIDM file - with open(args.output_file,'w') as f: + root_act = nidmdoc.graph.activity( + QualifiedName(provNamespace("niiri", Constants.NIIRI), getUUID()), + other_attributes={ + Constants.NIDM_PROJECT_DESCRIPTION: "Brain volumes provenance document" + }, + ) + + # this function sucks...more thought needed for version that works with adding to existing NIDM file versus creating a new NIDM file.... 
+ add_brainvolume_data( + nidmdoc=nidmdoc, + df=df, + id_field=id_field, + root_act=root_act, + column_to_terms=column_to_terms, + png_file=args.png, + output_file=args.output_file, + source_row=source_row, + ) + + # serialize NIDM file + with open(args.output_file, "w") as f: print("Writing NIDM file...") f.write(nidmdoc.serializeTurtle()) if args.png: - # nidmdoc.save_DotGraph(str(args.output_file + ".png"), format="png") + # nidmdoc.save_DotGraph(str(args.output_file + ".png"), format="png") nidmdoc.save_DotGraph(str(args.output_file + ".pdf"), format="pdf") if __name__ == "__main__": - main(sys.argv[1:]) + main(sys.argv[1:]) diff --git a/nidm/experiment/tools/repronim_simple2_query.txt b/nidm/experiment/tools/repronim_simple2_query.txt index 3ddae8b3..9c51c449 100644 --- a/nidm/experiment/tools/repronim_simple2_query.txt +++ b/nidm/experiment/tools/repronim_simple2_query.txt @@ -6,11 +6,11 @@ PREFIX dct: PREFIX ncicb: PREFIX ndar: - PREFIX sio: + PREFIX sio: PREFIX xsd: PREFIX onli: - - SELECT DISTINCT ?ID ?Age ?Gender ?FIQ ?PIQ ?VIQ ?RootPath ?Filename + + SELECT DISTINCT ?ID ?Age ?Gender ?FIQ ?PIQ ?VIQ ?RootPath ?Filename WHERE { { ?entity rdf:type nidm:AcquisitionObject ; @@ -33,6 +33,6 @@ bind(IF(?Gender ="1"^^xsd:string, "Male"^^xsd:string,"Female"^^xsd:string) as ?Gender) . } - FILTER(xsd:float(?Age)>=5 && xsd:float(?Age) <=20) - - } \ No newline at end of file + FILTER(xsd:float(?Age)>=5 && xsd:float(?Age) <=20) + + } diff --git a/nidm/experiment/tools/rest.py b/nidm/experiment/tools/rest.py index 65adfd76..11ab9e6f 100644 --- a/nidm/experiment/tools/rest.py +++ b/nidm/experiment/tools/rest.py @@ -1,75 +1,74 @@ -import nidm.experiment.Navigate -from nidm.experiment import Query -import nidm.experiment.tools.rest_statistics -from nidm.core import Constants +from copy import copy, deepcopy +import functools import json -import re -from urllib import parse import logging -import pprint +import operator import os +import pprint +import re from tempfile import gettempdir -from tabulate import tabulate -from copy import copy, deepcopy -from urllib.parse import urlparse, parse_qs -from nidm.experiment import Navigate +from urllib import parse +from urllib.parse import parse_qs, urlparse +from joblib import Memory +from nidm.core import Constants +from nidm.experiment import Navigate, Query +import nidm.experiment.Navigate from nidm.experiment.Utils import validate_uuid +import nidm.experiment.tools.rest_statistics +from numpy import mean, median, std +from tabulate import tabulate -from numpy import std, mean, median -import functools -import operator - -from joblib import Memory memory = Memory(gettempdir(), verbose=0) USE_JOBLIB_CACHE = False import simplejson + def convertListtoDict(lst): - ''' + """ This function converts a list to a dictionary :param lst: list to convert :return: dictionary - ''' - res_dct = {lst[i]: lst[i+1] for i in range(0,len(lst),2)} + """ + res_dct = {lst[i]: lst[i + 1] for i in range(0, len(lst), 2)} return res_dct -class RestParser: +class RestParser: OBJECT_FORMAT = 0 JSON_FORMAT = 1 CLI_FORMAT = 2 - def __init__(self, verbosity_level = 0, output_format = 0): + def __init__(self, verbosity_level=0, output_format=0): self.verbosity_level = verbosity_level self.output_format = output_format - self.restLog ("Setting output format {}".format(self.output_format), 4) + self.restLog("Setting output format {}".format(self.output_format), 4) def setOutputFormat(self, output_format): self.output_format = output_format - self.restLog ("Setting output format 
{}".format(self.output_format), 4) + self.restLog("Setting output format {}".format(self.output_format), 4) ##################### # Standard formatters ##################### def arrayFormat(self, result, headers): - def allUUIDs(arr): uuid_only = True for s in arr: - if type(s) != str or not re.match("^[0-9a-f]+-[0-9a-f]+-[0-9a-f]+-[0-9a-f]+-[0-9a-f]+$", s): + if type(s) != str or not re.match( + "^[0-9a-f]+-[0-9a-f]+-[0-9a-f]+-[0-9a-f]+-[0-9a-f]+$", s + ): uuid_only = False return uuid_only - if self.output_format == RestParser.JSON_FORMAT: return json.dumps(result, indent=2) elif self.output_format == RestParser.CLI_FORMAT: # most likely this is an array of strings but tabulate wants an array of arrays table = [] for s in result: - table.append( [s] ) + table.append([s]) if allUUIDs(result) and headers[0] == "": headers[0] = "UUID" return tabulate(table, headers=headers) @@ -80,28 +79,27 @@ def dictFormat(self, result, headers=[""]): table = [] appendicies = [] for key in result: - # format a list if type(result[key]) == list: appendix = [] for line in result[key]: - appendix.append( [ json.dumps(line) ] ) + appendix.append([json.dumps(line)]) appendicies.append(tabulate(appendix, [key])) # also put really short lists in as comma separated values - if len ( json.dumps(result[key]) ) < 40: - table.append( [ json.dumps(key), ",".join(result[key]) ] ) + if len(json.dumps(result[key])) < 40: + table.append([json.dumps(key), ",".join(result[key])]) # format a string elif type(result[key]) == str: - table.append([ json.dumps(key), result[key]]) + table.append([json.dumps(key), result[key]]) # format a dictionary elif type(result[key]) == dict: # put any dict into it's own table at the end (sort of like an appendix) appendix = [] for inner_key in result[key]: - appendix.append( [key, inner_key, result[key][inner_key] ] ) + appendix.append([key, inner_key, result[key][inner_key]]) appendicies.append(tabulate(appendix)) # format anything else @@ -117,14 +115,13 @@ def dictFormat(self, result, headers=[""]): else: return self.format(result) - def objectTableFormat(self,result, headers = None): - - def flatten(obj, maxDepth=10, table = [], rowInProgress = [], depth = 0): + def objectTableFormat(self, result, headers=None): + def flatten(obj, maxDepth=10, table=[], rowInProgress=[], depth=0): for key in obj: newrow = deepcopy(rowInProgress) - if depth< maxDepth and type(obj[key]) == dict: + if depth < maxDepth and type(obj[key]) == dict: newrow.append(key) - flatten(obj[key], maxDepth, table, newrow, depth+1) + flatten(obj[key], maxDepth, table, newrow, depth + 1) elif type(obj[key]) == str: newrow.append(key) newrow.append(obj[key]) @@ -138,95 +135,147 @@ def flatten(obj, maxDepth=10, table = [], rowInProgress = [], depth = 0): if headers == None: headers = [""] - return (tabulate(flatten(result), headers=headers)) - + return tabulate(flatten(result), headers=headers) def activityDataTableFormat(self, data): - headers = ['uuid', 'measure', 'label', 'value', 'unit'] - rows=[] + headers = ["uuid", "measure", "label", "value", "unit"] + rows = [] for inst_or_deriv in data: for d in inst_or_deriv.data: - rows.append( [inst_or_deriv.uuid, d.measureOf, d.label, d.value, d.hasUnit] ) - - return (tabulate(rows, headers=headers)) + rows.append( + [inst_or_deriv.uuid, d.measureOf, d.label, d.value, d.hasUnit] + ) + return tabulate(rows, headers=headers) ##################### # Custom formatters ##################### def projectSummaryFormat(self, result): - if self.output_format == self.CLI_FORMAT: ### added 
by DBK to sort things if "subjects" in result: - result["subjects"]["uuid"],result["subjects"]["subject id"] = self.sort_list(result["subjects"]["uuid"], result["subjects"]["subject id"]) + ( + result["subjects"]["uuid"], + result["subjects"]["subject id"], + ) = self.sort_list( + result["subjects"]["uuid"], result["subjects"]["subject id"] + ) else: result["subjects"] = [] if "data_elements" in result: - result["data_elements"]["uuid"],result["data_elements"]["label"] = self.sort_list(result["data_elements"]["uuid"], result["data_elements"]["label"]) + ( + result["data_elements"]["uuid"], + result["data_elements"]["label"], + ) = self.sort_list( + result["data_elements"]["uuid"], result["data_elements"]["label"] + ) else: result["data_elements"] = [] toptable = [] for key in result: - if not key in ['subjects', 'data_elements', 'field_values']: - toptable.append([ key, simplejson.dumps(result[key]) ]) - - if 'field_values' in result and len(result['field_values']) > 0 : - fh_header = ['subject', 'label', 'value', 'unit', 'isAbout'] #result['field_values'][0].keys() - fh_rows = [ [x.subject, x.label, x.value, x.hasUnit, x.isAbout] for x in result['field_values']] + if not key in ["subjects", "data_elements", "field_values"]: + toptable.append([key, simplejson.dumps(result[key])]) + + if "field_values" in result and len(result["field_values"]) > 0: + fh_header = [ + "subject", + "label", + "value", + "unit", + "isAbout", + ] # result['field_values'][0].keys() + fh_rows = [ + [x.subject, x.label, x.value, x.hasUnit, x.isAbout] + for x in result["field_values"] + ] field_table = tabulate(fh_rows, fh_header) - #added by DBK, if they asked for fields then just give them the fields + # added by DBK, if they asked for fields then just give them the fields return "{}".format(field_table) else: - field_table = '' + field_table = "" return "{}\n\n{}\n{}\n\n{}\n{}\n\n{}".format( tabulate(toptable), ### modified by DBK to account for new dictionary format of results # tabulate({"subjects": result["subjects"]}, headers="keys"), # sort list 2 by list 1 and replace unsorted version - tabulate([],headers=["Subject Information"]), + tabulate([], headers=["Subject Information"]), tabulate(result["subjects"], headers="keys"), - #tabulate({"data_elements": result["data_elements"]}, headers="keys"), - tabulate([],headers = ["Data Elements"]), - tabulate({'uuid': result["data_elements"]['uuid'], 'label': result["data_elements"]['label']}, headers="keys"), - field_table + # tabulate({"data_elements": result["data_elements"]}, headers="keys"), + tabulate([], headers=["Data Elements"]), + tabulate( + { + "uuid": result["data_elements"]["uuid"], + "label": result["data_elements"]["label"], + }, + headers="keys", + ), + field_table, ) else: # added by DBK to check if we had fields requested then we should just return those - if 'field_values' in result: + if "field_values" in result: # convert result['field_values'] to a list for json export - return self.format(result['field_values']) + return self.format(result["field_values"]) else: return self.format(result) def formatDerivatives(self, derivative): - self.restLog("formatting derivatives in format {}".format(self.output_format), 5) + self.restLog( + "formatting derivatives in format {}".format(self.output_format), 5 + ) if self.output_format == self.CLI_FORMAT: table = [] for uri in derivative: for measurement in derivative[uri]["values"]: - if measurement not in ["http://www.w3.org/ns/prov#wasGeneratedBy", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"]: # 
skip some NIDM structure artifacts - table.append([uri, - measurement, - derivative[uri]["values"][measurement]["label"], - "{} {}".format(derivative[uri]["values"][measurement]["value"], derivative[uri]["values"][measurement]["units"]), - derivative[uri]["values"][measurement]["datumType"], - derivative[uri]["values"][measurement]["isAbout"]]) - return tabulate(table, headers=["Derivative_UUID", "Measurement", "Label", "Value", "Datumtype", "isAbout"]) + if measurement not in [ + "http://www.w3.org/ns/prov#wasGeneratedBy", + "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", + ]: # skip some NIDM structure artifacts + table.append( + [ + uri, + measurement, + derivative[uri]["values"][measurement]["label"], + "{} {}".format( + derivative[uri]["values"][measurement]["value"], + derivative[uri]["values"][measurement]["units"], + ), + derivative[uri]["values"][measurement]["datumType"], + derivative[uri]["values"][measurement]["isAbout"], + ] + ) + return tabulate( + table, + headers=[ + "Derivative_UUID", + "Measurement", + "Label", + "Value", + "Datumtype", + "isAbout", + ], + ) else: return self.format(derivative) def dataElementsFormat(self, de_data): if self.output_format == self.CLI_FORMAT: - table = [] - headers = ['label', 'source_variable', 'hasUnit', 'description', 'dataElement', 'isAbout'] + headers = [ + "label", + "source_variable", + "hasUnit", + "description", + "dataElement", + "isAbout", + ] # for each data element, create a row with each value from the header - for de in de_data['data_elements']['data_type_info']: + for de in de_data["data_elements"]["data_type_info"]: row = [] for h in headers: row.append(de[h]) @@ -243,81 +292,82 @@ def dataElementsFormat(self, de_data): return self.format(de_data) def dataElementDetailsFormat(self, de_data): - return self.format(de_data) - + return self.format(de_data) def subjectFormat(self, subject_data): if self.output_format == self.CLI_FORMAT: - subjects = [] - for subject in subject_data['subject']: + for subject in subject_data["subject"]: subjects.append(subject) text = tabulate(subjects, headers=["Subject UUID", "Source Subject ID"]) - if 'fields' in subject_data: + if "fields" in subject_data: field_data = [] text += "\n\n" - for sub in subject_data['fields']: - for act in subject_data['fields'][sub]: - de = subject_data['fields'][sub][act] + for sub in subject_data["fields"]: + for act in subject_data["fields"][sub]: + de = subject_data["fields"][sub][act] field_data.append([sub, act, de.label, de.value]) - text += tabulate(field_data, headers=["Subject", "Activity", "Field", "Value"]) + text += tabulate( + field_data, headers=["Subject", "Activity", "Field", "Value"] + ) return text else: return self.format(subject_data) - def subjectSummaryFormat(self,result): + def subjectSummaryFormat(self, result): if self.output_format == self.CLI_FORMAT: - special_keys = ['instruments', 'derivatives', "activity"] + special_keys = ["instruments", "derivatives", "activity"] toptable = [] for key in result: if not key in special_keys: - toptable.append([ key, result[key] ]) + toptable.append([key, result[key]]) for key in special_keys: if type(result[key]) == dict: - toptable.append( [ key, ",".join(result[key].keys()) ] ) + toptable.append([key, ",".join(result[key].keys())]) elif type(result[key]) == list: - toptable.append( [ key, ",".join(result[key]) ]) + toptable.append([key, ",".join(result[key])]) else: - toptable.append([key, json.dumps(result[key]) ]) + toptable.append([key, json.dumps(result[key])]) - instruments = 
self.objectTableFormat(result['instruments'], ["Instrument_UUID", "Category", "Value"]) - derivatives= self.formatDerivatives(result['derivatives']) - - return "{}\n\n{}\n\n{}".format( - tabulate(toptable), - derivatives, - instruments + instruments = self.objectTableFormat( + result["instruments"], ["Instrument_UUID", "Category", "Value"] ) + derivatives = self.formatDerivatives(result["derivatives"]) + + return "{}\n\n{}\n\n{}".format(tabulate(toptable), derivatives, instruments) else: return self.format(result) - - def subjectSummaryFormat_v2(self,result): + def subjectSummaryFormat_v2(self, result): if self.output_format == self.CLI_FORMAT: - special_keys = ['instruments', 'derivatives', "activity"] + special_keys = ["instruments", "derivatives", "activity"] toptable = [] for key in result: if not key in special_keys: - toptable.append([ key, result[key] ]) + toptable.append([key, result[key]]) for key in special_keys: if key in result: if type(result[key]) == dict: - toptable.append( [ key, ",".join(result[key].keys()) ] ) - if type(result[key]) == list and len(result[key]) > 0 and type(result[key][0]) == Navigate.ActivityData: - toptable.append( [ key, ",".join( [x.uuid for x in result[key] ] ) ]) + toptable.append([key, ",".join(result[key].keys())]) + if ( + type(result[key]) == list + and len(result[key]) > 0 + and type(result[key][0]) == Navigate.ActivityData + ): + toptable.append([key, ",".join([x.uuid for x in result[key]])]) elif type(result[key]) == list: toptable.append([key, ",".join]) else: - toptable.append([key, json.dumps(result[key]) ]) + toptable.append([key, json.dumps(result[key])]) - instruments = self.activityDataTableFormat(result['instruments']) - derivatives= self.activityDataTableFormat(result['derivatives']) + instruments = self.activityDataTableFormat(result["instruments"]) + derivatives = self.activityDataTableFormat(result["derivatives"]) - return "{}\n\n{}\n\n{}".format(tabulate(toptable), derivatives, instruments ) + return "{}\n\n{}\n\n{}".format(tabulate(toptable), derivatives, instruments) else: return self.format(result) @@ -325,16 +375,16 @@ def subjectSummaryFormat_v2(self,result): ##################### # Sort Functions ##################### - def sort_list (self,list1,list2): - ''' + def sort_list(self, list1, list2): + """ This function will sort list 1 using list 2 values, returning sorted list 1, sorted list 2 - ''' + """ if len(list1) == 0 or len(list2) == 0: return list1, list2 - list1 = list(zip(*sorted(zip(list2,list1))))[1] - return list1,sorted(list2) + list1 = list(zip(*sorted(zip(list2, list1))))[1] + return list1, sorted(list2) ##################### # Route Functions @@ -352,7 +402,6 @@ def dataelementsSummary(self): result = Navigate.GetDataelementDetails(self.nidm_files, dataelement) return self.dataElementDetailsFormat(result) - def projects(self): result = [] field_values = [] @@ -361,49 +410,59 @@ def projects(self): for uuid in projects: result.append(str(uuid).replace(Constants.NIIRI, "")) - # if we got fields, drill into each subject and pull out the field data # subject details -> derivatives / instrument -> values -> element - if 'fields' in self.query and len(self.query['fields']) > 0: + if "fields" in self.query and len(self.query["fields"]) > 0: subjects_set = set() dataelements_set = set() - self.restLog("Using fields {}".format(self.query['fields']), 2) + self.restLog("Using fields {}".format(self.query["fields"]), 2) # result['field_values'] = [] for proj in projects: # get all the synonyms for all the fields - 
field_synonyms = functools.reduce(operator.iconcat, - [Query.GetDatatypeSynonyms(self.nidm_files, proj, x) for x in - self.query['fields']], []) + field_synonyms = functools.reduce( + operator.iconcat, + [ + Query.GetDatatypeSynonyms(self.nidm_files, proj, x) + for x in self.query["fields"] + ], + [], + ) files = self.nidm_files - all_subjects = Query.GetParticipantUUIDsForProject(self.nidm_files, proj, self.query['filter']) # nidm_file_list= files, project_id=proj['uuid'], filter=self.query['filter']): - for sub in all_subjects['uuid']: - + all_subjects = Query.GetParticipantUUIDsForProject( + self.nidm_files, proj, self.query["filter"] + ) # nidm_file_list= files, project_id=proj['uuid'], filter=self.query['filter']): + for sub in all_subjects["uuid"]: for activity in Navigate.getActivities(self.nidm_files, sub): activity = Navigate.getActivityData(self.nidm_files, activity) for data_element in activity.data: if data_element.dataElement in field_synonyms: field_values.append(data_element._replace(subject=sub)) subjects_set.add(sub) - dataelements_set.add( (data_element.datumType, data_element.label) ) + dataelements_set.add( + (data_element.datumType, data_element.label) + ) if len(field_values) == 0: - raise ValueError("Supplied field not found. (" + ", ".join(self.query['fields']) + ")") + raise ValueError( + "Supplied field not found. (" + + ", ".join(self.query["fields"]) + + ")" + ) else: summary_result = {} - summary_result['subjects']= {"uuid":[], "subject id":[]} + summary_result["subjects"] = {"uuid": [], "subject id": []} for sub in subjects_set: summary_result["subjects"]["uuid"].append(sub) summary_result["subjects"]["subject id"].append("") - summary_result['data_elements'] = {"uuid": [], "label":[]} + summary_result["data_elements"] = {"uuid": [], "label": []} for de in dataelements_set: - summary_result['data_elements']["uuid"] = de[0] - summary_result['data_elements']["label"] = de[1] - summary_result['field_values'] = field_values + summary_result["data_elements"]["uuid"] = de[0] + summary_result["data_elements"]["label"] = de[1] + summary_result["field_values"] = field_values return self.projectSummaryFormat(summary_result) - return self.format(result, ["UUID"]) def ExpandProjectMetaData(self, meta_data): @@ -420,37 +479,52 @@ def ExpandProjectMetaData(self, meta_data): :param meta_data: :return: """ - for project_id in meta_data['projects']: - - project_uuid = str(project_id)[6:] if (str(project_id).startswith("niiri:")) else project_id - project = meta_data['projects'][project_id] + for project_id in meta_data["projects"]: + project_uuid = ( + str(project_id)[6:] + if (str(project_id).startswith("niiri:")) + else project_id + ) + project = meta_data["projects"][project_id] ages = set() hands = set() genders = set() - for session in Navigate.getSessions(self.nidm_files, project_uuid): for acq in Navigate.getAcquisitions(self.nidm_files, session): act_data = Navigate.getActivityData(self.nidm_files, acq) for de in act_data.data: - if de.isAbout == "http://uri.interlex.org/ilx_0100400" or de.isAbout == "http://uri.interlex.org/base/ilx_0100400": - if de.value == 'n/a' or de.value =='nan': + if ( + de.isAbout == "http://uri.interlex.org/ilx_0100400" + or de.isAbout == "http://uri.interlex.org/base/ilx_0100400" + ): + if de.value == "n/a" or de.value == "nan": ages.add(float("nan")) else: ages.add(float(de.value)) - elif de.isAbout == "http://uri.interlex.org/ilx_0101292" or de.isAbout == "http://uri.interlex.org/base/ilx_0101292"\ - or de.isAbout == 
"http://uri.interlex.org/ilx_0738439" or de.isAbout == \ - "https://ndar.nih.gov/api/datadictionary/v2/dataelement/gender": + elif ( + de.isAbout == "http://uri.interlex.org/ilx_0101292" + or de.isAbout == "http://uri.interlex.org/base/ilx_0101292" + or de.isAbout == "http://uri.interlex.org/ilx_0738439" + or de.isAbout + == "https://ndar.nih.gov/api/datadictionary/v2/dataelement/gender" + ): genders.add(de.value) - elif de.isAbout == "http://purl.obolibrary.org/obo/PATO_0002201": + elif ( + de.isAbout == "http://purl.obolibrary.org/obo/PATO_0002201" + ): hands.add(de.value) print(Query.GetParticipantUUIDsForProject(self.nidm_files, project_uuid)) - project['age_max'] = max(ages) if len(ages) > 0 else 0 - project['age_min'] = min(ages) if len(ages) > 0 else 0 - project[Query.matchPrefix(str(Constants.NIDM_NUMBER_OF_SUBJECTS))] = len((Query.GetParticipantUUIDsForProject(self.nidm_files, project_uuid))['uuid']) + project["age_max"] = max(ages) if len(ages) > 0 else 0 + project["age_min"] = min(ages) if len(ages) > 0 else 0 + project[Query.matchPrefix(str(Constants.NIDM_NUMBER_OF_SUBJECTS))] = len( + (Query.GetParticipantUUIDsForProject(self.nidm_files, project_uuid))[ + "uuid" + ] + ) project[str(Constants.NIDM_GENDER)] = list(genders) project[str(Constants.NIDM_HANDEDNESS)] = list(hands) @@ -467,71 +541,86 @@ def projectStats(self): self.ExpandProjectMetaData(meta_data) projects = Query.compressForJSONResponse(meta_data) - for pid in projects['projects'].keys(): + for pid in projects["projects"].keys(): self.restLog("comparng " + str(pid) + " with " + str(id), 5) self.restLog("comparng " + str(pid) + " with " + Constants.NIIRI + id, 5) self.restLog("comparng " + str(pid) + " with niiri:" + id, 5) if pid == id or pid == Constants.NIIRI + id or pid == "niiri:" + id: # strip off prefixes to make it more human readable - for key in projects['projects'][pid]: + for key in projects["projects"][pid]: short_key = key - possible_prefix = re.sub(':.*', '', short_key) + possible_prefix = re.sub(":.*", "", short_key) if possible_prefix in Constants.namespaces: - short_key = re.sub('^.*:', '', short_key) - result[short_key] = projects['projects'][pid][key] + short_key = re.sub("^.*:", "", short_key) + result[short_key] = projects["projects"][pid][key] # now get any fields they requested - for field in self.query['fields']: + for field in self.query["fields"]: if subjects == None: - subjects = Query.GetParticipantUUIDsForProject(tuple(self.nidm_files), project_id=id, filter=self.query['filter']) - result['subjects'] = subjects['uuid'] - bits = field.split('.') + subjects = Query.GetParticipantUUIDsForProject( + tuple(self.nidm_files), project_id=id, filter=self.query["filter"] + ) + result["subjects"] = subjects["uuid"] + bits = field.split(".") if len(bits) > 1: - stat_type = self.getStatType(bits[0]) # should be either instruments or derivatives for now. - self.addFieldStats(result, id, subjects['uuid'], bits[1], stat_type) # bits[1] will be the ID + stat_type = self.getStatType( + bits[0] + ) # should be either instruments or derivatives for now. 
+ self.addFieldStats( + result, id, subjects["uuid"], bits[1], stat_type + ) # bits[1] will be the ID return self.dictFormat(result) STAT_TYPE_OTHER = 0 STAT_TYPE_INSTRUMENTS = 1 STAT_TYPE_DERIVATIVES = 2 + def getStatType(self, name): - lookup = {"instruments": self.STAT_TYPE_INSTRUMENTS, "derivatives" : self.STAT_TYPE_DERIVATIVES} - if name in lookup: return lookup[name] + lookup = { + "instruments": self.STAT_TYPE_INSTRUMENTS, + "derivatives": self.STAT_TYPE_DERIVATIVES, + } + if name in lookup: + return lookup[name] return self.STAT_TYPE_OTHER - @staticmethod def getTailOfURI(uri): - if '#' in uri: - return uri[uri.rfind('#') + 1:] + if "#" in uri: + return uri[uri.rfind("#") + 1 :] else: - return uri[uri.rfind('/') + 1:] - + return uri[uri.rfind("/") + 1 :] def addFieldStats(self, result, project, subjects, field, type): - ''' + """ Geneerates basic stats on a group of subjects and adds it to the result :param result: :param subjects: :param field: :return: - ''' + """ values = [] for s in subjects: if type == self.STAT_TYPE_INSTRUMENTS: - data = Query.GetParticipantInstrumentData(tuple(self.nidm_files), project, s) + data = Query.GetParticipantInstrumentData( + tuple(self.nidm_files), project, s + ) for i in data: if field in data[i]: - values.append( float(data[i][field]) ) + values.append(float(data[i][field])) # derivatives are of the form [UUID]['values'][URI]{datumType, label, values, units} if type == self.STAT_TYPE_DERIVATIVES: - data = Query.GetDerivativesDataForSubject(tuple(self.nidm_files), project, s) + data = Query.GetDerivativesDataForSubject( + tuple(self.nidm_files), project, s + ) for deriv in data: - for URI in data[deriv]['values']: - measures = data[deriv]['values'][URI] - if field == measures['label'] or field == self.getTailOfURI(URI): - values.append( float(measures['value']) ) + for URI in data[deriv]["values"]: + measures = data[deriv]["values"][URI] + if field == measures["label"] or field == self.getTailOfURI( + URI + ): + values.append(float(measures["value"])) if len(values) > 0: med = median(values) @@ -541,65 +630,97 @@ def addFieldStats(self, result, project, subjects, field, type): mx = max(values) else: med = avg = st = mn = mx = None - result[field] = {"max": mx, "min": mn, "median": med, "mean": avg, "standard_deviation": st} + result[field] = { + "max": mx, + "min": mn, + "median": med, + "mean": avg, + "standard_deviation": st, + } def projectSummary(self): - match = re.match(r"^/?projects/([^/]+)$", self.command) id = parse.unquote(str(match.group(1))) self.restLog("Returning project {} summary".format(id), 2) - result = nidm.experiment.Navigate.GetProjectAttributes(self.nidm_files, project_id=id) - result['subjects'] = Query.GetParticipantUUIDsForProject(self.nidm_files, project_id=id, filter=self.query['filter']) - result['data_elements'] = Query.GetProjectDataElements(self.nidm_files, project_id=id) - + result = nidm.experiment.Navigate.GetProjectAttributes( + self.nidm_files, project_id=id + ) + result["subjects"] = Query.GetParticipantUUIDsForProject( + self.nidm_files, project_id=id, filter=self.query["filter"] + ) + result["data_elements"] = Query.GetProjectDataElements( + self.nidm_files, project_id=id + ) # if we got fields, drill into each subject and pull out the field data # subject details -> derivatives / instrument -> values -> element - if 'fields' in self.query and len(self.query['fields']) > 0: - self.restLog("Using fields {}".format(self.query['fields']), 2) - result['field_values'] = [] + if "fields" in self.query and 
len(self.query["fields"]) > 0: + self.restLog("Using fields {}".format(self.query["fields"]), 2) + result["field_values"] = [] # get all the synonyms for all the fields - field_synonyms = functools.reduce( operator.iconcat, [ Query.GetDatatypeSynonyms(self.nidm_files, id, x) for x in self.query['fields'] ], []) - for sub in result['subjects']['uuid']: - + field_synonyms = functools.reduce( + operator.iconcat, + [ + Query.GetDatatypeSynonyms(self.nidm_files, id, x) + for x in self.query["fields"] + ], + [], + ) + for sub in result["subjects"]["uuid"]: for activity in Navigate.getActivities(self.nidm_files, sub): activity = Navigate.getActivityData(self.nidm_files, activity) for data_element in activity.data: if data_element.dataElement in field_synonyms: - result['field_values'].append(data_element._replace(subject=sub)) + result["field_values"].append( + data_element._replace(subject=sub) + ) - if len(result['field_values']) == 0: - raise ValueError("Supplied field not found. (" + ", ".join(self.query['fields']) + ")") + if len(result["field_values"]) == 0: + raise ValueError( + "Supplied field not found. (" + + ", ".join(self.query["fields"]) + + ")" + ) return self.projectSummaryFormat(result) - def subjectsList(self): match = re.match(r"^/?projects/([^/]+)/subjects/?$", self.command) project = match.group((1)) - self.restLog("Returning all agents matching filter '{}' for project {}".format(self.query['filter'], project), 2) + self.restLog( + "Returning all agents matching filter '{}' for project {}".format( + self.query["filter"], project + ), + 2, + ) # result = Query.GetParticipantUUIDsForProject(self.nidm_files, project, self.query['filter'], None) all_subjects = Navigate.getSubjects(self.nidm_files, project) result = {} - result['uuid'] = [] - result['subject id'] = [] + result["uuid"] = [] + result["subject id"] = [] for sub_uuid in all_subjects: - if Query.CheckSubjectMatchesFilter(self.nidm_files,project, sub_uuid, self.query['filter']): - uuid_string = (str(sub_uuid)).split('/')[-1] # srip off the http://whatever/whatever/ - result['uuid'].append(uuid_string) + if Query.CheckSubjectMatchesFilter( + self.nidm_files, project, sub_uuid, self.query["filter"] + ): + uuid_string = (str(sub_uuid)).split("/")[ + -1 + ] # srip off the http://whatever/whatever/ + result["uuid"].append(uuid_string) sid = Navigate.getSubjectIDfromUUID(self.nidm_files, sub_uuid) - result['subject id'].append(str(sid)) + result["subject id"].append(str(sid)) return self.format(result) def projectSubjectSummary(self): match = re.match(r"^/?projects/([^/]+)/subjects/([^/]+)/?$", self.command) subject = Navigate.normalizeSingleSubjectToUUID(self.nidm_files, match.group(2)) self.restLog("Returning info about subject {}".format(match.group(2)), 2) - return self.subjectSummaryFormat(Query.GetParticipantDetails(self.nidm_files, match.group(1), subject)) + return self.subjectSummaryFormat( + Query.GetParticipantDetails(self.nidm_files, match.group(1), subject) + ) def getFieldInfoForSubject(self, project, subject): - ''' + """ Returns a dictionary of activities where the subject has matching field data The result[activity] is the full data_element so to get the value you would use result[activity].value Note that a subject could match the same field in multiple activities. 
@@ -607,13 +728,20 @@ def getFieldInfoForSubject(self, project, subject): :param project: :param subject: :return: - ''' + """ result = {} # if we got fields, drill into each subject and pull out the field data # subject details -> derivatives / instrument -> values -> element - if 'fields' in self.query and len(self.query['fields']) > 0: + if "fields" in self.query and len(self.query["fields"]) > 0: # get all the synonyms for all the fields - we can search for them all at once - field_synonyms = functools.reduce( operator.iconcat, [ Query.GetDatatypeSynonyms(self.nidm_files, project, x) for x in self.query['fields'] ], []) + field_synonyms = functools.reduce( + operator.iconcat, + [ + Query.GetDatatypeSynonyms(self.nidm_files, project, x) + for x in self.query["fields"] + ], + [], + ) # print (field_synonyms) @@ -621,27 +749,37 @@ def getFieldInfoForSubject(self, project, subject): activity_data = Navigate.getActivityData(self.nidm_files, activity) # print ([ x.label for x in activity.data]) for data_element in activity_data.data: - if not set([data_element.dataElement, data_element.label, data_element.isAbout]).isdisjoint(set(field_synonyms)): + if not set( + [ + data_element.dataElement, + data_element.label, + data_element.isAbout, + ] + ).isdisjoint(set(field_synonyms)): result[Query.URITail(activity)] = data_element return result - def subjects(self): - self.restLog("Returning info about subjects",2) + self.restLog("Returning info about subjects", 2) projects = Navigate.getProjects(self.nidm_files) - result = {'subject': []} - if 'fields' in self.query and len(self.query['fields']) > 0: - result['fields'] = {} + result = {"subject": []} + if "fields" in self.query and len(self.query["fields"]) > 0: + result["fields"] = {} for proj in projects: subs = Navigate.getSubjects(self.nidm_files, proj) for s in subs: - result['subject'].append( [Query.URITail(s), Navigate.getSubjectIDfromUUID(self.nidm_files, s) ]) + result["subject"].append( + [ + Query.URITail(s), + Navigate.getSubjectIDfromUUID(self.nidm_files, s), + ] + ) # print ("getting info for " + str(s)) x = self.getFieldInfoForSubject(proj, s) if x != {}: - result['fields'][Query.URITail(s)] = x + result["fields"][Query.URITail(s)] = x return self.subjectFormat(result) def subjectSummary(self): @@ -663,27 +801,46 @@ def subjectSummary(self): data = Navigate.getActivityData(self.nidm_files, a) activityData.append(data) - return self.subjectSummaryFormat_v2( {'uuid': sub_ids, - 'instruments' : list(filter(lambda x: x.category == 'instrument', activityData)), - 'derivatives' : list(filter(lambda x: x.category == 'derivative', activityData)) - }) - + return self.subjectSummaryFormat_v2( + { + "uuid": sub_ids, + "instruments": list( + filter(lambda x: x.category == "instrument", activityData) + ), + "derivatives": list( + filter(lambda x: x.category == "derivative", activityData) + ), + } + ) def instrumentsList(self): result = [] - match = re.match(r"^/?projects/([^/]+)/subjects/([^/]+)/instruments/?$", self.command) + match = re.match( + r"^/?projects/([^/]+)/subjects/([^/]+)/instruments/?$", self.command + ) self.restLog("Returning instruments in subject {}".format(match.group(2)), 2) subject = Navigate.normalizeSingleSubjectToUUID(self.nidm_files, match.group(2)) - instruments = Query.GetParticipantInstrumentData(self.nidm_files, match.group(1), subject) + instruments = Query.GetParticipantInstrumentData( + self.nidm_files, match.group(1), subject + ) for i in instruments: result.append(i) return self.format(result) def 
instrumentSummary(self): - match = re.match(r"^/?projects/([^/]+)/subjects/([^/]+)/instruments/([^/]+)$", self.command) - self.restLog("Returning instrument {} in subject {}".format(match.group(3), match.group(2)), 2) + match = re.match( + r"^/?projects/([^/]+)/subjects/([^/]+)/instruments/([^/]+)$", self.command + ) + self.restLog( + "Returning instrument {} in subject {}".format( + match.group(3), match.group(2) + ), + 2, + ) subject = Navigate.normalizeSingleSubjectToUUID(self.nidm_files, match.group(2)) - instruments = Query.GetParticipantInstrumentData(self.nidm_files, match.group(1), subject) + instruments = Query.GetParticipantInstrumentData( + self.nidm_files, match.group(1), subject + ) return self.format(instruments[match.group(3)], headers=["Category", "Value"]) def derivativesList(self): @@ -691,96 +848,116 @@ def derivativesList(self): match = re.match(r"^/?projects/([^/]+)/subjects/([^/]+)", self.command) self.restLog("Returning derivatives in subject {}".format(match.group(2)), 2) subject = Navigate.normalizeSingleSubjectToUUID(self.nidm_files, match.group(2)) - derivatives = Query.GetDerivativesDataForSubject(self.nidm_files, match.group(1), subject) + derivatives = Query.GetDerivativesDataForSubject( + self.nidm_files, match.group(1), subject + ) for s in derivatives: result.append(s) return self.format(result) def derivativeSummary(self): - match = re.match(r"^/?projects/([^/]+)/subjects/([^/]+)/derivatives/([^/]+)", self.command) + match = re.match( + r"^/?projects/([^/]+)/subjects/([^/]+)/derivatives/([^/]+)", self.command + ) subject = Navigate.normalizeSingleSubjectToUUID(self.nidm_files, match.group(2)) uri = match.group(3) self.restLog("Returning stat {} in subject {}".format(uri, match.group(2)), 2) - derivatives = Query.GetDerivativesDataForSubject(self.nidm_files, match.group(1), subject) + derivatives = Query.GetDerivativesDataForSubject( + self.nidm_files, match.group(1), subject + ) - single_derivative = { uri: derivatives[uri] } + single_derivative = {uri: derivatives[uri]} self.restLog("Formatting single derivative", 5) - return self.formatDerivatives(single_derivative) def run(self, nidm_files, command): try: self.restLog("parsing command " + command, 1) self.restLog("Files to read:" + str(nidm_files), 1) - self.restLog("Using {} as the graph cache directory".format(gettempdir()), 1) + self.restLog( + "Using {} as the graph cache directory".format(gettempdir()), 1 + ) self.nidm_files = tuple(nidm_files) - #replace # marks with %23 - they are sometimes used in the is_about terms + # replace # marks with %23 - they are sometimes used in the is_about terms escaped = command.replace("#", "%23") u = urlparse(escaped) self.command = u.path self.query = parse_qs(u.query) - if 'filter' in self.query: - self.query['filter'] = self.query['filter'][0] + if "filter" in self.query: + self.query["filter"] = self.query["filter"][0] else: - self.query['filter'] = None + self.query["filter"] = None # normalize query dict for our particular situation - if 'fields' in self.query: - self.query['fields'] = str.split(self.query['fields'][0], ',') + if "fields" in self.query: + self.query["fields"] = str.split(self.query["fields"][0], ",") else: - self.query['fields'] = [] + self.query["fields"] = [] return self.route() except ValueError as ve: logging.error("Exception: {}".format(ve)) - return (self.format({"error": "One of the supplied field terms was not found."})) - - + return self.format( + {"error": "One of the supplied field terms was not found."} + ) def route(self): + if 
re.match(r"^/?dataelements/?$", self.command): + return self.dataelements() - if re.match(r"^/?dataelements/?$", self.command): return self.dataelements() + if re.match(r"^/?dataelements/[^/]+/?$", self.command): + return self.dataelementsSummary() - if re.match(r"^/?dataelements/[^/]+/?$", self.command): return self.dataelementsSummary() + if re.match(r"^/?projects/?$", self.command): + return self.projects() - if re.match(r"^/?projects/?$", self.command): return self.projects() + if re.match(r"^/?statistics/projects/[^/]+$", self.command): + return self.projectStats() - if re.match(r"^/?statistics/projects/[^/]+$", self.command): return self.projectStats() + if re.match(r"^/?projects/[^/]+$", self.command): + return self.projectSummary() - if re.match(r"^/?projects/[^/]+$", self.command): return self.projectSummary() + if re.match(r"^/?subjects/?$", self.command): + return self.subjects() - if re.match(r"^/?subjects/?$", self.command): return self.subjects() + if re.match(r"^/?subjects/[^/]+$", self.command): + return self.subjectSummary() - if re.match(r"^/?subjects/[^/]+$", self.command): return self.subjectSummary() + if re.match(r"^/?projects/[^/]+/subjects/?$", self.command): + return self.subjectsList() - if re.match(r"^/?projects/[^/]+/subjects/?$", self.command): return self.subjectsList() + if re.match(r"^/?projects/[^/]+/subjects/[^/]+/?$", self.command): + return self.projectSubjectSummary() - if re.match(r"^/?projects/[^/]+/subjects/[^/]+/?$", self.command): return self.projectSubjectSummary() + if re.match(r"^/?projects/[^/]+/subjects/[^/]+/instruments/?$", self.command): + return self.instrumentsList() - if re.match(r"^/?projects/[^/]+/subjects/[^/]+/instruments/?$", self.command): return self.instrumentsList() + if re.match( + r"^/?projects/[^/]+/subjects/[^/]+/instruments/[^/]+/?$", self.command + ): + return self.instrumentSummary() - if re.match(r"^/?projects/[^/]+/subjects/[^/]+/instruments/[^/]+/?$", self.command): return self.instrumentSummary() + if re.match(r"^/?projects/[^/]+/subjects/[^/]+/derivatives/?$", self.command): + return self.derivativesList() - if re.match(r"^/?projects/[^/]+/subjects/[^/]+/derivatives/?$", self.command): return self.derivativesList() - - if re.match(r"^/?projects/[^/]+/subjects/[^/]+/derivatives/[^/]+/?$", self.command): return self.derivativeSummary() + if re.match( + r"^/?projects/[^/]+/subjects/[^/]+/derivatives/[^/]+/?$", self.command + ): + return self.derivativeSummary() self.restLog("NO MATCH!", 2) return {"error": "No match for supplied URI"} - def restLog(self, message, verbosity_of_message): if verbosity_of_message <= self.verbosity_level: - print (message) - - + print(message) - def format(self, result, headers = [""]): + def format(self, result, headers=[""]): if self.output_format == RestParser.JSON_FORMAT: json_str = simplejson.dumps(result, indent=2) return json_str diff --git a/nidm/experiment/tools/rest_statistics.py b/nidm/experiment/tools/rest_statistics.py index a6fa95c2..450a23de 100644 --- a/nidm/experiment/tools/rest_statistics.py +++ b/nidm/experiment/tools/rest_statistics.py @@ -1,29 +1,35 @@ -import nidm.experiment.Navigate -from nidm.core import Constants +import sys from tempfile import gettempdir +from joblib import Memory +from nidm.core import Constants from nidm.experiment import Navigate +import nidm.experiment.Navigate import nidm.experiment.tools.rest -import sys - -from joblib import Memory memory = Memory(gettempdir(), verbose=0) USE_JOBLIB_CACHE = False def 
GetProjectsComputedMetadata(nidm_file_list): - ''' + """ :param nidm_file_list: List of one or more NIDM files to query across for list of Projects :return: Dictionary or projects, each project having a dictionary of project stats including age_max, age_min, gender list, and handedness list. - ''' + """ meta_data = {"projects": {}} projects = Navigate.getProjects(tuple(nidm_file_list)) for p in projects: proj_id = nidm.experiment.tools.rest.RestParser.getTailOfURI(str(p)) - meta_data["projects"][proj_id] = {"age_max": 0, "age_min": sys.maxsize, "gender": [], "handedness": [] } - meta_data["projects"][proj_id].update(Navigate.GetProjectAttributes(tuple(nidm_file_list), p)) + meta_data["projects"][proj_id] = { + "age_max": 0, + "age_min": sys.maxsize, + "gender": [], + "handedness": [], + } + meta_data["projects"][proj_id].update( + Navigate.GetProjectAttributes(tuple(nidm_file_list), p) + ) gender_set = set() hand_set = set() subjects = Navigate.getSubjects(tuple(nidm_file_list), p) @@ -36,10 +42,20 @@ def GetProjectsComputedMetadata(nidm_file_list): if type(data) == nidm.experiment.Navigate.ActivityData: for x in data.data: if x.isAbout == Constants.NIDM_IS_ABOUT_AGE: - if float(x.value) > meta_data["projects"][proj_id]["age_max"]: - meta_data["projects"][proj_id]["age_max"] = float(x.value) - if float(x.value) < meta_data["projects"][proj_id]["age_min"]: - meta_data["projects"][proj_id]["age_min"] = float(x.value) + if ( + float(x.value) + > meta_data["projects"][proj_id]["age_max"] + ): + meta_data["projects"][proj_id]["age_max"] = float( + x.value + ) + if ( + float(x.value) + < meta_data["projects"][proj_id]["age_min"] + ): + meta_data["projects"][proj_id]["age_min"] = float( + x.value + ) if x.isAbout == Constants.NIDM_IS_ABOUT_GENDER: gender_set.add(str(x.value)) if x.isAbout == Constants.NIDM_IS_ABOUT_HANDEDNESS: @@ -47,7 +63,6 @@ def GetProjectsComputedMetadata(nidm_file_list): meta_data["projects"][proj_id]["gender"] = list(gender_set) meta_data["projects"][proj_id]["handedness"] = list(hand_set) - return meta_data # meta_data = GetProjectsMetadata(nidm_file_list) # ExtractProjectSummary(meta_data, nidm_file_list) diff --git a/nidm/experiment/tools/tests/test_nidm_lingreg.py b/nidm/experiment/tools/tests/test_nidm_lingreg.py index 874dff3b..46ccd3a1 100644 --- a/nidm/experiment/tools/tests/test_nidm_lingreg.py +++ b/nidm/experiment/tools/tests/test_nidm_lingreg.py @@ -1,49 +1,54 @@ -import urllib -import re -import sys - -import pytest -import rdflib - -from nidm.experiment import Project, Session, AssessmentAcquisition, AssessmentObject, Acquisition, AcquisitionObject, Query -from nidm.core import Constants -from nidm.experiment.tools.rest import RestParser -from nidm.experiment.tools.tests.test_rest_statistics import BRAIN_VOL_FILES - -from nidm.experiment.tools.nidm_linreg import linear_regression - +from io import BytesIO, TextIOWrapper +import json import os -from os.path import join,sep +from os.path import join, sep from pathlib import Path -from rdflib import Graph, util, URIRef -import json -from io import TextIOWrapper, BytesIO +import re import subprocess from subprocess import PIPE +import sys import tempfile +import urllib import click +from nidm.core import Constants +from nidm.experiment import ( + Acquisition, + AcquisitionObject, + AssessmentAcquisition, + AssessmentObject, + Project, + Query, + Session, +) +from nidm.experiment.tools.nidm_linreg import linear_regression +from nidm.experiment.tools.rest import RestParser +from 
nidm.experiment.tools.tests.test_rest_statistics import BRAIN_VOL_FILES +import pytest +import rdflib +from rdflib import Graph, URIRef, util + @pytest.fixture(scope="module", autouse="True") def setup(): - - for f in ['./cmu_a.nidm.ttl', 'caltech.nidm.ttl']: + for f in ["./cmu_a.nidm.ttl", "caltech.nidm.ttl"]: if Path(f).is_file(): os.remove(f) - if not Path('./cmu_a.nidm.ttl').is_file(): - urllib.request.urlretrieve ( + if not Path("./cmu_a.nidm.ttl").is_file(): + urllib.request.urlretrieve( "https://raw.githubusercontent.com/dbkeator/simple2_NIDM_examples/master/datasets.datalad.org/abide/RawDataBIDS/CMU_a/nidm.ttl", - "cmu_a.nidm.ttl" + "cmu_a.nidm.ttl", ) - if not Path('./caltech.nidm.ttl').is_file(): - urllib.request.urlretrieve ( + if not Path("./caltech.nidm.ttl").is_file(): + urllib.request.urlretrieve( "https://raw.githubusercontent.com/dbkeator/simple2_NIDM_examples/master/datasets.datalad.org/abide/RawDataBIDS/Caltech/nidm.ttl", - "caltech.nidm.ttl" + "caltech.nidm.ttl", ) + def call_click_command(cmd, *args, **kwargs): - """ Wrapper to call a click command + """Wrapper to call a click command :param cmd: click cli command function to call :param args: arguments to pass to the function @@ -53,8 +58,7 @@ def call_click_command(cmd, *args, **kwargs): # Get positional arguments from args arg_values = {c.name: a for a, c in zip(args, cmd.params)} - args_needed = {c.name: c for c in cmd.params - if c.name not in arg_values} + args_needed = {c.name: c for c in cmd.params if c.name not in arg_values} # build and check opts list from kwargs opts = {a.name: a for a in cmd.params if isinstance(a, click.Option)} @@ -66,19 +70,17 @@ def call_click_command(cmd, *args, **kwargs): arg_values[name] = kwargs[name] del args_needed[name] else: - raise click.BadParameter( - "Unknown keyword argument '{}'".format(name)) - + raise click.BadParameter("Unknown keyword argument '{}'".format(name)) # check positional arguments list for arg in (a for a in cmd.params if isinstance(a, click.Argument)): if arg.name not in arg_values: - raise click.BadParameter("Missing required positional" - "parameter '{}'".format(arg.name)) + raise click.BadParameter( + "Missing required positional" "parameter '{}'".format(arg.name) + ) # build parameter lists - opts_list = sum( - [[o.opts[0], arg_values[n]] for n, o in opts.items()], []) + opts_list = sum([[o.opts[0], arg_values[n]] for n, o in opts.items()], []) args_list = [str(v) for n, v in arg_values.items() if n not in opts] # call the command @@ -88,21 +90,20 @@ def call_click_command(cmd, *args, **kwargs): pass - def test_simple_model(): - - # run linear regression tool with simple model and evaluate output dirname = os.path.dirname(__file__) - linreg_dirname = join(sep+join(*(dirname.split(sep)[:-1]))) - + linreg_dirname = join(sep + join(*(dirname.split(sep)[:-1]))) - arguments = (dict(nidm_file_list =",".join(BRAIN_VOL_FILES), - ml='fs_000008 = DX_GROUP + http://uri.interlex.org/ilx_0100400', - ctr=None,regularization=None,output_file="output.txt")) + arguments = dict( + nidm_file_list=",".join(BRAIN_VOL_FILES), + ml="fs_000008 = DX_GROUP + http://uri.interlex.org/ilx_0100400", + ctr=None, + regularization=None, + output_file="output.txt", + ) - call_click_command(linear_regression,*arguments,**arguments) - + call_click_command(linear_regression, *arguments, **arguments) if os.path.exists("output.txt"): fp = open("output.txt", "r") @@ -110,8 +111,6 @@ def test_simple_model(): fp.close() os.remove("output.txt") - - # check if model was read correctly assert 
"fs_000008 ~ ilx_0100400 + DX_GROUP" in out @@ -119,25 +118,38 @@ def test_simple_model(): assert "No. Observations: 53" in out # check model coefficients - assert "const 27.7816 4.378 6.345 0.000 18.988 36.576" in out - assert "ilx_0100400 -0.1832 0.173 -1.061 0.294 -0.530 0.164" in out - assert "DX_GROUP 3.4908 4.031 0.866 0.391 -4.605 11.587" in out + assert ( + "const 27.7816 4.378 6.345 0.000 18.988 36.576" + in out + ) + assert ( + "ilx_0100400 -0.1832 0.173 -1.061 0.294 -0.530 0.164" + in out + ) + assert ( + "DX_GROUP 3.4908 4.031 0.866 0.391 -4.605 11.587" + in out + ) + def test_model_with_contrasts(): # run linear regression tool with simple model and evaluate output - arguments = (dict(nidm_file_list=",".join(BRAIN_VOL_FILES), - ml='fs_000008 = DX_GROUP + http://uri.interlex.org/ilx_0100400', - ctr="DX_GROUP", regularization=None, output_file="output.txt")) + arguments = dict( + nidm_file_list=",".join(BRAIN_VOL_FILES), + ml="fs_000008 = DX_GROUP + http://uri.interlex.org/ilx_0100400", + ctr="DX_GROUP", + regularization=None, + output_file="output.txt", + ) call_click_command(linear_regression, *arguments, **arguments) - - if os.path.exists('output.txt'): - fp = open('output.txt', "r") + if os.path.exists("output.txt"): + fp = open("output.txt", "r") out = fp.read() fp.close() - os.remove('output.txt') + os.remove("output.txt") # print(out) @@ -148,32 +160,67 @@ def test_model_with_contrasts(): assert "No. Observations: 53" in out # check model coefficients for different codings - assert "C(DX_GROUP, Treatment)[T.1] 0.7307 4.209 0.174 0.863 -7.727 9.189" in out - assert "C(DX_GROUP, Treatment)[T.2] 32.7462 15.984 2.049 0.046 0.625 64.868" in out - assert "C(DX_GROUP, Simple)[Simp.0] 0.7307 4.209 0.174 0.863 -7.727 9.189" in out - assert "C(DX_GROUP, Simple)[Simp.1] 32.7462 15.984 2.049 0.046 0.625 64.868" in out - assert "C(DX_GROUP, Sum)[S.0] -11.1590 5.713 -1.953 0.057 -22.639 0.321" in out - assert "C(DX_GROUP, Sum)[S.1] -10.4283 5.631 -1.852 0.070 -21.743 0.887" in out - assert "C(DX_GROUP, Diff)[D.0] 0.7307 4.209 0.174 0.863 -7.727 9.189" in out - assert "C(DX_GROUP, Diff)[D.1] 32.0155 15.896 2.014 0.050 0.070 63.961" in out - assert "C(DX_GROUP, Helmert)[H.1] 0.3653 2.104 0.174 0.863 -3.864 4.594" in out - assert "C(DX_GROUP, Helmert)[H.2] 10.7936 5.267 2.049 0.046 0.209 21.378" in out - -@pytest.mark.skip(reason="regularization weights seem to be different depending on the platform") + assert ( + "C(DX_GROUP, Treatment)[T.1] 0.7307 4.209 0.174 0.863 -7.727 9.189" + in out + ) + assert ( + "C(DX_GROUP, Treatment)[T.2] 32.7462 15.984 2.049 0.046 0.625 64.868" + in out + ) + assert ( + "C(DX_GROUP, Simple)[Simp.0] 0.7307 4.209 0.174 0.863 -7.727 9.189" + in out + ) + assert ( + "C(DX_GROUP, Simple)[Simp.1] 32.7462 15.984 2.049 0.046 0.625 64.868" + in out + ) + assert ( + "C(DX_GROUP, Sum)[S.0] -11.1590 5.713 -1.953 0.057 -22.639 0.321" + in out + ) + assert ( + "C(DX_GROUP, Sum)[S.1] -10.4283 5.631 -1.852 0.070 -21.743 0.887" + in out + ) + assert ( + "C(DX_GROUP, Diff)[D.0] 0.7307 4.209 0.174 0.863 -7.727 9.189" + in out + ) + assert ( + "C(DX_GROUP, Diff)[D.1] 32.0155 15.896 2.014 0.050 0.070 63.961" + in out + ) + assert ( + "C(DX_GROUP, Helmert)[H.1] 0.3653 2.104 0.174 0.863 -3.864 4.594" + in out + ) + assert ( + "C(DX_GROUP, Helmert)[H.2] 10.7936 5.267 2.049 0.046 0.209 21.378" + in out + ) + + +@pytest.mark.skip( + reason="regularization weights seem to be different depending on the platform" +) def test_model_with_contrasts_reg_L1(): - - - arguments = 
(dict(nidm_file_list=",".join(BRAIN_VOL_FILES), - ml='fs_000008 = DX_GROUP + http://uri.interlex.org/ilx_0100400', - ctr="DX_GROUP", regularization="L1", output_file="output.txt")) + arguments = dict( + nidm_file_list=",".join(BRAIN_VOL_FILES), + ml="fs_000008 = DX_GROUP + http://uri.interlex.org/ilx_0100400", + ctr="DX_GROUP", + regularization="L1", + output_file="output.txt", + ) call_click_command(linear_regression, *arguments, **arguments) - if os.path.exists('output.txt'): - fp = open('output.txt', "r") + if os.path.exists("output.txt"): + fp = open("output.txt", "r") out = fp.read() fp.close() - os.remove('output.txt') + os.remove("output.txt") print(out) @@ -190,20 +237,26 @@ def test_model_with_contrasts_reg_L1(): assert "DX_GROUP 0.000000" in out assert "Intercept: 26.000000" in out -@pytest.mark.skip(reason="regularization weights seem to be different depending on the platform") -def test_model_with_contrasts_reg_L2(): - arguments = (dict(nidm_file_list=",".join(BRAIN_VOL_FILES), - ml='fs_000008 = DX_GROUP + http://uri.interlex.org/ilx_0100400', - ctr="DX_GROUP", regularization="L2", output_file="output.txt")) +@pytest.mark.skip( + reason="regularization weights seem to be different depending on the platform" +) +def test_model_with_contrasts_reg_L2(): + arguments = dict( + nidm_file_list=",".join(BRAIN_VOL_FILES), + ml="fs_000008 = DX_GROUP + http://uri.interlex.org/ilx_0100400", + ctr="DX_GROUP", + regularization="L2", + output_file="output.txt", + ) call_click_command(linear_regression, *arguments, **arguments) - if os.path.exists('output.txt'): - fp = open('output.txt', "r") + if os.path.exists("output.txt"): + fp = open("output.txt", "r") out = fp.read() fp.close() - os.remove('output.txt') + os.remove("output.txt") # print(out) @@ -218,4 +271,4 @@ def test_model_with_contrasts_reg_L2(): assert "Current Model Score = 0.017618" in out assert "ilx_0100400 -0.148397" in out assert "DX_GROUP 0.071356" in out - assert "Intercept: 28.951297" in out \ No newline at end of file + assert "Intercept: 28.951297" in out diff --git a/nidm/experiment/tools/tests/test_nidm_query.py b/nidm/experiment/tools/tests/test_nidm_query.py index 878438f1..c59bf32d 100644 --- a/nidm/experiment/tools/tests/test_nidm_query.py +++ b/nidm/experiment/tools/tests/test_nidm_query.py @@ -1,14 +1,15 @@ +import subprocess import click from click.testing import CliRunner -import subprocess import pytest - from ..nidm_query import query + def test_query_failing(): runner = CliRunner() res = runner.invoke(query) assert res.exit_code != 0 assert "Missing option" in res.output + # TODO: adding tests that are passing diff --git a/nidm/experiment/tools/tests/test_rest.py b/nidm/experiment/tools/tests/test_rest.py index c421bc3a..4be7481b 100644 --- a/nidm/experiment/tools/tests/test_rest.py +++ b/nidm/experiment/tools/tests/test_rest.py @@ -1,22 +1,27 @@ -import urllib +import os +from pathlib import Path import re - -import pytest -import rdflib - -from nidm.experiment import Project, Session, AssessmentAcquisition, AssessmentObject, Acquisition, AcquisitionObject, Query -from nidm.experiment.CDE import getCDEs +import urllib from nidm.core import Constants +from nidm.experiment import ( + Acquisition, + AcquisitionObject, + AssessmentAcquisition, + AssessmentObject, + Project, + Query, + Session, +) +from nidm.experiment.CDE import getCDEs from nidm.experiment.tools.rest import RestParser -import os -from pathlib import Path -from rdflib import Graph, util, URIRef - +import pytest +import rdflib +from rdflib import 
Graph, URIRef, util -REST_TEST_FILE = './agent.ttl' -BRAIN_VOL_FILES = ['./cmu_a.nidm.ttl', './caltech.nidm.ttl'] -OPENNEURO_FILES = ['ds000120.nidm.ttl'] -ALL_FILES = ['./cmu_a.nidm.ttl', './caltech.nidm.ttl', 'ds000120.nidm.ttl'] +REST_TEST_FILE = "./agent.ttl" +BRAIN_VOL_FILES = ["./cmu_a.nidm.ttl", "./caltech.nidm.ttl"] +OPENNEURO_FILES = ["ds000120.nidm.ttl"] +ALL_FILES = ["./cmu_a.nidm.ttl", "./caltech.nidm.ttl", "ds000120.nidm.ttl"] OPENNEURO_PROJECT_URI = None OPENNEURO_SUB_URI = None @@ -25,124 +30,175 @@ cmu_test_project_uuid = None cmu_test_subject_uuid = None + @pytest.fixture(scope="module", autouse="True") def setup(): global cmu_test_project_uuid, cmu_test_subject_uuid, OPENNEURO_PROJECT_URI, OPENNEURO_SUB_URI if Path(REST_TEST_FILE).is_file(): os.remove(REST_TEST_FILE) - makeTestFile(filename=REST_TEST_FILE, params={'PROJECT_UUID': 'p1', 'PROJECT2_UUID': 'p2'}) + makeTestFile( + filename=REST_TEST_FILE, params={"PROJECT_UUID": "p1", "PROJECT2_UUID": "p2"} + ) - for f in ['./cmu_a.nidm.ttl', 'caltech.nidm.ttl']: + for f in ["./cmu_a.nidm.ttl", "caltech.nidm.ttl"]: if Path(f).is_file(): os.remove(f) - if not Path('./cmu_a.nidm.ttl').is_file(): - urllib.request.urlretrieve ( + if not Path("./cmu_a.nidm.ttl").is_file(): + urllib.request.urlretrieve( "https://raw.githubusercontent.com/dbkeator/simple2_NIDM_examples/master/datasets.datalad.org/abide/RawDataBIDS/CMU_a/nidm.ttl", - "cmu_a.nidm.ttl" + "cmu_a.nidm.ttl", ) - if not Path('./caltech.nidm.ttl').is_file(): - urllib.request.urlretrieve ( + if not Path("./caltech.nidm.ttl").is_file(): + urllib.request.urlretrieve( "https://raw.githubusercontent.com/dbkeator/simple2_NIDM_examples/master/datasets.datalad.org/abide/RawDataBIDS/Caltech/nidm.ttl", - "caltech.nidm.ttl" + "caltech.nidm.ttl", ) restParser = RestParser(output_format=RestParser.OBJECT_FORMAT) - projects = restParser.run(BRAIN_VOL_FILES, '/projects') + projects = restParser.run(BRAIN_VOL_FILES, "/projects") for p in projects: - proj_info = restParser.run(BRAIN_VOL_FILES, '/projects/{}'.format(p)) - if 'dctypes:title' in proj_info.keys() and proj_info['dctypes:title'] == 'ABIDE - CMU_a': + proj_info = restParser.run(BRAIN_VOL_FILES, "/projects/{}".format(p)) + if ( + "dctypes:title" in proj_info.keys() + and proj_info["dctypes:title"] == "ABIDE - CMU_a" + ): cmu_test_project_uuid = p break - subjects = restParser.run(BRAIN_VOL_FILES, '/projects/{}/subjects'.format(cmu_test_project_uuid)) - cmu_test_subject_uuid = subjects['uuid'][0] - + subjects = restParser.run( + BRAIN_VOL_FILES, "/projects/{}/subjects".format(cmu_test_project_uuid) + ) + cmu_test_subject_uuid = subjects["uuid"][0] - if not Path('./ds000120.nidm.ttl').is_file(): - urllib.request.urlretrieve ( + if not Path("./ds000120.nidm.ttl").is_file(): + urllib.request.urlretrieve( "https://raw.githubusercontent.com/dbkeator/simple2_NIDM_examples/master/datasets.datalad.org/openneuro/ds000120/nidm.ttl", - "ds000120.nidm.ttl" + "ds000120.nidm.ttl", ) - projects2 = restParser.run(OPENNEURO_FILES, '/projects') + projects2 = restParser.run(OPENNEURO_FILES, "/projects") for p in projects2: - proj_info = restParser.run(OPENNEURO_FILES, '/projects/{}'.format(p)) - if 'dctypes:title' in proj_info.keys() and proj_info['dctypes:title'] == 'Developmental changes in brain function underlying the influence of reward processing on inhibitory control (Slot Reward)': + proj_info = restParser.run(OPENNEURO_FILES, "/projects/{}".format(p)) + if ( + "dctypes:title" in proj_info.keys() + and proj_info["dctypes:title"] + == 
"Developmental changes in brain function underlying the influence of reward processing on inhibitory control (Slot Reward)" + ): OPENNEURO_PROJECT_URI = p - subjects = restParser.run(OPENNEURO_FILES, '/projects/{}/subjects'.format(OPENNEURO_PROJECT_URI)) - OPENNEURO_SUB_URI = subjects['uuid'][0] + subjects = restParser.run( + OPENNEURO_FILES, "/projects/{}/subjects".format(OPENNEURO_PROJECT_URI) + ) + OPENNEURO_SUB_URI = subjects["uuid"][0] def addData(acq, data): acq_entity = AssessmentObject(acquisition=acq) for key in data: - acq_entity.add_attributes({key:data[key]}) + acq_entity.add_attributes({key: data[key]}) return acq + def makeTestFile(filename, params): global test_person_uuid, test_p2_subject_uuids - nidm_project_name = params.get('NIDM_PROJECT_NAME', False) or "Project_name_sample" - nidm_project_identifier = params.get('NIDM_PROJECT_IDENTIFIER', False) or 9610 - nidm_project2_identifier = params.get('NIDM_PROJECT_IDENTIFIER', False) or 550 - nidm_project_description = params.get('NIDM_PROJECT_DESCRIPTION', False) or "1234356 Test investigation" - project_uuid = params.get('PROJECT_UUID', False) or "_proj1" - project_uuid2 = params.get('PROJECT2_UUID', False) or "_proj2" - session_uuid = params.get('SESSION_UUID', False) or "_ses1" - session_uuid2 = params.get('SESSION2_UUID', False) or "_ses2" - p1kwargs={Constants.NIDM_PROJECT_NAME:nidm_project_name, Constants.NIDM_PROJECT_IDENTIFIER:nidm_project_identifier, Constants.NIDM_PROJECT_DESCRIPTION:nidm_project_description} - p2kwargs={Constants.NIDM_PROJECT_NAME:nidm_project_name, Constants.NIDM_PROJECT_IDENTIFIER:nidm_project2_identifier, Constants.NIDM_PROJECT_DESCRIPTION:nidm_project_description} - - project = Project(uuid=project_uuid,attributes=p1kwargs) - session = Session(uuid=session_uuid,project=project) - acq = Acquisition(uuid="_acq1",session=session) - acq2 = Acquisition(uuid="_acq2",session=session) - acq3 = Acquisition(uuid="_acq2",session=session) - - person=acq.add_person(attributes=({Constants.NIDM_SUBJECTID:"a1_9999"})) + nidm_project_name = params.get("NIDM_PROJECT_NAME", False) or "Project_name_sample" + nidm_project_identifier = params.get("NIDM_PROJECT_IDENTIFIER", False) or 9610 + nidm_project2_identifier = params.get("NIDM_PROJECT_IDENTIFIER", False) or 550 + nidm_project_description = ( + params.get("NIDM_PROJECT_DESCRIPTION", False) or "1234356 Test investigation" + ) + project_uuid = params.get("PROJECT_UUID", False) or "_proj1" + project_uuid2 = params.get("PROJECT2_UUID", False) or "_proj2" + session_uuid = params.get("SESSION_UUID", False) or "_ses1" + session_uuid2 = params.get("SESSION2_UUID", False) or "_ses2" + p1kwargs = { + Constants.NIDM_PROJECT_NAME: nidm_project_name, + Constants.NIDM_PROJECT_IDENTIFIER: nidm_project_identifier, + Constants.NIDM_PROJECT_DESCRIPTION: nidm_project_description, + } + p2kwargs = { + Constants.NIDM_PROJECT_NAME: nidm_project_name, + Constants.NIDM_PROJECT_IDENTIFIER: nidm_project2_identifier, + Constants.NIDM_PROJECT_DESCRIPTION: nidm_project_description, + } + + project = Project(uuid=project_uuid, attributes=p1kwargs) + session = Session(uuid=session_uuid, project=project) + acq = Acquisition(uuid="_acq1", session=session) + acq2 = Acquisition(uuid="_acq2", session=session) + acq3 = Acquisition(uuid="_acq2", session=session) + + person = acq.add_person(attributes=({Constants.NIDM_SUBJECTID: "a1_9999"})) test_person_uuid = (str(person.identifier)).replace("niiri:", "") + acq.add_qualified_association(person=person, role=Constants.NIDM_PARTICIPANT) - 
acq.add_qualified_association(person=person,role=Constants.NIDM_PARTICIPANT) - - person2=acq2.add_person(attributes=({Constants.NIDM_SUBJECTID:"a1_8888"})) - acq2.add_qualified_association(person=person2,role=Constants.NIDM_PARTICIPANT) - person3=acq3.add_person(attributes=({Constants.NIDM_SUBJECTID:"a2_7777"})) - acq2.add_qualified_association(person=person3,role=Constants.NIDM_PARTICIPANT) + person2 = acq2.add_person(attributes=({Constants.NIDM_SUBJECTID: "a1_8888"})) + acq2.add_qualified_association(person=person2, role=Constants.NIDM_PARTICIPANT) + person3 = acq3.add_person(attributes=({Constants.NIDM_SUBJECTID: "a2_7777"})) + acq2.add_qualified_association(person=person3, role=Constants.NIDM_PARTICIPANT) - project2 = Project(uuid=project_uuid2,attributes=p2kwargs) - session2 = Session(uuid=session_uuid2,project=project2) - acq4 = Acquisition(uuid="_acq3",session=session2) - acq5 = Acquisition(uuid="_acq4",session=session2) + project2 = Project(uuid=project_uuid2, attributes=p2kwargs) + session2 = Session(uuid=session_uuid2, project=project2) + acq4 = Acquisition(uuid="_acq3", session=session2) + acq5 = Acquisition(uuid="_acq4", session=session2) - person4=acq4.add_person(attributes=({Constants.NIDM_SUBJECTID:"a3_6666"})) - acq4.add_qualified_association(person=person4,role=Constants.NIDM_PARTICIPANT) - person5=acq5.add_person(attributes=({Constants.NIDM_SUBJECTID:"a4_5555"})) - acq5.add_qualified_association(person=person5,role=Constants.NIDM_PARTICIPANT) + person4 = acq4.add_person(attributes=({Constants.NIDM_SUBJECTID: "a3_6666"})) + acq4.add_qualified_association(person=person4, role=Constants.NIDM_PARTICIPANT) + person5 = acq5.add_person(attributes=({Constants.NIDM_SUBJECTID: "a4_5555"})) + acq5.add_qualified_association(person=person5, role=Constants.NIDM_PARTICIPANT) # now add some assessment instrument data - addData(acq,{Constants.NIDM_AGE:9, Constants.NIDM_HANDEDNESS: "R", Constants.NIDM_DIAGNOSIS: "Anxiety"}) - addData(acq2,{Constants.NIDM_AGE:8, Constants.NIDM_HANDEDNESS: "L", Constants.NIDM_DIAGNOSIS: "ADHD"}) - addData(acq4,{Constants.NIDM_AGE:7, Constants.NIDM_HANDEDNESS: "A", Constants.NIDM_DIAGNOSIS: "Depression"}) - addData(acq5,{Constants.NIDM_AGE:6, Constants.NIDM_HANDEDNESS: "R", Constants.NIDM_DIAGNOSIS: "Depression"}) - - test_p2_subject_uuids.append( (str(person4.identifier)).replace("niiri:", "") ) - test_p2_subject_uuids.append( (str(person5.identifier)).replace("niiri:", "") ) - - with open("a.ttl",'w') as f: - f.write(project.graph.serialize(None, format='rdf', rdf_format='ttl')) - with open("b.ttl",'w') as f: - f.write(project2.graph.serialize(None, format='rdf', rdf_format='ttl')) - - #create empty graph - graph=Graph() + addData( + acq, + { + Constants.NIDM_AGE: 9, + Constants.NIDM_HANDEDNESS: "R", + Constants.NIDM_DIAGNOSIS: "Anxiety", + }, + ) + addData( + acq2, + { + Constants.NIDM_AGE: 8, + Constants.NIDM_HANDEDNESS: "L", + Constants.NIDM_DIAGNOSIS: "ADHD", + }, + ) + addData( + acq4, + { + Constants.NIDM_AGE: 7, + Constants.NIDM_HANDEDNESS: "A", + Constants.NIDM_DIAGNOSIS: "Depression", + }, + ) + addData( + acq5, + { + Constants.NIDM_AGE: 6, + Constants.NIDM_HANDEDNESS: "R", + Constants.NIDM_DIAGNOSIS: "Depression", + }, + ) + + test_p2_subject_uuids.append((str(person4.identifier)).replace("niiri:", "")) + test_p2_subject_uuids.append((str(person5.identifier)).replace("niiri:", "")) + + with open("a.ttl", "w") as f: + f.write(project.graph.serialize(None, format="rdf", rdf_format="ttl")) + with open("b.ttl", "w") as f: + 
f.write(project2.graph.serialize(None, format="rdf", rdf_format="ttl")) + + # create empty graph + graph = Graph() for nidm_file in ("a.ttl", "b.ttl"): - tmp = Graph() - graph = graph + tmp.parse(nidm_file,format=util.guess_format(nidm_file)) + tmp = Graph() + graph = graph + tmp.parse(nidm_file, format=util.guess_format(nidm_file)) - graph.serialize(filename, format='turtle') + graph.serialize(filename, format="turtle") os.unlink("a.ttl") os.unlink("b.ttl") @@ -153,57 +209,68 @@ def makeTestFile(filename, params): with open("./agent.ttl", "w") as f: f.write(x) + def test_uri_subject_list(): restParser = RestParser(output_format=RestParser.OBJECT_FORMAT) - result = restParser.run(ALL_FILES, '/subjects') + result = restParser.run(ALL_FILES, "/subjects") assert type(result) == dict - assert type(result['subject']) == list - assert len(result['subject']) > 10 + assert type(result["subject"]) == list + assert len(result["subject"]) > 10 + def test_uri_subject_list_with_fields(): restParser = RestParser(output_format=RestParser.OBJECT_FORMAT) - result = restParser.run(ALL_FILES, '/subjects?fields=ilx_0100400,MagneticFieldStrength') # ilx_0100400 "is about" age + result = restParser.run( + ALL_FILES, "/subjects?fields=ilx_0100400,MagneticFieldStrength" + ) # ilx_0100400 "is about" age assert type(result) == dict - assert type(result['subject']) == list - assert len(result['subject']) > 10 + assert type(result["subject"]) == list + assert len(result["subject"]) > 10 - assert type(result['fields']) == dict + assert type(result["fields"]) == dict all_fields = [] - for uuid in result['fields']: - assert type(result['fields']) == dict - for sub in result['fields']: - assert type(result['fields'][sub]) == dict - for activity in result['fields'][sub]: - all_fields.append(result['fields'][sub][activity].label) - if result['fields'][sub][activity].value != 'n/a': - assert float(result['fields'][sub][activity].value) > 0 - assert float(result['fields'][sub][activity].value) < 125 - assert 'age' in all_fields - assert 'MagneticFieldStrength' in all_fields + for uuid in result["fields"]: + assert type(result["fields"]) == dict + for sub in result["fields"]: + assert type(result["fields"][sub]) == dict + for activity in result["fields"][sub]: + all_fields.append(result["fields"][sub][activity].label) + if result["fields"][sub][activity].value != "n/a": + assert float(result["fields"][sub][activity].value) > 0 + assert float(result["fields"][sub][activity].value) < 125 + assert "age" in all_fields + assert "MagneticFieldStrength" in all_fields -def test_uri_project_list(): +def test_uri_project_list(): import uuid - kwargs={Constants.NIDM_PROJECT_NAME:"FBIRN_PhaseII",Constants.NIDM_PROJECT_IDENTIFIER:9610,Constants.NIDM_PROJECT_DESCRIPTION:"Test investigation"} + kwargs = { + Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII", + Constants.NIDM_PROJECT_IDENTIFIER: 9610, + Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation", + } proj1_uuid = str(uuid.uuid1()) proj2_uuid = str(uuid.uuid1()) - project = Project(uuid=proj1_uuid,attributes=kwargs) - #save a turtle file - with open("uritest.ttl",'w') as f: + project = Project(uuid=proj1_uuid, attributes=kwargs) + # save a turtle file + with open("uritest.ttl", "w") as f: f.write(project.serializeTurtle()) - kwargs={Constants.NIDM_PROJECT_NAME:"FBIRN_PhaseIII",Constants.NIDM_PROJECT_IDENTIFIER:1200,Constants.NIDM_PROJECT_DESCRIPTION:"Test investigation2"} - project = Project(uuid=proj2_uuid,attributes=kwargs) - #save a turtle file - with open("uritest2.ttl",'w') as f: 
+ kwargs = { + Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseIII", + Constants.NIDM_PROJECT_IDENTIFIER: 1200, + Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation2", + } + project = Project(uuid=proj2_uuid, attributes=kwargs) + # save a turtle file + with open("uritest2.ttl", "w") as f: f.write(project.serializeTurtle()) restParser = RestParser() - result = restParser.run(['uritest.ttl', 'uritest2.ttl'], '/projects') - + result = restParser.run(["uritest.ttl", "uritest2.ttl"], "/projects") project_uuids = [] @@ -220,49 +287,49 @@ def test_uri_project_list(): def test_uri_project_id(): - # try with the real brain volume files restParser = RestParser() # result = restParser.run(OPENNEURO_FILES, '/projects') project = OPENNEURO_PROJECT_URI - result = restParser.run(OPENNEURO_FILES, '/projects/{}'.format(project)) - - - assert 'dctypes:title' in result - assert 'sio:Identifier' in result - assert 'subjects' in result - assert len(result['subjects']['uuid']) > 2 - assert 'data_elements' in result - assert len(result['data_elements']['uuid']) > 1 + result = restParser.run(OPENNEURO_FILES, "/projects/{}".format(project)) + assert "dctypes:title" in result + assert "sio:Identifier" in result + assert "subjects" in result + assert len(result["subjects"]["uuid"]) > 2 + assert "data_elements" in result + assert len(result["data_elements"]["uuid"]) > 1 def test_uri_projects_subjects_1(): global test_p2_subject_uuids - proj_uuid = 'p2' + proj_uuid = "p2" restParser = RestParser() - result = restParser.run([REST_TEST_FILE], '/projects/{}/subjects'.format(proj_uuid)) + result = restParser.run([REST_TEST_FILE], "/projects/{}/subjects".format(proj_uuid)) assert type(result) == dict - assert len(result['uuid']) == 2 + assert len(result["uuid"]) == 2 + + assert test_p2_subject_uuids[0] in result["uuid"] + assert test_p2_subject_uuids[1] in result["uuid"] - assert test_p2_subject_uuids[0] in result['uuid'] - assert test_p2_subject_uuids[1] in result['uuid'] def test_uri_subjects(): global cmu_test_subject_uuid restParser = RestParser() restParser.setOutputFormat(RestParser.OBJECT_FORMAT) - result = restParser.run(BRAIN_VOL_FILES, '/subjects/{}'.format(cmu_test_subject_uuid)) + result = restParser.run( + BRAIN_VOL_FILES, "/subjects/{}".format(cmu_test_subject_uuid) + ) assert type(result) == dict - assert 'uuid' in result - assert 'instruments' in result - assert 'derivatives' in result + assert "uuid" in result + assert "instruments" in result + assert "derivatives" in result - assert cmu_test_subject_uuid == result['uuid'] + assert cmu_test_subject_uuid == result["uuid"] def test_uri_projects_subjects_id(): @@ -271,27 +338,26 @@ def test_uri_projects_subjects_id(): restParser = RestParser() # result = restParser.run(OPENNEURO_FILES, '/projects') project = OPENNEURO_PROJECT_URI - result = restParser.run(OPENNEURO_FILES, '/projects/{}/subjects'.format(project)) - subject = result['uuid'][0] + result = restParser.run(OPENNEURO_FILES, "/projects/{}/subjects".format(project)) + subject = result["uuid"][0] - uri = '/projects/{}/subjects/{}'.format(project,subject) + uri = "/projects/{}/subjects/{}".format(project, subject) result = restParser.run(OPENNEURO_FILES, uri) assert type(result) == dict - assert result['uuid'] == subject - assert len(result['instruments']) > 2 + assert result["uuid"] == subject + assert len(result["instruments"]) > 2 - instruments = result['instruments'].values() + instruments = result["instruments"].values() all_keys = [] for i in instruments: all_keys += i.keys() - assert 'age' in 
all_keys + assert "age" in all_keys # current test data doesn't have derivatives! # assert len(result['derivatives']) > 0 - def test_get_software_agents(): nidm_file = BRAIN_VOL_FILES[0] rdf_graph = Query.OpenGraph(nidm_file) @@ -300,16 +366,14 @@ def test_get_software_agents(): assert len(agents) > 0 - isa = URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type') - + isa = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type") count = 0 for a in agents: - for s, o, p in rdf_graph.triples( (a, isa, Constants.PROV['Agent']) ): + for s, o, p in rdf_graph.triples((a, isa, Constants.PROV["Agent"])): count += 1 - assert (count == len(agents)) - + assert count == len(agents) def test_brain_vols(): @@ -317,47 +381,45 @@ def test_brain_vols(): if cmu_test_project_uuid: project = cmu_test_project_uuid else: - project = (restParser.run(BRAIN_VOL_FILES, '/projects'))[0] - subjects = restParser.run(BRAIN_VOL_FILES, '/projects/{}/subjects'.format(project)) - subject = subjects['uuid'][0] + project = (restParser.run(BRAIN_VOL_FILES, "/projects"))[0] + subjects = restParser.run(BRAIN_VOL_FILES, "/projects/{}/subjects".format(project)) + subject = subjects["uuid"][0] data = Query.GetDerivativesDataForSubject(BRAIN_VOL_FILES, None, subject) - - assert(len(data) > 0) + assert len(data) > 0 for key in data: - assert('StatCollectionType' in data[key]) - assert('URI' in data[key]) - assert('values' in data[key]) + assert "StatCollectionType" in data[key] + assert "URI" in data[key] + assert "values" in data[key] def test_GetParticipantDetails(): - import time + start = time.time() restParser = RestParser() if cmu_test_project_uuid: project = cmu_test_project_uuid else: - projects = restParser.run(BRAIN_VOL_FILES, '/projects') + projects = restParser.run(BRAIN_VOL_FILES, "/projects") project = projects[0] import time - start = time.time() - subjects = restParser.run(BRAIN_VOL_FILES, '/projects/{}/subjects'.format(project)) - subject = subjects['uuid'][0] + start = time.time() + subjects = restParser.run(BRAIN_VOL_FILES, "/projects/{}/subjects".format(project)) + subject = subjects["uuid"][0] - Query.GetParticipantInstrumentData( BRAIN_VOL_FILES, project, subject ) - + Query.GetParticipantInstrumentData(BRAIN_VOL_FILES, project, subject) - details = Query.GetParticipantDetails( BRAIN_VOL_FILES, project, subject ) + details = Query.GetParticipantDetails(BRAIN_VOL_FILES, project, subject) - assert ('uuid' in details) - assert ('id' in details) - assert ('activity' in details) - assert ('instruments' in details) - assert ('derivatives' in details) + assert "uuid" in details + assert "id" in details + assert "activity" in details + assert "instruments" in details + assert "derivatives" in details end = time.time() runtime = end - start @@ -369,57 +431,85 @@ def test_CheckSubjectMatchesFilter(): if cmu_test_project_uuid: project = cmu_test_project_uuid else: - projects = restParser.run(BRAIN_VOL_FILES, '/projects') + projects = restParser.run(BRAIN_VOL_FILES, "/projects") project = projects[0] - subjects = restParser.run(BRAIN_VOL_FILES, '/projects/{}/subjects'.format(project)) - subject = subjects['uuid'][0] + subjects = restParser.run(BRAIN_VOL_FILES, "/projects/{}/subjects".format(project)) + subject = subjects["uuid"][0] derivatives = Query.GetDerivativesDataForSubject(BRAIN_VOL_FILES, project, subject) for skey in derivatives: - for vkey in derivatives[skey]['values']: + for vkey in derivatives[skey]["values"]: dt = vkey - val = derivatives[skey]['values'][vkey]['value'] - if (dt and val): + val = 
derivatives[skey]["values"][vkey]["value"] + if dt and val: break # find an actual stat and build a matching filter to make sure our matcher passes it - filter = "derivatives.{} eq {}".format(dt,val) - assert Query.CheckSubjectMatchesFilter( BRAIN_VOL_FILES, project, subject, filter) + filter = "derivatives.{} eq {}".format(dt, val) + assert Query.CheckSubjectMatchesFilter(BRAIN_VOL_FILES, project, subject, filter) - - instruments = Query.GetParticipantInstrumentData( BRAIN_VOL_FILES, project, subject ) - for (i,inst) in instruments.items(): - if 'AGE_AT_SCAN' in inst: - age = inst['AGE_AT_SCAN'] + instruments = Query.GetParticipantInstrumentData(BRAIN_VOL_FILES, project, subject) + for i, inst in instruments.items(): + if "AGE_AT_SCAN" in inst: + age = inst["AGE_AT_SCAN"] older = str(float(age) + 1) younger = str(float(age) - 1) - assert Query.CheckSubjectMatchesFilter(BRAIN_VOL_FILES, project, subject, - "instruments.AGE_AT_SCAN eq {}".format(str(age))) - assert (Query.CheckSubjectMatchesFilter(BRAIN_VOL_FILES, project, subject, - "instruments.AGE_AT_SCAN lt {}".format(younger)) == False) - assert (Query.CheckSubjectMatchesFilter(BRAIN_VOL_FILES, project, subject, - "instruments.AGE_AT_SCAN gt {}".format(younger)) == True) - assert Query.CheckSubjectMatchesFilter(BRAIN_VOL_FILES, project, subject, - "instruments.AGE_AT_SCAN lt {}".format(older)) - assert (Query.CheckSubjectMatchesFilter(BRAIN_VOL_FILES, project, subject, - "instruments.AGE_AT_SCAN gt {}".format(older)) == False) + assert Query.CheckSubjectMatchesFilter( + BRAIN_VOL_FILES, + project, + subject, + "instruments.AGE_AT_SCAN eq {}".format(str(age)), + ) + assert ( + Query.CheckSubjectMatchesFilter( + BRAIN_VOL_FILES, + project, + subject, + "instruments.AGE_AT_SCAN lt {}".format(younger), + ) + == False + ) + assert ( + Query.CheckSubjectMatchesFilter( + BRAIN_VOL_FILES, + project, + subject, + "instruments.AGE_AT_SCAN gt {}".format(younger), + ) + == True + ) + assert Query.CheckSubjectMatchesFilter( + BRAIN_VOL_FILES, + project, + subject, + "instruments.AGE_AT_SCAN lt {}".format(older), + ) + assert ( + Query.CheckSubjectMatchesFilter( + BRAIN_VOL_FILES, + project, + subject, + "instruments.AGE_AT_SCAN gt {}".format(older), + ) + == False + ) # TODO deal with spaces in identifiers and CheckSubjectMatchesFilter - elif 'age at scan' in inst: - age = inst['age at scan'] + elif "age at scan" in inst: + age = inst["age at scan"] older = str(float(age) + 1) younger = str(float(age) - 1) - assert inst['age at scan'] != None + assert inst["age at scan"] != None - #assert Query.CheckSubjectMatchesFilter( BRAIN_VOL_FILES, project, subject, "instruments.age at scan eq {}".format( str(age) ) ) - #assert (Query.CheckSubjectMatchesFilter( BRAIN_VOL_FILES, project, subject, "instruments.age at scan lt {}".format( younger ) ) == False) - #assert (Query.CheckSubjectMatchesFilter( BRAIN_VOL_FILES, project, subject, "instruments.age at scan gt {}".format( younger) ) == True) - #assert Query.CheckSubjectMatchesFilter( BRAIN_VOL_FILES, project, subject, "instruments.age at scan lt {}".format( older ) ) - #assert (Query.CheckSubjectMatchesFilter( BRAIN_VOL_FILES, project, subject, "instruments.age at scan gt {}".format( older) ) == False) + # assert Query.CheckSubjectMatchesFilter( BRAIN_VOL_FILES, project, subject, "instruments.age at scan eq {}".format( str(age) ) ) + # assert (Query.CheckSubjectMatchesFilter( BRAIN_VOL_FILES, project, subject, "instruments.age at scan lt {}".format( younger ) ) == False) + # assert 
(Query.CheckSubjectMatchesFilter( BRAIN_VOL_FILES, project, subject, "instruments.age at scan gt {}".format( younger) ) == True) + # assert Query.CheckSubjectMatchesFilter( BRAIN_VOL_FILES, project, subject, "instruments.age at scan lt {}".format( older ) ) + # assert (Query.CheckSubjectMatchesFilter( BRAIN_VOL_FILES, project, subject, "instruments.age at scan gt {}".format( older) ) == False) def test_ExtremeFilters(): @@ -427,40 +517,48 @@ def test_ExtremeFilters(): if cmu_test_project_uuid: project = cmu_test_project_uuid else: - projects = restParser.run(BRAIN_VOL_FILES, '/projects') + projects = restParser.run(BRAIN_VOL_FILES, "/projects") project = projects[0] - details = restParser.run(BRAIN_VOL_FILES, '/projects/{}?filter=AGE_AT_SCAN gt 200'.format(project)) - assert len(details['subjects']['uuid']) == 0 - assert len(details['data_elements']['uuid']) > 0 + details = restParser.run( + BRAIN_VOL_FILES, "/projects/{}?filter=AGE_AT_SCAN gt 200".format(project) + ) + assert len(details["subjects"]["uuid"]) == 0 + assert len(details["data_elements"]["uuid"]) > 0 + + details = restParser.run( + BRAIN_VOL_FILES, + "/projects/{}?filter=instruments.AGE_AT_SCAN gt 0".format(project), + ) + assert len(details["subjects"]["uuid"]) > 0 + assert len(details["data_elements"]["uuid"]) > 0 - details = restParser.run(BRAIN_VOL_FILES, '/projects/{}?filter=instruments.AGE_AT_SCAN gt 0'.format(project)) - assert len(details['subjects']['uuid']) > 0 - assert len(details['data_elements']['uuid']) > 0 def test_Filter_Flexibility(): restParser = RestParser(output_format=RestParser.OBJECT_FORMAT) if cmu_test_project_uuid: project = cmu_test_project_uuid else: - projects = restParser.run(BRAIN_VOL_FILES, '/projects') + projects = restParser.run(BRAIN_VOL_FILES, "/projects") project = projects[0] - synonyms = Query.GetDatatypeSynonyms(tuple(BRAIN_VOL_FILES),project, 'ADOS_MODULE') + synonyms = Query.GetDatatypeSynonyms(tuple(BRAIN_VOL_FILES), project, "ADOS_MODULE") real_synonyms = [x for x in synonyms if len(x) > 1] assert len(real_synonyms) > 1 for syn in real_synonyms: - if ' ' in syn: + if " " in syn: continue - details = restParser.run(BRAIN_VOL_FILES, '/projects/{}?filter=instruments.{} gt 2'.format(project, syn)) - assert len(details['subjects']['uuid']) > 0 - assert len(details['data_elements']['uuid']) > 0 + details = restParser.run( + BRAIN_VOL_FILES, + "/projects/{}?filter=instruments.{} gt 2".format(project, syn), + ) + assert len(details["subjects"]["uuid"]) > 0 + assert len(details["data_elements"]["uuid"]) > 0 def test_OpenGraph(): - g = Query.OpenGraph(BRAIN_VOL_FILES[0]) assert isinstance(g, rdflib.graph.Graph) @@ -473,52 +571,59 @@ def test_CDEs(): def testrun(): path = os.path.abspath(__file__) - dir_parts = path.split('/') + dir_parts = path.split("/") dir_parts = dir_parts[:-4] dir_parts.append("core") dir_parts.append("cde_dir") dir = "/".join(dir_parts) - graph = getCDEs([ - "{}/ants_cde.ttl".format(dir), - "{}/fs_cde.ttl".format(dir) - ]) + graph = getCDEs(["{}/ants_cde.ttl".format(dir), "{}/fs_cde.ttl".format(dir)]) - units = graph.objects(subject=Constants.FREESURFER['fs_000002'], predicate=Constants.NIDM['hasUnit']) + units = graph.objects( + subject=Constants.FREESURFER["fs_000002"], + predicate=Constants.NIDM["hasUnit"], + ) count = 0 for u in units: count += 1 - assert str(u) == 'mm^2' + assert str(u) == "mm^2" assert count == 1 testrun() - getCDEs.cache = None # clear the memory cache and try again - testrun() # run a second time to test disk caching. 
+ getCDEs.cache = None # clear the memory cache and try again + testrun() # run a second time to test disk caching. + def assess_one_col_output(txt_output): # print (txt_output) lines = txt_output.strip().splitlines() - while not re.search('[a-zA-Z]', lines[0]): # sometimes we get a blank main table, that is ok, just remove it and look at the next table + while not re.search( + "[a-zA-Z]", lines[0] + ): # sometimes we get a blank main table, that is ok, just remove it and look at the next table lines = lines[1:] - if not (re.search('UUID', lines[0]) or re.search('uuid', lines[0])): - print (lines) - assert re.search('UUID', lines[0]) or re.search('uuid', lines[0]) + if not (re.search("UUID", lines[0]) or re.search("uuid", lines[0])): + print(lines) + assert re.search("UUID", lines[0]) or re.search("uuid", lines[0]) # assert re.search('^-+$', lines[1]) found_uuid = False ###added by DBK to deal with varying line numbers for uuids depending on the rest query type for line in lines: - if is_uuid(line.strip('\"')): + if is_uuid(line.strip('"')): assert True - return line.strip('\"') + return line.strip('"') # if we didn't find a line with a uuid then we simply flag a false assertion and return the first line of output # cause it doesn't really matter at this point the assertion already failed assert False return lines[0] + def is_uuid(uuid): - return re.search('^[0-9a-z]+-[0-9a-z]+-[0-9a-z]+-[0-9a-z]+-[0-9a-z]+$', uuid) != None + return ( + re.search("^[0-9a-z]+-[0-9a-z]+-[0-9a-z]+-[0-9a-z]+-[0-9a-z]+$", uuid) != None + ) + def test_cli_rest_routes(): rest_parser = RestParser(verbosity_level=0) @@ -529,26 +634,31 @@ def test_cli_rest_routes(): # text = rest_parser.run(BRAIN_VOL_FILES, "/projects") - project_uuid = assess_one_col_output( text ) - + project_uuid = assess_one_col_output(text) # # /statistics/projects/{} # - txt_out = rest_parser.run(BRAIN_VOL_FILES, "/statistics/projects/{}".format(project_uuid)) + txt_out = rest_parser.run( + BRAIN_VOL_FILES, "/statistics/projects/{}".format(project_uuid) + ) lines = txt_out.strip().splitlines() - assert re.search('^-+ +-+$', lines[0]) - lines = lines[1:] # done testing line one, slice it off + assert re.search("^-+ +-+$", lines[0]) + lines = lines[1:] # done testing line one, slice it off - split_lines = [ str.split(x) for x in lines ] + split_lines = [str.split(x) for x in lines] found_gender = found_age_max = found_age_min = found_title = False for split in split_lines: - if len(split) > 0: # skip blank lines between apendicies - if re.search('title', split[0]): found_title = True - if re.search('age_max', split[0]): found_age_max = True - if re.search('age_min', split[0]): found_age_min = True - if re.search('gender', split[0]): found_gender = True + if len(split) > 0: # skip blank lines between apendicies + if re.search("title", split[0]): + found_title = True + if re.search("age_max", split[0]): + found_age_max = True + if re.search("age_min", split[0]): + found_age_min = True + if re.search("gender", split[0]): + found_gender = True assert found_title assert found_age_max @@ -559,101 +669,114 @@ def test_cli_rest_routes(): # /projects/{}/subjects # - sub_text = rest_parser.run(BRAIN_VOL_FILES, '/projects/{}/subjects'.format(project_uuid)) - subject_uuid = assess_one_col_output( sub_text ) + sub_text = rest_parser.run( + BRAIN_VOL_FILES, "/projects/{}/subjects".format(project_uuid) + ) + subject_uuid = assess_one_col_output(sub_text) # # /projects/{}/subjects/{}/instruments # # result should be in 3 sections: summary , derivatives, 
instruments - - inst_text = rest_parser.run(BRAIN_VOL_FILES, '/projects/{}/subjects/{}/'.format(project_uuid, subject_uuid)) + inst_text = rest_parser.run( + BRAIN_VOL_FILES, "/projects/{}/subjects/{}/".format(project_uuid, subject_uuid) + ) sections = inst_text.split("\n\n") # summary tests - summary_lines = sections[0].strip().splitlines()[1:-1] # first and last lines should be ----- + summary_lines = ( + sections[0].strip().splitlines()[1:-1] + ) # first and last lines should be ----- summary = dict() for l in summary_lines: summary[l.split()[0]] = l.split()[1] - inst_uuid = summary['instruments'].split(',')[0] - deriv_uuid = summary['derivatives'].split(',')[0] + inst_uuid = summary["instruments"].split(",")[0] + deriv_uuid = summary["derivatives"].split(",")[0] assert is_uuid(inst_uuid) assert is_uuid(deriv_uuid) # derivatives test deriv_lines = sections[1].strip().splitlines() deriv_headers = deriv_lines[0].split() - heads = ['Derivative_UUID', 'Measurement', 'Label', 'Value', 'Datumtype'] + heads = ["Derivative_UUID", "Measurement", "Label", "Value", "Datumtype"] for i in range(len(heads)): assert re.search(heads[i], deriv_headers[i], re.IGNORECASE) d_uuid = deriv_lines[2].split()[0] assert is_uuid(d_uuid) - assert d_uuid in summary['derivatives'].split(',') + assert d_uuid in summary["derivatives"].split(",") - #instruments test + # instruments test inst_lines = sections[2].strip().splitlines() inst_headers = inst_lines[0].split() - heads = ['Instrument_UUID', 'Category', 'Value'] + heads = ["Instrument_UUID", "Category", "Value"] for i in range(len(heads)): assert re.search(heads[i], inst_headers[i], re.IGNORECASE) i_uuid = inst_lines[2].split()[0] assert is_uuid(i_uuid) - assert i_uuid in summary['instruments'].split(',') + assert i_uuid in summary["instruments"].split(",") + def test_multiple_project_fields(): rest_parser = RestParser(verbosity_level=0) # rest_parser.setOutputFormat(RestParser.CLI_FORMAT) rest_parser.setOutputFormat(RestParser.OBJECT_FORMAT) - field = 'fs_000003,ilx_0100400' # ilx0100400 is 'isAbout' age - fields = rest_parser.run( BRAIN_VOL_FILES, "/projects?fields={}".format(field) ) + field = "fs_000003,ilx_0100400" # ilx0100400 is 'isAbout' age + fields = rest_parser.run(BRAIN_VOL_FILES, "/projects?fields={}".format(field)) # edited by DBK to account for only field values being returned - #assert( 'field_values' in project ) - assert (len(fields) > 0) - #fv = project['field_values'] - print (fields) + # assert( 'field_values' in project ) + assert len(fields) > 0 + # fv = project['field_values'] + print(fields) fv = fields - assert( type( fv ) == list ) - fields_used = set( [ i.label for i in fv ] ) - assert ('brain' in fields_used) or ('Brain Segmentation Volume (mm^3)' in fields_used) - assert 'age at scan' in fields_used + assert type(fv) == list + fields_used = set([i.label for i in fv]) + assert ("brain" in fields_used) or ( + "Brain Segmentation Volume (mm^3)" in fields_used + ) + assert "age at scan" in fields_used + def test_odd_isabout_uris(): rest_parser = RestParser(verbosity_level=0) # rest_parser.setOutputFormat(RestParser.CLI_FORMAT) rest_parser.setOutputFormat(RestParser.OBJECT_FORMAT) - field = 'http://www.cognitiveatlas.org/ontology/cogat.owl#CAO_00962' - fields = rest_parser.run( BRAIN_VOL_FILES, "/projects?fields={}".format(field) ) + field = "http://www.cognitiveatlas.org/ontology/cogat.owl#CAO_00962" + fields = rest_parser.run(BRAIN_VOL_FILES, "/projects?fields={}".format(field)) # edited by DBK to account for only field values being 
returned - #assert( 'field_values' in project ) - assert (len(fields) > 0) - #fv = project['field_values'] - print (fields) + # assert( 'field_values' in project ) + assert len(fields) > 0 + # fv = project['field_values'] + print(fields) fv = fields - assert( type( fv ) == list ) - fields_used = set( [ i.label for i in fv ] ) - assert 'ADOS_TOTAL' in fields_used + assert type(fv) == list + fields_used = set([i.label for i in fv]) + assert "ADOS_TOTAL" in fields_used def test_project_fields_deriv(): rest_parser = RestParser(verbosity_level=0) rest_parser.setOutputFormat(RestParser.OBJECT_FORMAT) - field = 'fs_000003' - project = rest_parser.run( BRAIN_VOL_FILES, "/projects/{}?fields={}".format(cmu_test_project_uuid, field) ) + field = "fs_000003" + project = rest_parser.run( + BRAIN_VOL_FILES, "/projects/{}?fields={}".format(cmu_test_project_uuid, field) + ) # edited by DBK to account for only field values being returned - #assert( 'field_values' in project ) - assert (len(project) > 0) - #fv = project['field_values'] + # assert( 'field_values' in project ) + assert len(project) > 0 + # fv = project['field_values'] fv = project - assert( type( fv ) == list ) - fields_used = set( [ i.label for i in fv ] ) - assert ('brain' in fields_used) or ('Brain Segmentation Volume (mm^3)' in fields_used) + assert type(fv) == list + fields_used = set([i.label for i in fv]) + assert ("brain" in fields_used) or ( + "Brain Segmentation Volume (mm^3)" in fields_used + ) def test_project_fields_instruments(): @@ -665,18 +788,17 @@ def test_project_fields_instruments(): rest_parser.setOutputFormat(RestParser.OBJECT_FORMAT) - - field = 'age at scan' - uri = "/projects/{}?fields={}".format(proj_uuid,field) - project = rest_parser.run( BRAIN_VOL_FILES, uri) + field = "age at scan" + uri = "/projects/{}?fields={}".format(proj_uuid, field) + project = rest_parser.run(BRAIN_VOL_FILES, uri) # edited by DBK to account for only field values being returned - #assert( 'field_values' in project ) - assert (len(project) > 0) - #fv = project['field_values'] + # assert( 'field_values' in project ) + assert len(project) > 0 + # fv = project['field_values'] fv = project - assert( type( fv ) == list ) - fields_used = set( [ i.label for i in fv ] ) + assert type(fv) == list + fields_used = set([i.label for i in fv]) assert field in fields_used @@ -685,18 +807,19 @@ def test_project_fields_not_found(): rest_parser = RestParser(verbosity_level=0) rest_parser.setOutputFormat(RestParser.OBJECT_FORMAT) - field = 'not_real_field' - project = rest_parser.run( BRAIN_VOL_FILES, "/projects/{}?fields={}".format(cmu_test_project_uuid, field) ) - + field = "not_real_field" + project = rest_parser.run( + BRAIN_VOL_FILES, "/projects/{}?fields={}".format(cmu_test_project_uuid, field) + ) - print (project) - keys = set( [ i for i in project ] ) + print(project) + keys = set([i for i in project]) assert "error" in keys + # ATC - fail def test_GetProjectsComputedMetadata(): - files = [] rest = RestParser() @@ -705,13 +828,20 @@ def test_GetProjectsComputedMetadata(): rest.ExpandProjectMetaData(meta_data) parsed = Query.compressForJSONResponse(meta_data) - for project_id in parsed['projects']: - if parsed['projects'][project_id][str(Constants.NIDM_PROJECT_NAME)] == "ABIDE - CMU_a": + for project_id in parsed["projects"]: + if ( + parsed["projects"][project_id][str(Constants.NIDM_PROJECT_NAME)] + == "ABIDE - CMU_a" + ): p3 = project_id break - assert parsed['projects'][p3][str(Constants.NIDM_PROJECT_NAME)] == "ABIDE - CMU_a" - assert 
parsed['projects'][p3][Query.matchPrefix(str(Constants.NIDM_NUMBER_OF_SUBJECTS))] == 14 - #assert parsed['projects'][p3]["age_min"] == 21.0 - #assert parsed['projects'][p3]["age_max"] == 33.0 - assert set(parsed['projects'][p3][str(Constants.NIDM_GENDER)]) == set(['1', '2']) - + assert parsed["projects"][p3][str(Constants.NIDM_PROJECT_NAME)] == "ABIDE - CMU_a" + assert ( + parsed["projects"][p3][ + Query.matchPrefix(str(Constants.NIDM_NUMBER_OF_SUBJECTS)) + ] + == 14 + ) + # assert parsed['projects'][p3]["age_min"] == 21.0 + # assert parsed['projects'][p3]["age_max"] == 33.0 + assert set(parsed["projects"][p3][str(Constants.NIDM_GENDER)]) == set(["1", "2"]) diff --git a/nidm/experiment/tools/tests/test_rest_dataelements.py b/nidm/experiment/tools/tests/test_rest_dataelements.py index 2b99c168..9c5f3150 100644 --- a/nidm/experiment/tools/tests/test_rest_dataelements.py +++ b/nidm/experiment/tools/tests/test_rest_dataelements.py @@ -1,52 +1,56 @@ -import urllib +import json +import os +from pathlib import Path import re - -import pytest -import rdflib - -from nidm.experiment import Project, Session, AssessmentAcquisition, AssessmentObject, Acquisition, AcquisitionObject, Query +import urllib from nidm.core import Constants +from nidm.experiment import ( + Acquisition, + AcquisitionObject, + AssessmentAcquisition, + AssessmentObject, + Project, + Query, + Session, +) from nidm.experiment.tools.rest import RestParser -import os -from pathlib import Path -from rdflib import Graph, util, URIRef -import json - - from prov.model import ProvAgent - - -REST_TEST_FILE = './agent.ttl' -BRAIN_VOL_FILES = ['./cmu_a.nidm.ttl', './caltech.nidm.ttl'] -OPENNEURO_FILES = ['ds000002.nidm.ttl', - 'ds000003.nidm.ttl', - 'ds000011.nidm.ttl', - 'ds000017.nidm.ttl', - 'ds000101.nidm.ttl', - 'ds000108.nidm.ttl', - 'ds000113.nidm.ttl', - 'ds000114.nidm.ttl', - 'ds000120.nidm.ttl', - 'ds000122.nidm.ttl', - 'ds000138.nidm.ttl', - 'ds000171.nidm.ttl', - 'ds000208.nidm.ttl', - 'ds000214.nidm.ttl', - 'ds000222.nidm.ttl', - 'ds000224.nidm.ttl', - 'ds000238.nidm.ttl', - 'ds000246.nidm.ttl', - 'ds001021.nidm.ttl', - 'ds001178.nidm.ttl', - 'ds001232.nidm.ttl', - 'ds001241.nidm.ttl' - ] +import pytest +import rdflib +from rdflib import Graph, URIRef, util + +REST_TEST_FILE = "./agent.ttl" +BRAIN_VOL_FILES = ["./cmu_a.nidm.ttl", "./caltech.nidm.ttl"] +OPENNEURO_FILES = [ + "ds000002.nidm.ttl", + "ds000003.nidm.ttl", + "ds000011.nidm.ttl", + "ds000017.nidm.ttl", + "ds000101.nidm.ttl", + "ds000108.nidm.ttl", + "ds000113.nidm.ttl", + "ds000114.nidm.ttl", + "ds000120.nidm.ttl", + "ds000122.nidm.ttl", + "ds000138.nidm.ttl", + "ds000171.nidm.ttl", + "ds000208.nidm.ttl", + "ds000214.nidm.ttl", + "ds000222.nidm.ttl", + "ds000224.nidm.ttl", + "ds000238.nidm.ttl", + "ds000246.nidm.ttl", + "ds001021.nidm.ttl", + "ds001178.nidm.ttl", + "ds001232.nidm.ttl", + "ds001241.nidm.ttl", +] # OPENNEURO_FILES = ['ds000001.nidm.ttl', # 'ds000003.nidm.ttl'] # -ALL_FILES = ['./cmu_a.nidm.ttl', './caltech.nidm.ttl', 'ds000120.nidm.ttl'] +ALL_FILES = ["./cmu_a.nidm.ttl", "./caltech.nidm.ttl", "ds000120.nidm.ttl"] OPENNEURO_PROJECT_URI = None OPENNEURO_SUB_URI = None @@ -55,58 +59,66 @@ cmu_test_project_uuid = None cmu_test_subject_uuid = None + @pytest.fixture(scope="module", autouse="True") def setup(): global cmu_test_project_uuid, cmu_test_subject_uuid, OPENNEURO_PROJECT_URI, OPENNEURO_SUB_URI - - for fname in OPENNEURO_FILES: - dataset = fname.split('.')[0] - if not Path('./{}'.format(fname)).is_file(): + dataset = fname.split(".")[0] + if not 
Path("./{}".format(fname)).is_file(): urllib.request.urlretrieve( - 'https://raw.githubusercontent.com/dbkeator/simple2_NIDM_examples/master/datasets.datalad.org/openneuro/{}/nidm.ttl'.format(dataset), - fname + "https://raw.githubusercontent.com/dbkeator/simple2_NIDM_examples/master/datasets.datalad.org/openneuro/{}/nidm.ttl".format( + dataset + ), + fname, ) - if not Path('./cmu_a.nidm.ttl').is_file(): - urllib.request.urlretrieve ( + if not Path("./cmu_a.nidm.ttl").is_file(): + urllib.request.urlretrieve( "https://raw.githubusercontent.com/dbkeator/simple2_NIDM_examples/master/datasets.datalad.org/abide/RawDataBIDS/CMU_a/nidm.ttl", - "cmu_a.nidm.ttl" + "cmu_a.nidm.ttl", ) - if not Path('./caltech.nidm.ttl').is_file(): - urllib.request.urlretrieve ( + if not Path("./caltech.nidm.ttl").is_file(): + urllib.request.urlretrieve( "https://raw.githubusercontent.com/dbkeator/simple2_NIDM_examples/master/datasets.datalad.org/abide/RawDataBIDS/Caltech/nidm.ttl", - "caltech.nidm.ttl" + "caltech.nidm.ttl", ) def test_dataelement_list(): rest_parser = RestParser(output_format=RestParser.OBJECT_FORMAT) - result = rest_parser.run(OPENNEURO_FILES, '/dataelements') + result = rest_parser.run(OPENNEURO_FILES, "/dataelements") assert type(result) == dict assert "data_elements" in result - assert 'uuid' in result["data_elements"] - assert 'label' in result["data_elements"] - assert 'data_type_info' in result["data_elements"] + assert "uuid" in result["data_elements"] + assert "label" in result["data_elements"] + assert "data_type_info" in result["data_elements"] assert len(result["data_elements"]["label"]) != 0 assert len(result["data_elements"]["label"]) == len(result["data_elements"]["uuid"]) - assert len(result["data_elements"]["label"]) == len(result["data_elements"]["data_type_info"]) + assert len(result["data_elements"]["label"]) == len( + result["data_elements"]["data_type_info"] + ) for label in result["data_elements"]["label"]: - assert label in [ str(x["label"]) for x in result["data_elements"]["data_type_info"] ] + assert label in [ + str(x["label"]) for x in result["data_elements"]["data_type_info"] + ] for uuid in result["data_elements"]["uuid"]: - assert uuid in [ str(x["dataElementURI"]) for x in result["data_elements"]["data_type_info"] ] + assert uuid in [ + str(x["dataElementURI"]) for x in result["data_elements"]["data_type_info"] + ] # now check for derivatives - result = rest_parser.run(BRAIN_VOL_FILES, '/dataelements') + result = rest_parser.run(BRAIN_VOL_FILES, "/dataelements") assert type(result) == dict - assert 'Left-WM-hypointensities Volume_mm3 (mm^3)' in result['data_elements']['label'] - + assert ( + "Left-WM-hypointensities Volume_mm3 (mm^3)" in result["data_elements"]["label"] + ) def test_dataelement_details(): @@ -125,10 +137,10 @@ def test_dataelement_details(): # txt = rest_parser.run(OPENNEURO_FILES, '/dataelements/{}'.format(result["data_elements"]["label"][0])) # - - dti = rest_parser.run(OPENNEURO_FILES, '/dataelements/Left-WM-hypointensities Volume_mm3 (mm^3)') - print (dti) - + dti = rest_parser.run( + OPENNEURO_FILES, "/dataelements/Left-WM-hypointensities Volume_mm3 (mm^3)" + ) + print(dti) def test_dataelement_details_in_projects_field(): @@ -138,6 +150,8 @@ def test_dataelement_details_in_projects_field(): # assert len(dti['inProjects']) >= 1 # find a data element that we are using for at least one subject - data_element_label = 'Right-non-WM-hypointensities normMax (MR)' - dti = rest_parser.run(BRAIN_VOL_FILES, '/dataelements/{}'.format(data_element_label)) - 
assert len (dti['inProjects']) >= 1 + data_element_label = "Right-non-WM-hypointensities normMax (MR)" + dti = rest_parser.run( + BRAIN_VOL_FILES, "/dataelements/{}".format(data_element_label) + ) + assert len(dti["inProjects"]) >= 1 diff --git a/nidm/experiment/tools/tests/test_rest_statistics.py b/nidm/experiment/tools/tests/test_rest_statistics.py index 38e98c37..50fb84cd 100644 --- a/nidm/experiment/tools/tests/test_rest_statistics.py +++ b/nidm/experiment/tools/tests/test_rest_statistics.py @@ -1,23 +1,26 @@ -import urllib +import json +import os +from pathlib import Path import re - -import pytest -import rdflib - -from nidm.experiment import Project, Session, AssessmentAcquisition, AssessmentObject, Acquisition, AcquisitionObject, Query +import urllib from nidm.core import Constants +from nidm.experiment import ( + Acquisition, + AcquisitionObject, + AssessmentAcquisition, + AssessmentObject, + Project, + Query, + Session, +) from nidm.experiment.tools.rest import RestParser -import os -from pathlib import Path -from rdflib import Graph, util, URIRef -import json - - from prov.model import ProvAgent +import pytest +import rdflib +from rdflib import Graph, URIRef, util - -REST_TEST_FILE = './agent.ttl' -BRAIN_VOL_FILES = ['./cmu_a.nidm.ttl', './caltech.nidm.ttl'] +REST_TEST_FILE = "./agent.ttl" +BRAIN_VOL_FILES = ["./cmu_a.nidm.ttl", "./caltech.nidm.ttl"] test_person_uuid = "" test_p2_subject_uuids = [] stat_test_project_uuid = None @@ -25,111 +28,151 @@ cmu_test_project_uuid = None cmu_test_subject_uuid = None + @pytest.fixture(scope="module", autouse="True") def setup(): global cmu_test_project_uuid global cmu_test_subject_uuid - for f in ['./cmu_a.nidm.ttl', 'caltech.nidm.ttl']: + for f in ["./cmu_a.nidm.ttl", "caltech.nidm.ttl"]: if Path(f).is_file(): os.remove(f) - if not Path('./cmu_a.nidm.ttl').is_file(): - urllib.request.urlretrieve ( + if not Path("./cmu_a.nidm.ttl").is_file(): + urllib.request.urlretrieve( "https://raw.githubusercontent.com/dbkeator/simple2_NIDM_examples/master/datasets.datalad.org/abide/RawDataBIDS/CMU_a/nidm.ttl", - "cmu_a.nidm.ttl" + "cmu_a.nidm.ttl", ) - if not Path('./caltech.nidm.ttl').is_file(): - urllib.request.urlretrieve ( + if not Path("./caltech.nidm.ttl").is_file(): + urllib.request.urlretrieve( "https://raw.githubusercontent.com/dbkeator/simple2_NIDM_examples/master/datasets.datalad.org/abide/RawDataBIDS/Caltech/nidm.ttl", - "caltech.nidm.ttl" + "caltech.nidm.ttl", ) restParser = RestParser(output_format=RestParser.OBJECT_FORMAT) - projects = restParser.run(BRAIN_VOL_FILES, '/projects') + projects = restParser.run(BRAIN_VOL_FILES, "/projects") for p in projects: - proj_info = restParser.run(BRAIN_VOL_FILES, '/projects/{}'.format(p)) - if type(proj_info) == dict and 'dctypes:title' in proj_info.keys() and proj_info['dctypes:title'] == 'ABIDE - CMU_a': + proj_info = restParser.run(BRAIN_VOL_FILES, "/projects/{}".format(p)) + if ( + type(proj_info) == dict + and "dctypes:title" in proj_info.keys() + and proj_info["dctypes:title"] == "ABIDE - CMU_a" + ): cmu_test_project_uuid = p break - subjects = restParser.run(BRAIN_VOL_FILES, '/projects/{}/subjects'.format(cmu_test_project_uuid)) - cmu_test_subject_uuid = subjects['uuid'][0] - + subjects = restParser.run( + BRAIN_VOL_FILES, "/projects/{}/subjects".format(cmu_test_project_uuid) + ) + cmu_test_subject_uuid = subjects["uuid"][0] def test_project_statistics(): global cmu_test_project_uuid AGE_CUTOFF = 30 - project = cmu_test_project_uuid # basics stats - basic_project_stats = 
restParser.run(BRAIN_VOL_FILES, "/statistics/projects/{}".format(project)) - assert 'title' in basic_project_stats + basic_project_stats = restParser.run( + BRAIN_VOL_FILES, "/statistics/projects/{}".format(project) + ) + assert "title" in basic_project_stats # basics stats with subjects - project_stats_with_subjects = restParser.run(BRAIN_VOL_FILES, "/statistics/projects/{}?fields=subjects".format(project)) - assert 'title' in project_stats_with_subjects - assert 'subjects' in project_stats_with_subjects + project_stats_with_subjects = restParser.run( + BRAIN_VOL_FILES, "/statistics/projects/{}?fields=subjects".format(project) + ) + assert "title" in project_stats_with_subjects + assert "subjects" in project_stats_with_subjects # filtered subjects stats - filtered_stats_with_subjects = restParser.run(BRAIN_VOL_FILES, "/statistics/projects/{}?fields=subjects&filter=instruments.AGE_AT_SCAN gt {}".format(project, AGE_CUTOFF)) - assert 'title' in filtered_stats_with_subjects - assert 'subjects' in filtered_stats_with_subjects - assert len(filtered_stats_with_subjects['subjects']) < len(project_stats_with_subjects['subjects']) - + filtered_stats_with_subjects = restParser.run( + BRAIN_VOL_FILES, + "/statistics/projects/{}?fields=subjects&filter=instruments.AGE_AT_SCAN gt {}".format( + project, AGE_CUTOFF + ), + ) + assert "title" in filtered_stats_with_subjects + assert "subjects" in filtered_stats_with_subjects + assert len(filtered_stats_with_subjects["subjects"]) < len( + project_stats_with_subjects["subjects"] + ) # filtered subjects instrument stats - age_stats = restParser.run(BRAIN_VOL_FILES, "/statistics/projects/{}?fields=instruments.AGE_AT_SCAN&filter=instruments.AGE_AT_SCAN gt {}".format(project, AGE_CUTOFF)) - assert 'title' in age_stats - assert 'subjects' in age_stats - assert len(age_stats['subjects']) < len(project_stats_with_subjects['subjects']) - assert 'AGE_AT_SCAN' in age_stats - for x in ['max', 'min', 'mean', 'median', 'standard_deviation']: - assert x in age_stats['AGE_AT_SCAN'] - #assert age_stats['AGE_AT_SCAN']['min'] > AGE_CUTOFF - #assert age_stats['AGE_AT_SCAN']['median'] >= age_stats['AGE_AT_SCAN']['min'] - #assert age_stats['AGE_AT_SCAN']['median'] <= age_stats['AGE_AT_SCAN']['max'] + age_stats = restParser.run( + BRAIN_VOL_FILES, + "/statistics/projects/{}?fields=instruments.AGE_AT_SCAN&filter=instruments.AGE_AT_SCAN gt {}".format( + project, AGE_CUTOFF + ), + ) + assert "title" in age_stats + assert "subjects" in age_stats + assert len(age_stats["subjects"]) < len(project_stats_with_subjects["subjects"]) + assert "AGE_AT_SCAN" in age_stats + for x in ["max", "min", "mean", "median", "standard_deviation"]: + assert x in age_stats["AGE_AT_SCAN"] + # assert age_stats['AGE_AT_SCAN']['min'] > AGE_CUTOFF + # assert age_stats['AGE_AT_SCAN']['median'] >= age_stats['AGE_AT_SCAN']['min'] + # assert age_stats['AGE_AT_SCAN']['median'] <= age_stats['AGE_AT_SCAN']['max'] # filtered subjects instrument and derivative stats - derivative_stats = restParser.run(BRAIN_VOL_FILES, "/statistics/projects/{}?fields=instruments.AGE_AT_SCAN,derivatives.Right-Hippocampus (mm^3)&filter=instruments.AGE_AT_SCAN gt {}".format(project, AGE_CUTOFF)) - assert 'title' in derivative_stats - assert 'subjects' in derivative_stats - assert len(derivative_stats['subjects']) < len(project_stats_with_subjects['subjects']) - for field in ['Right-Hippocampus (mm^3)', 'AGE_AT_SCAN']: + derivative_stats = restParser.run( + BRAIN_VOL_FILES, + 
"/statistics/projects/{}?fields=instruments.AGE_AT_SCAN,derivatives.Right-Hippocampus (mm^3)&filter=instruments.AGE_AT_SCAN gt {}".format( + project, AGE_CUTOFF + ), + ) + assert "title" in derivative_stats + assert "subjects" in derivative_stats + assert len(derivative_stats["subjects"]) < len( + project_stats_with_subjects["subjects"] + ) + for field in ["Right-Hippocampus (mm^3)", "AGE_AT_SCAN"]: assert field in derivative_stats - for x in ['max', 'min', 'mean', 'median', 'standard_deviation']: + for x in ["max", "min", "mean", "median", "standard_deviation"]: assert x in derivative_stats[field] + def test_project_statistics_fields(): global cmu_test_project_uuid project = cmu_test_project_uuid # ask for a field based on URI tail - derivative_stats = restParser.run(BRAIN_VOL_FILES, "/statistics/projects/{}?fields=derivatives.fsl_000020".format(project)) - assert 'title' in derivative_stats - assert 'subjects' in derivative_stats - assert len(derivative_stats['subjects']) > 0 - for field in ['fsl_000020']: + derivative_stats = restParser.run( + BRAIN_VOL_FILES, + "/statistics/projects/{}?fields=derivatives.fsl_000020".format(project), + ) + assert "title" in derivative_stats + assert "subjects" in derivative_stats + assert len(derivative_stats["subjects"]) > 0 + for field in ["fsl_000020"]: assert field in derivative_stats - for x in ['max', 'min', 'mean', 'median', 'standard_deviation']: + for x in ["max", "min", "mean", "median", "standard_deviation"]: assert x in derivative_stats[field] # ask for a field based on URI tail - derivative_stats = restParser.run(BRAIN_VOL_FILES, "/statistics/projects/{}?fields=derivatives.fsl_000020,instruments.AGE_AT_SCAN".format(project)) - assert 'title' in derivative_stats - assert 'subjects' in derivative_stats - assert len(derivative_stats['subjects']) > 0 - for field in ['fsl_000020', 'AGE_AT_SCAN']: + derivative_stats = restParser.run( + BRAIN_VOL_FILES, + "/statistics/projects/{}?fields=derivatives.fsl_000020,instruments.AGE_AT_SCAN".format( + project + ), + ) + assert "title" in derivative_stats + assert "subjects" in derivative_stats + assert len(derivative_stats["subjects"]) > 0 + for field in ["fsl_000020", "AGE_AT_SCAN"]: assert field in derivative_stats - for x in ['max', 'min', 'mean', 'median', 'standard_deviation']: + for x in ["max", "min", "mean", "median", "standard_deviation"]: assert x in derivative_stats[field] def test_getTailOfURI(): - assert restParser.getTailOfURI('http://purl.org/nidash/fsl#fsl_000020') == 'fsl_000020' - assert restParser.getTailOfURI('https://surfer.nmr.mgh.harvard.edu/fs_00005') == 'fs_00005' - + assert ( + restParser.getTailOfURI("http://purl.org/nidash/fsl#fsl_000020") == "fsl_000020" + ) + assert ( + restParser.getTailOfURI("https://surfer.nmr.mgh.harvard.edu/fs_00005") + == "fs_00005" + ) diff --git a/nidm/experiment/tools/tests/test_rest_subjects.py b/nidm/experiment/tools/tests/test_rest_subjects.py index 8b3b32aa..de799ddb 100644 --- a/nidm/experiment/tools/tests/test_rest_subjects.py +++ b/nidm/experiment/tools/tests/test_rest_subjects.py @@ -1,21 +1,27 @@ -import urllib -import re +import os +from pathlib import Path import pprint - -import pytest -import rdflib - -from nidm.experiment import Project, Session, AssessmentAcquisition, AssessmentObject, Acquisition, AcquisitionObject, Query -from nidm.experiment.CDE import getCDEs +import re +import urllib from nidm.core import Constants +from nidm.experiment import ( + Acquisition, + AcquisitionObject, + AssessmentAcquisition, + AssessmentObject, + 
Project, + Query, + Session, +) +from nidm.experiment.CDE import getCDEs from nidm.experiment.tools.rest import RestParser -import os -from pathlib import Path -from rdflib import Graph, util, URIRef +import pytest +import rdflib +from rdflib import Graph, URIRef, util -BRAIN_VOL_FILES = ['./cmu_a.nidm.ttl', './caltech.nidm.ttl'] -OPENNEURO_FILES = ['ds000120.nidm.ttl'] -ALL_FILES = ['./cmu_a.nidm.ttl', './caltech.nidm.ttl', 'ds000120.nidm.ttl'] +BRAIN_VOL_FILES = ["./cmu_a.nidm.ttl", "./caltech.nidm.ttl"] +OPENNEURO_FILES = ["ds000120.nidm.ttl"] +ALL_FILES = ["./cmu_a.nidm.ttl", "./caltech.nidm.ttl", "ds000120.nidm.ttl"] OPENNEURO_PROJECT_URI = None OPENNEURO_SUB_URI = None @@ -24,67 +30,75 @@ cmu_test_project_uuid = None cmu_test_subject_uuid = None + @pytest.fixture(scope="module", autouse="True") def setup(): global cmu_test_project_uuid, cmu_test_subject_uuid, OPENNEURO_PROJECT_URI, OPENNEURO_SUB_URI - if not Path('./cmu_a.nidm.ttl').is_file(): - urllib.request.urlretrieve ( + if not Path("./cmu_a.nidm.ttl").is_file(): + urllib.request.urlretrieve( "https://raw.githubusercontent.com/dbkeator/simple2_NIDM_examples/master/datasets.datalad.org/abide/RawDataBIDS/CMU_a/nidm.ttl", - "cmu_a.nidm.ttl" + "cmu_a.nidm.ttl", ) - if not Path('./caltech.nidm.ttl').is_file(): - urllib.request.urlretrieve ( + if not Path("./caltech.nidm.ttl").is_file(): + urllib.request.urlretrieve( "https://raw.githubusercontent.com/dbkeator/simple2_NIDM_examples/master/datasets.datalad.org/abide/RawDataBIDS/Caltech/nidm.ttl", - "caltech.nidm.ttl" + "caltech.nidm.ttl", ) restParser = RestParser(output_format=RestParser.OBJECT_FORMAT) - projects = restParser.run(BRAIN_VOL_FILES, '/projects') + projects = restParser.run(BRAIN_VOL_FILES, "/projects") for p in projects: - proj_info = restParser.run(BRAIN_VOL_FILES, '/projects/{}'.format(p)) - if 'dctypes:title' in proj_info.keys() and proj_info['dctypes:title'] == 'ABIDE - CMU_a': + proj_info = restParser.run(BRAIN_VOL_FILES, "/projects/{}".format(p)) + if ( + "dctypes:title" in proj_info.keys() + and proj_info["dctypes:title"] == "ABIDE - CMU_a" + ): cmu_test_project_uuid = p break - subjects = restParser.run(BRAIN_VOL_FILES, '/projects/{}/subjects'.format(cmu_test_project_uuid)) - cmu_test_subject_uuid = subjects['uuid'][0] - + subjects = restParser.run( + BRAIN_VOL_FILES, "/projects/{}/subjects".format(cmu_test_project_uuid) + ) + cmu_test_subject_uuid = subjects["uuid"][0] - if not Path('./ds000120.nidm.ttl').is_file(): - urllib.request.urlretrieve ( + if not Path("./ds000120.nidm.ttl").is_file(): + urllib.request.urlretrieve( "https://raw.githubusercontent.com/dbkeator/simple2_NIDM_examples/master/datasets.datalad.org/openneuro/ds000120/nidm.ttl", - "ds000120.nidm.ttl" + "ds000120.nidm.ttl", ) - projects2 = restParser.run(OPENNEURO_FILES, '/projects') + projects2 = restParser.run(OPENNEURO_FILES, "/projects") for p in projects2: - proj_info = restParser.run(OPENNEURO_FILES, '/projects/{}'.format(p)) - if 'dctypes:title' in proj_info.keys() and proj_info['dctypes:title'] == \ - 'Developmental changes in brain function underlying the influence of reward processing on ' \ - 'inhibitory control (Slot Reward)': + proj_info = restParser.run(OPENNEURO_FILES, "/projects/{}".format(p)) + if ( + "dctypes:title" in proj_info.keys() + and proj_info["dctypes:title"] + == "Developmental changes in brain function underlying the influence of reward processing on " + "inhibitory control (Slot Reward)" + ): OPENNEURO_PROJECT_URI = p - subjects = restParser.run(OPENNEURO_FILES, 
'/projects/{}/subjects'.format(OPENNEURO_PROJECT_URI)) - OPENNEURO_SUB_URI = subjects['uuid'][0] - + subjects = restParser.run( + OPENNEURO_FILES, "/projects/{}/subjects".format(OPENNEURO_PROJECT_URI) + ) + OPENNEURO_SUB_URI = subjects["uuid"][0] def test_rest_sub_id(): - restParser = RestParser() restParser.setOutputFormat(RestParser.OBJECT_FORMAT) - result = restParser.run(ALL_FILES, '/projects/{}'.format(cmu_test_project_uuid)) + result = restParser.run(ALL_FILES, "/projects/{}".format(cmu_test_project_uuid)) - sub_id = result['subjects']['subject id'][5] - sub_uuid = result['subjects']['uuid'][5] + sub_id = result["subjects"]["subject id"][5] + sub_uuid = result["subjects"]["uuid"][5] - result2 = restParser.run(ALL_FILES, '/subjects/{}'.format(sub_id)) + result2 = restParser.run(ALL_FILES, "/subjects/{}".format(sub_id)) pp = pprint.PrettyPrinter() - pp.pprint('/subjects/{}'.format(sub_id)) + pp.pprint("/subjects/{}".format(sub_id)) # make sure we got the same UUID when looking up by sub id - assert result2['uuid'] == sub_uuid - assert len(result2['instruments']) > 0 + assert result2["uuid"] == sub_uuid + assert len(result2["instruments"]) > 0 diff --git a/nidm/terms/imports/crypto_import.ttl b/nidm/terms/imports/crypto_import.ttl index 84fa0a21..8cc0dac8 100644 --- a/nidm/terms/imports/crypto_import.ttl +++ b/nidm/terms/imports/crypto_import.ttl @@ -19,7 +19,7 @@ ### http://purl.org/dc/terms/format crypto:sha512 rdf:type owl:DatatypeProperty ; - + obo:IAO_0000115 "Secure Hash Algorithm 512." ; - - rdfs:comment "SHA stands for Secure Hash Algorithm. Hash algorithms compute a fixed-length digital representation (known as a message digest) of an input data sequence (the message) of any length." . \ No newline at end of file + + rdfs:comment "SHA stands for Secure Hash Algorithm. Hash algorithms compute a fixed-length digital representation (known as a message digest) of an input data sequence (the message) of any length." . diff --git a/nidm/terms/imports/dc_import.ttl b/nidm/terms/imports/dc_import.ttl index 88ef6148..876c6bfc 100644 --- a/nidm/terms/imports/dc_import.ttl +++ b/nidm/terms/imports/dc_import.ttl @@ -22,9 +22,9 @@ ### http://purl.org/dc/elements/1.1/description dc:description rdf:type owl:ObjectProperty ; - + obo:IAO_0000115 "An account of the resource." ; - + rdfs:comment "Description may include but is not limited to: an abstract, a table of contents, a graphical representation, or a free-text account of the resource." . @@ -34,17 +34,17 @@ dc:description rdf:type owl:ObjectProperty ; dc:identifier rdf:type owl:ObjectProperty ; obo:IAO_0000115 "An unambiguous reference to the resource within a given context." ; - + rdfs:comment "Recommended best practice is to identify the resource by means of a string conforming to a formal identification system. " . - + ### http://purl.org/dc/elements/1.1/source dc:source rdf:type owl:ObjectProperty ; - + obo:IAO_0000115 "A related resource from which the described resource is derived." ; - + rdfs:comment "The described resource may be derived from the related resource in whole or in part. Recommended best practice is to identify the related resource by means of a string conforming to a formal identification system." . @@ -61,9 +61,9 @@ dc:source rdf:type owl:ObjectProperty ; ### http://purl.org/dc/terms/format dct:format rdf:type owl:DatatypeProperty ; - + obo:IAO_0000115 "The file format, physical medium, or dimensions of the resource." ; - + rdfs:comment "Examples of dimensions include size and duration. 
Recommended best practice is to use a controlled vocabulary such as the list of Internet Media Types [MIME]." . @@ -80,13 +80,12 @@ dct:format rdf:type owl:DatatypeProperty ; ### http://purl.org/dc/dcmitype/Image dctype:Image rdf:type owl:Class ; - + obo:IAO_0000115 "A visual representation other than text." ; - + rdfs:comment "Examples include images and photographs of physical objects, paintings, prints, drawings, other images and graphics, animations and moving pictures, film, diagrams, maps, musical notation. Note that Image may include both electronic and physical representations." . ### Generated by the OWL API (version 3.5.0) http://owlapi.sourceforge.net - diff --git a/nidm/terms/imports/iao_import.ttl b/nidm/terms/imports/iao_import.ttl index 6f371a56..18bfbcce 100644 --- a/nidm/terms/imports/iao_import.ttl +++ b/nidm/terms/imports/iao_import.ttl @@ -13,18 +13,18 @@ @base . rdf:type owl:Ontology ; - + dc:date "2009-07-31"^^xsd:date ; - + owl:versionInfo "2015-02-23"^^xsd:string ; - + rdfs:comment "An information artifact is, loosely, a dependent continuant or its bearer that is created as the result of one or more intentional processes. Examples: uniprot, the english language, the contents of this document or a printout of it, the temperature measurements from a weather balloon. For more information, see the project home page at http://code.google.com/p/information-artifact-ontology/"^^xsd:string , "IDs allocated to related efforts: PNO: IAO_0020000-IAO_0020999, D_ACTS: IAO_0021000-IAO_0021999"^^xsd:string , "IDs allocated to subdomains of IAO. pno.owl: IAO_0020000-IAO_0020999, d-acts.owl: IAO_0021000-IAO_0021999"^^xsd:string , "This file is based on checkout of our SVN repository revision $Revision: 717 $ "^^xsd:string ; - + protege:defaultLanguage "en"^^xsd:string ; - + dc:contributor "Adam Goldstein"@en , "Alan Ruttenberg"@en , "Albert Goldfain"@en , @@ -60,11 +60,11 @@ "William Duncan"@en , "William Hogan"@en , "Yongqun (Oliver) He"@en ; - + foaf:homepage ; - + dc:license ; - + owl:versionIRI iao:iao.owl . @@ -84,13 +84,13 @@ protege:defaultLanguage rdf:type owl:AnnotationProperty . ### http://purl.obolibrary.org/obo/BFO_0000179 obo:BFO_0000179 rdf:type owl:AnnotationProperty ; - + rdfs:label "BFO OWL specification label"@en ; - + obo:IAO_0000232 "Really of interest to developers only"@en ; - + obo:IAO_0000115 "Relates an entity in the ontology to the name of the variable that is used to represent it in the code that generates the BFO OWL file from the lispy specification."@en ; - + rdfs:subPropertyOf rdfs:label . @@ -98,15 +98,15 @@ obo:BFO_0000179 rdf:type owl:AnnotationProperty ; ### http://purl.obolibrary.org/obo/BFO_0000180 obo:BFO_0000180 rdf:type owl:AnnotationProperty ; - + rdfs:label "BFO CLIF specification label"@en ; - + obo:IAO_0000119 "Person:Alan Ruttenberg" ; - + obo:IAO_0000232 "Really of interest to developers only"@en ; - + obo:IAO_0000115 "Relates an entity in the ontology to the term that is used to represent it in the the CLIF specification of BFO2"@en ; - + rdfs:subPropertyOf rdfs:label . 
@@ -114,24 +114,24 @@ obo:BFO_0000180 rdf:type owl:AnnotationProperty ; ### http://purl.obolibrary.org/obo/IAO_0000111 obo:IAO_0000111 rdf:type owl:AnnotationProperty ; - + rdfs:label "editor preferred term" , "editor preferred label"@en , "editor preferred term"@en ; - + obo:IAO_0000111 "editor preferred term" ; - + obo:IAO_0000119 "GROUP:OBI:"@en ; - + obo:IAO_0000117 "PERSON:Daniel Schober"@en ; - + obo:IAO_0000115 "The concise, meaningful, and human-friendly name for a class or property preferred by the ontology developers. (US-English)"@en ; - + obo:IAO_0000111 "editor preferred label"@en , "editor preferred term"@en ; - + obo:IAO_0000114 obo:IAO_0000122 ; - + rdfs:isDefinedBy . @@ -139,20 +139,20 @@ obo:IAO_0000111 rdf:type owl:AnnotationProperty ; ### http://purl.obolibrary.org/obo/IAO_0000112 obo:IAO_0000112 rdf:type owl:AnnotationProperty ; - + rdfs:label "example of usage"@en ; - + obo:IAO_0000115 "A phrase describing how a class name should be used. May also include other kinds of examples that facilitate immediate understanding of a class semantics, such as widely known prototypical subclasses or instances of the class. Although essential for high level terms, examples for low level terms (e.g., Affymetrix HU133 array) are not"@en ; - + obo:IAO_0000119 "GROUP:OBI:"@en ; - + obo:IAO_0000117 "PERSON:Daniel Schober"@en ; - + obo:IAO_0000111 "example"@en , "example of usage"@en ; - + obo:IAO_0000114 obo:IAO_0000122 ; - + rdfs:isDefinedBy . @@ -160,15 +160,15 @@ obo:IAO_0000112 rdf:type owl:AnnotationProperty ; ### http://purl.obolibrary.org/obo/IAO_0000114 obo:IAO_0000114 rdf:type owl:AnnotationProperty ; - + rdfs:label "has curation status"@en ; - + obo:IAO_0000119 "OBI_0000281"@en ; - + obo:IAO_0000117 "PERSON:Alan Ruttenberg"@en , "PERSON:Bill Bug"@en , "PERSON:Melanie Courtot"@en ; - + obo:IAO_0000111 "has curation status"@en . @@ -176,27 +176,27 @@ obo:IAO_0000114 rdf:type owl:AnnotationProperty ; ### http://purl.obolibrary.org/obo/IAO_0000115 obo:IAO_0000115 rdf:type owl:AnnotationProperty ; - + rdfs:label "definition"^^xsd:string , "textual definition"^^xsd:string , "definition" , "definition"@en ; - + obo:IAO_0000111 "definition"^^xsd:string , "textual definition"^^xsd:string , "definition" ; - + obo:IAO_0000119 "GROUP:OBI:"@en ; - + obo:IAO_0000117 "PERSON:Daniel Schober"@en ; - + obo:IAO_0000115 "The official OBI definition, explaining the meaning of a class or property. Shall be Aristotelian, formalized and normalized. Can be augmented with colloquial definitions."@en , "The official definition, explaining the meaning of a class or property. Shall be Aristotelian, formalized and normalized. Can be augmented with colloquial definitions."@en ; - + obo:IAO_0000111 "definition"@en ; - + obo:IAO_0000114 obo:IAO_0000122 ; - + rdfs:isDefinedBy . @@ -204,19 +204,19 @@ obo:IAO_0000115 rdf:type owl:AnnotationProperty ; ### http://purl.obolibrary.org/obo/IAO_0000116 obo:IAO_0000116 rdf:type owl:AnnotationProperty ; - + rdfs:label "editor note"@en ; - + obo:IAO_0000115 "An administrative note intended for its editor. It may not be included in the publication version of the ontology, so it should contain nothing necessary for end users to understand the ontology."@en ; - + obo:IAO_0000119 "GROUP:OBI:"@en ; - + obo:IAO_0000117 "PERSON:Daniel Schober"@en ; - + obo:IAO_0000111 "editor note"@en ; - + obo:IAO_0000114 obo:IAO_0000122 ; - + rdfs:isDefinedBy . 
@@ -224,21 +224,21 @@ obo:IAO_0000116 rdf:type owl:AnnotationProperty ; ### http://purl.obolibrary.org/obo/IAO_0000117 obo:IAO_0000117 rdf:type owl:AnnotationProperty ; - + rdfs:label "term editor"@en ; - + obo:IAO_0000116 "20110707, MC: label update to term editor and definition modified accordingly. See http://code.google.com/p/information-artifact-ontology/issues/detail?id=115."@en ; - + obo:IAO_0000119 "GROUP:OBI:"@en ; - + obo:IAO_0000115 "Name of editor entering the term in the file. The term editor is a point of contact for information regarding the term. The term editor may be, but is not always, the author of the definition, which may have been worked upon by several people"@en ; - + obo:IAO_0000117 "PERSON:Daniel Schober"@en ; - + obo:IAO_0000111 "term editor"@en ; - + obo:IAO_0000114 obo:IAO_0000122 ; - + rdfs:isDefinedBy . @@ -246,19 +246,19 @@ obo:IAO_0000117 rdf:type owl:AnnotationProperty ; ### http://purl.obolibrary.org/obo/IAO_0000118 obo:IAO_0000118 rdf:type owl:AnnotationProperty ; - + rdfs:label "alternative term"@en ; - + obo:IAO_0000115 "An alternative name for a class or property which means the same thing as the preferred name (semantically equivalent)"@en ; - + obo:IAO_0000119 "GROUP:OBI:"@en ; - + obo:IAO_0000117 "PERSON:Daniel Schober"@en ; - + obo:IAO_0000111 "alternative term"@en ; - + obo:IAO_0000114 obo:IAO_0000125 ; - + rdfs:isDefinedBy . @@ -266,20 +266,20 @@ obo:IAO_0000118 rdf:type owl:AnnotationProperty ; ### http://purl.obolibrary.org/obo/IAO_0000119 obo:IAO_0000119 rdf:type owl:AnnotationProperty ; - + rdfs:label "definition source"@en ; - + obo:IAO_0000119 "Discussion on obo-discuss mailing-list, see http://bit.ly/hgm99w"^^xsd:string , "GROUP:OBI:"@en ; - + obo:IAO_0000117 "PERSON:Daniel Schober"@en ; - + obo:IAO_0000111 "definition source"@en ; - + obo:IAO_0000115 "formal citation, e.g. identifier in external database to indicate / attribute source(s) for the definition. Free text indicate / attribute source(s) for the definition. EXAMPLE: Author Name, URI, MeSH Term C04, PUBMED ID, Wiki uri on 31.01.2007"@en ; - + obo:IAO_0000114 obo:IAO_0000122 ; - + rdfs:isDefinedBy . @@ -287,17 +287,17 @@ obo:IAO_0000119 rdf:type owl:AnnotationProperty ; ### http://purl.obolibrary.org/obo/IAO_0000232 obo:IAO_0000232 rdf:type owl:AnnotationProperty ; - + rdfs:label "curator note"@en ; - + obo:IAO_0000115 "An administrative note of use for a curator but of no use for a user"@en ; - + obo:IAO_0000117 "PERSON:Alan Ruttenberg"@en ; - + obo:IAO_0000111 "curator note"@en ; - + obo:IAO_0000114 obo:IAO_0000122 ; - + rdfs:isDefinedBy . @@ -305,20 +305,20 @@ obo:IAO_0000232 rdf:type owl:AnnotationProperty ; ### http://purl.obolibrary.org/obo/IAO_0000412 obo:IAO_0000412 rdf:type owl:AnnotationProperty ; - + rdfs:label "imported from"@en ; - + obo:IAO_0000115 "For external terms/classes, the ontology from which the term was imported"@en ; - + obo:IAO_0000119 "GROUP:OBI:"@en ; - + obo:IAO_0000117 "PERSON:Alan Ruttenberg"@en , "PERSON:Melanie Courtot"@en ; - + obo:IAO_0000111 "imported from"@en ; - + obo:IAO_0000114 obo:IAO_0000125 ; - + rdfs:isDefinedBy . 
@@ -326,17 +326,17 @@ obo:IAO_0000412 rdf:type owl:AnnotationProperty ; ### http://purl.obolibrary.org/obo/IAO_0000600 obo:IAO_0000600 rdf:type owl:AnnotationProperty ; - + rdfs:label "elucidation"@en ; - + obo:IAO_0000119 "Person:Barry Smith"@en ; - + obo:IAO_0000600 "Primitive terms in a highest-level ontology such as BFO are terms which are so basic to our understanding of reality that there is no way of defining them in a non-circular fashion. For these, therefore, we can provide only elucidations, supplemented by examples and by axioms"@en ; - + obo:IAO_0000111 "elucidation"@en ; - + obo:IAO_0000117 "person:Alan Ruttenberg"@en ; - + rdfs:isDefinedBy . @@ -344,9 +344,9 @@ obo:IAO_0000600 rdf:type owl:AnnotationProperty ; ### http://purl.org/dc/elements/1.1/contributor dc:contributor rdf:type owl:AnnotationProperty ; - + rdfs:label "Contributor"@en-us ; - + rdfs:isDefinedBy dc: . @@ -354,9 +354,9 @@ dc:contributor rdf:type owl:AnnotationProperty ; ### http://purl.org/dc/elements/1.1/date dc:date rdf:type owl:AnnotationProperty ; - + rdfs:label "Date"@en-us ; - + rdfs:isDefinedBy dc: . @@ -397,19 +397,19 @@ foaf:homepage rdf:type owl:AnnotationProperty . ### http://purl.obolibrary.org/obo/IAO_0000136 obo:IAO_0000136 rdf:type owl:ObjectProperty ; - + rdfs:label "is about"@en ; - + obo:IAO_0000119 "Smith, Ceusters, Ruttenberg, 2000 years of philosophy"@en ; - + obo:IAO_0000112 "This document is about information artifacts and their representations"@en ; - + obo:IAO_0000115 "is_about is a (currently) primitive relation that relates an information artifact to an entity."@en ; - + obo:IAO_0000117 "person:Alan Ruttenberg"@en ; - + obo:IAO_0000114 obo:IAO_0000125 ; - + obo:IAO_0000412 . @@ -426,7 +426,7 @@ obo:IAO_0000136 rdf:type owl:ObjectProperty ; ### http://usefulinc.com/ns/doap#revision doap:revision rdf:type owl:AnnotationProperty ; - + obo:IAO_0000412 . @@ -449,7 +449,7 @@ doap:revision rdf:type owl:AnnotationProperty ; ### http://purl.obolibrary.org/obo/IAO_0000002 obo:IAO_0000002 rdf:type owl:NamedIndividual ; - + rdfs:label "example to be eventually removed"@en . @@ -457,9 +457,9 @@ obo:IAO_0000002 rdf:type owl:NamedIndividual ; ### http://purl.obolibrary.org/obo/IAO_0000120 obo:IAO_0000120 rdf:type owl:NamedIndividual ; - + rdfs:label "metadata complete"@en ; - + obo:IAO_0000115 "Class has all its metadata, but is either not guaranteed to be in its final location in the asserted IS_A hierarchy or refers to another class that is not complete."@en . @@ -467,9 +467,9 @@ obo:IAO_0000120 rdf:type owl:NamedIndividual ; ### http://purl.obolibrary.org/obo/IAO_0000121 obo:IAO_0000121 rdf:type owl:NamedIndividual ; - + rdfs:label "organizational term"@en ; - + obo:IAO_0000115 "term created to ease viewing/sort terms for development purpose, and will not be included in a release"@en . @@ -477,9 +477,9 @@ obo:IAO_0000121 rdf:type owl:NamedIndividual ; ### http://purl.obolibrary.org/obo/IAO_0000122 obo:IAO_0000122 rdf:type owl:NamedIndividual ; - + rdfs:label "ready for release"@en ; - + obo:IAO_0000115 "Class has undergone final review, is ready for use, and will be included in the next release. Any class lacking \"ready_for_release\" should be considered likely to change place in hierarchy, have its definition refined, or be obsoleted in the next release. Those classes deemed \"ready_for_release\" will also derived from a chain of ancestor classes that are also \"ready_for_release.\""@en . 
@@ -487,9 +487,9 @@ obo:IAO_0000122 rdf:type owl:NamedIndividual ; ### http://purl.obolibrary.org/obo/IAO_0000123 obo:IAO_0000123 rdf:type owl:NamedIndividual ; - + rdfs:label "metadata incomplete"@en ; - + obo:IAO_0000115 "Class is being worked on; however, the metadata (including definition) are not complete or sufficiently clear to the branch editors."@en . @@ -497,9 +497,9 @@ obo:IAO_0000123 rdf:type owl:NamedIndividual ; ### http://purl.obolibrary.org/obo/IAO_0000124 obo:IAO_0000124 rdf:type owl:NamedIndividual ; - + rdfs:label "uncurated"@en ; - + obo:IAO_0000115 "Nothing done yet beyond assigning a unique class ID and proposing a preferred term."@en . @@ -507,9 +507,9 @@ obo:IAO_0000124 rdf:type owl:NamedIndividual ; ### http://purl.obolibrary.org/obo/IAO_0000125 obo:IAO_0000125 rdf:type owl:NamedIndividual ; - + rdfs:label "pending final vetting"@en ; - + obo:IAO_0000115 "All definitions, placement in the asserted IS_A hierarchy and required minimal metadata are complete. The class is awaiting a final review by someone other than the term editor."@en . @@ -517,13 +517,13 @@ obo:IAO_0000125 rdf:type owl:NamedIndividual ; ### http://purl.obolibrary.org/obo/IAO_0000423 obo:IAO_0000423 rdf:type owl:NamedIndividual ; - + rdfs:label "to be replaced with external ontology term"@en ; - + obo:IAO_0000117 "Alan Ruttenberg"@en ; - + obo:IAO_0000115 "Terms with this status should eventually replaced with a term from another ontology."@en ; - + obo:IAO_0000119 "group:OBI"@en . @@ -531,13 +531,13 @@ obo:IAO_0000423 rdf:type owl:NamedIndividual ; ### http://purl.obolibrary.org/obo/IAO_0000428 obo:IAO_0000428 rdf:type owl:NamedIndividual ; - + rdfs:label "requires discussion"@en ; - + obo:IAO_0000115 "A term that is metadata complete, has been reviewed, and problems have been identified that require discussion before release. Such a term requires editor note(s) to identify the outstanding issues."@en ; - + obo:IAO_0000117 "Alan Ruttenberg"@en ; - + obo:IAO_0000119 "group:OBI"@en . @@ -551,14 +551,14 @@ obo:IAO_0000428 rdf:type owl:NamedIndividual ; ### http://purl.obolibrary.org/obo/iao/2015-02-23/iao.owl ###iao:iao.owl rdf:type owl:NamedIndividual ; - + ### rdfs:label "IAO Release 2015-02-23" ; - + ### doap:revision "SVN $Revision: 717 $" ; - + ### doap:file-release , ### iao:iao.owl ; - + ### rdfs:seeAlso . @@ -570,4 +570,3 @@ obo:IAO_0000428 rdf:type owl:NamedIndividual ; ### Generated by the OWL API (version 3.5.0) http://owlapi.sourceforge.net - diff --git a/nidm/terms/imports/nfo_import.ttl b/nidm/terms/imports/nfo_import.ttl index 5c548a68..7acd57ec 100644 --- a/nidm/terms/imports/nfo_import.ttl +++ b/nidm/terms/imports/nfo_import.ttl @@ -40,17 +40,16 @@ obo:IAO_0000115 rdf:type owl:AnnotationProperty . ### http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#fileName nfo:fileName rdf:type owl:DatatypeProperty ; - + rdfs:label "fileName" ; - + nrl:maxCardinality "1" ; - + obo:IAO_0000115 "Name of the file, together with the extension" ; - + rdfs:range xsd:string . ### Generated by the OWL API (version 3.5.0) http://owlapi.sourceforge.net - diff --git a/nidm/terms/imports/nlx_import.ttl b/nidm/terms/imports/nlx_import.ttl index 989ba663..5d5dc58f 100644 --- a/nidm/terms/imports/nlx_import.ttl +++ b/nidm/terms/imports/nlx_import.ttl @@ -39,21 +39,21 @@ obo:IAO_0000115 rdf:type owl:AnnotationProperty . 
### http://ontology.neuinfo.org/NIF/DigitalEntities/NIF-Investigation.owl#birnlex_2100 nlx:birnlex_2100 rdf:type owl:Class ; - + rdfs:label "Magnetic resonance imaging scanner"^^xsd:string ; - + rdfs:subClassOf nlx:birnlex_2094 ; - + obo_annot:createdDate "2006-05-15"^^xsd:string ; - + obo_annot:modifiedDate "2006-10-11"^^xsd:string ; - + obo_annot:synonym "MRI scanner"^^xsd:string ; - + core:prefLabel "Magnetic resonance imaging scanner"^^xsd:string ; - + birn_annot:hasBirnlexCurator "http://ontology.neuinfo.org/NIF/Backend/BIRNLex_annotation_properties.owl#BIRN_OTF"^^xsd:string ; - + birn_annot:hasCurationStatus "http://ontology.neuinfo.org/NIF/Backend/BIRNLex_annotation_properties.owl#uncurated"^^xsd:string . @@ -61,56 +61,56 @@ nlx:birnlex_2100 rdf:type owl:Class ; ### http://ontology.neuinfo.org/NIF/DigitalEntities/NIF-Investigation.owl#birnlex_2094 nlx:birnlex_2094 rdf:type owl:Class ; - + rdfs:label "Imaging instrument"^^xsd:string ; - + obo_annot:createdDate "2006-05-15"^^xsd:string ; - + obo_annot:modifiedDate "2006-10-11"^^xsd:string ; - + core:prefLabel "Imaging instrument"^^xsd:string ; - + birn_annot:hasBirnlexCurator "http://ontology.neuinfo.org/NIF/Backend/BIRNLex_annotation_properties.owl#BIRN_OTF"^^xsd:string ; - - birn_annot:hasCurationStatus "http://ontology.neuinfo.org/NIF/Backend/BIRNLex_annotation_properties.owl#uncurated"^^xsd:string . - + birn_annot:hasCurationStatus "http://ontology.neuinfo.org/NIF/Backend/BIRNLex_annotation_properties.owl#uncurated"^^xsd:string . + + ### http://uri.interlex.org/base/ixl_0050000 nlx:ixl_0050000 rdf:type owl:Class ; - + rdfs:label "Positron emission tomography scanner"^^xsd:string ; - + rdfs:subClassOf nlx:birnlex_2094 ; - + obo_annot:createdDate "2016-02-12"^^xsd:string ; - + core:definition "A Positron emission tomography scanner is a device used in a nuclear medicine to observe metabolic processes in the body."^^xsd:string ; - + obo_annot:synonym "PET scanner"^^xsd:string ; - + core:prefLabel "Positron emission tomography scanner"^^xsd:string ; - + obo_annot:definingCitationURI "https://en.wikipedia.org/wiki/Positron_emission_tomography"^^xsd:string . ### http://uri.interlex.org/base/ixl_0050001 nlx:ixl_0050001 rdf:type owl:Class ; - + rdfs:label "Single-photon emission computed tomography scanner"^^xsd:string ; - + rdfs:subClassOf nlx:birnlex_2094 ; - + obo_annot:createdDate "2016-02-12"^^xsd:string ; - + core:definition "A Single-photon emission computed tomography scanner is a device used in nuclear medicine tomographic imaging to measure perfusion in the body using gamma rays."^^xsd:string ; - + obo_annot:synonym "SPECT scanner"^^xsd:string ; - + core:prefLabel "Single-photon emission computed tomography scanner"^^xsd:string ; - + obo_annot:definingCitationURI "https://en.wikipedia.org/wiki/Single-photon_emission_computed_tomography"^^xsd:string . @@ -119,19 +119,19 @@ nlx:ixl_0050001 rdf:type owl:Class ; ### http://uri.interlex.org/base/ixl_0050003 nlx:ixl_0050003 rdf:type owl:Class ; - + rdfs:label "Electroencephalography machine"^^xsd:string ; - + rdfs:subClassOf nlx:birnlex_2094 ; - + obo_annot:createdDate "2016-02-12"^^xsd:string ; - + core:definition "A Electroencephalography machine is a device used in electrophysiology to record electrical activity of the brain.."^^xsd:string ; - + obo_annot:synonym "EEG machine"^^xsd:string ; - + core:prefLabel "Electroencephalography machine"^^xsd:string ; - + obo_annot:definingCitationURI "https://en.wikipedia.org/wiki/Electroencephalography"^^xsd:string . 
@@ -139,17 +139,17 @@ nlx:ixl_0050003 rdf:type owl:Class ; ### http://ontology.neuinfo.org/NIF/DigitalEntities/NIF-Investigation.owl#birnlex_XXX_MEG_machine nlx:ixl_0050002 rdf:type owl:Class ; - + rdfs:label "Magnetoencephalography machine"^^xsd:string ; - + rdfs:subClassOf nlx:birnlex_2094 ; - + obo_annot:createdDate "2016-02-12"^^xsd:string ; - + core:definition "A Magnetoencephalography machine is a device used in functional neuroimaging for mapping brain activity by recording magnetic fields produced by electrical currents occurring naturally in the brain, using very sensitive magnetometers.."^^xsd:string ; - + obo_annot:synonym "MEG machine"^^xsd:string ; - + core:prefLabel "Magnetoencephalography machine"^^xsd:string ; - + obo_annot:definingCitationURI "https://en.wikipedia.org/wiki/Magnetoencephalography"^^xsd:string . diff --git a/nidm/terms/imports/obi_import.ttl b/nidm/terms/imports/obi_import.ttl index 1be70635..e5dfb328 100644 --- a/nidm/terms/imports/obi_import.ttl +++ b/nidm/terms/imports/obi_import.ttl @@ -71,21 +71,21 @@ obo:IAO_0000120 rdf:type owl:AnnotationProperty . ### http://purl.obolibrary.org/obo/OBI_0000636 obo:OBI_0000636 rdf:type owl:Class ; - + rdfs:label "acquisition computer"@en ; - + rdfs:subClassOf obo:OBI_0400107 ; - + obo:IAO_0000119 "http://msi-ontology.sourceforge.net/ontology/NMR.owl#msi_400013"^^xsd:string ; - + obo:IAO_0000115 "A Computer used for NMR, can be divided into central processing unit (CPU), consisting of instruction, interpretation and arithmetic unit plus fast access memory, and peripheral devices such as bulk data storage and input and output devices (including, via the interface, the spectrometer). Under software control, the computer controls the RF pulses and gradients necessary to acquire data, and process the data to produce spectra or images. Note that devices such as the spectrometer may themselves incorporate small computers."@en ; - + obo:IAO_0000119 "GROUP:"@en ; - + obo:IAO_0000117 "PERSON:Daniel Schober"@en ; - + obo:IAO_0000111 "acquisition computer"@en ; - + obo:IAO_0000114 obo:IAO_0000120 . @@ -93,19 +93,19 @@ obo:OBI_0000636 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/OBI_0001092 obo:OBI_0001092 rdf:type owl:Class ; - + rdfs:label "computer cluster"^^xsd:string ; - + rdfs:subClassOf obo:OBI_0400107 ; - + obo:IAO_0000115 "A group of linked computers, working together closely so that in many respects they form a single computer."^^xsd:string ; - + obo:IAO_0000117 "PERSON: Erik Segerdell"^^xsd:string ; - + obo:IAO_0000111 "computer cluster"^^xsd:string ; - + obo:IAO_0000119 "http://en.wikipedia.org/wiki/Cluster_(computing)"^^xsd:string ; - + obo:IAO_0000114 obo:IAO_0000120 . @@ -114,24 +114,23 @@ obo:OBI_0001092 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/OBI_0400107 obo:OBI_0400107 rdf:type owl:Class ; - + rdfs:label "computer"@en ; - + obo:IAO_0000115 "A computer is an instrument which manipulates (stores, retrieves, and processes) data according to a list of instructions."@en ; - + obo:IAO_0000112 "Apple PowerBook, Dell OptiPlex"@en ; - + obo:IAO_0000117 "Melanie Courtot"@en , "Trish Whetzel"@en ; - + obo:IAO_0000111 "computer"@en ; - + obo:IAO_0000119 "http://en.wikipedia.org/wiki/Computer"@en ; - + obo:IAO_0000114 obo:IAO_0000122 . 
### Generated by the OWL API (version 3.5.0) http://owlapi.sourceforge.net - diff --git a/nidm/terms/imports/ontoneurolog_instruments_import.ttl b/nidm/terms/imports/ontoneurolog_instruments_import.ttl index c0009ace..636ab1dc 100644 --- a/nidm/terms/imports/ontoneurolog_instruments_import.ttl +++ b/nidm/terms/imports/ontoneurolog_instruments_import.ttl @@ -10,11 +10,11 @@ @base . rdf:type owl:Ontology ; - + dc:contributor "Benedicte Batrancourt, Michel Dojat, Bernard Gibaud, Gilles Kassel, bernard.gibaud@univ-rennes1.fr"^^rdfs:Literal ; - + dc:date "2009-09-16" . - + ################################################################# @@ -52,7 +52,7 @@ skos:definition rdf:type owl:AnnotationProperty . ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#assessment-instrument :assessment-instrument rdf:type owl:Class ; - + skos:definition """Instrument (CDISC Clinical Research Glossary, v. 8.0): “A means to capture data (e.g., questionnaire, diary) plus all the information and documentation that support its use. NOTE: Generally, instruments include clearly defined methods and instructions for administration or responding, a standard format for data collection, and well-documented methods for scoring, analysis, and interpretation of results.” \"Testing procedures are normally designed to be administered under carefully controlled or standardised conditions that embody systematic scoring protocols\" (Source: ITC International Guidelines for Test Use, http://www.intestcom.org/Guidelines). ASSESSMENT INSTRUMENTS are divided among PSYCHOLOGICAL INSTRUMENTS, BEHAVIOURAL INSTRUMENTS, and NEUROCLINICAL INSTRUMENTS according to the kind of entity (a function or a behaviour) they explore. @@ -64,12 +64,12 @@ ASSESSMENT INSTRUMENTS are divided among MONO-DOMAIN INSTRUMENTS and MULTI-DOMAI ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#behavioural-instrument :behavioural-instrument rdf:type owl:Class ; - + rdfs:subClassOf :assessment-instrument ; - + owl:disjointWith :neuroclinical-instrument , :psychological-instrument ; - + skos:definition """A BEHAVIOURAL INSTRUMENT explores the subject's behavior and provides normative data and consistent measures about intensity/severity of a behavioral trait. In this context, the instrument does not measure the subject performance to realize a specific task and questionnaires are generally used. Each INSTRUMENT VARIABLE of the QUESTIONNAIRE is associated to a SCALE and each subject's answer corresponds to a SCALE ITEM. Generally, BEHAVIOURAL INSTRUMENTS are QUESTIONNAIRES. However, some exceptions exist especially when the explored domain is at the frontier between cognition and behavior (e.g. executive functions such as motivation, planning or social interaction). In these rare cases BEHAVIOURAL INSTRUMENTS are similar to TEST-INSTRUMENTS. “Standardised set of questions or stimuli which are administered to assess or measure the presence or absence of a particular skill, knowledge or behavior.” (Source: CSP/PT). (Concept: (CUI C0683444) behavioral test, Semantic Type: Diagnostic Procedure).""" . @@ -78,9 +78,9 @@ ASSESSMENT INSTRUMENTS are divided among MONO-DOMAIN INSTRUMENTS and MULTI-DOMAI ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#behavioural-interview :behavioural-interview rdf:type owl:Class ; - + rdfs:subClassOf :questionnaire-based-assessment ; - + skos:definition "A behavioural interview is a Questionnaire-based assessment carried out during a behavioral examination." . 
@@ -88,9 +88,9 @@ ASSESSMENT INSTRUMENTS are divided among MONO-DOMAIN INSTRUMENTS and MULTI-DOMAI ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#behavioural-test :behavioural-test rdf:type owl:Class ; - + rdfs:subClassOf :test-based-assessment ; - + owl:disjointWith :neuroclinical-test , :psychological-test . @@ -99,11 +99,11 @@ ASSESSMENT INSTRUMENTS are divided among MONO-DOMAIN INSTRUMENTS and MULTI-DOMAI ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#bi-coded-scale-item :bi-coded-scale-item rdf:type owl:Class ; - + rdfs:subClassOf :qualitative-scale-item , :quantitative-scale-item ; - - skos:definition """A BI-CODED SCALE ITEM refers both to a qualitative value of the measured QUALITY associated to a quantitative value. Generally the behavior rating scales are built with a set of BI-CODED SCALE ITEM. The rater must decide whether the rating lies on the defined QUANTITATIVE SCALE ITEMS, which are associated with QUALITATIVE SCALE ITEMS. + + skos:definition """A BI-CODED SCALE ITEM refers both to a qualitative value of the measured QUALITY associated to a quantitative value. Generally the behavior rating scales are built with a set of BI-CODED SCALE ITEM. The rater must decide whether the rating lies on the defined QUANTITATIVE SCALE ITEMS, which are associated with QUALITATIVE SCALE ITEMS. Example: The INSTRUMENT ‘Clinical Dementia Rating scale' explores the severity of dementia among the patients. The CODED VARIABLE of the INSTRUMENT ‘Clinical Dementia Rating scale' which measures the QUALITY 'Severity of dementia' has for scale a SCALE in whom, each BI-CODED ITEM SCALE refers both to a particular severity of the dementia by a quantitative way with the following QUANTITATIVE SCALE ITEMS (0, 0.5, 1, 2, 3) and to a particular severity of the dementia (meaning) by a qualitative way with the following QUALITATIVE SCALE ITEM ('No evidence of dementia', 'Questionable dementia', 'Mild dementia', 'Moderate dementia', 'Severe dementia').""" . @@ -111,10 +111,10 @@ Example: The INSTRUMENT ‘Clinical Dementia Rating scale' explores the severity ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#coded-score :coded-score rdf:type owl:Class ; - + rdfs:subClassOf :scale-item , :score ; - + owl:disjointWith :numerical-score . @@ -122,12 +122,12 @@ Example: The INSTRUMENT ‘Clinical Dementia Rating scale' explores the severity ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#coded-variable :coded-variable rdf:type owl:Class ; - + rdfs:subClassOf :instrument-variable ; - + owl:disjointWith :numerical-variable ; - - skos:definition """A CODED VARIABLE is an INSTRUMENT VARIABLE which is associated to a SCALE. The CODED VARIABLE measures a QUALITY whose values are coded by ITEMS of this SCALE. + + skos:definition """A CODED VARIABLE is an INSTRUMENT VARIABLE which is associated to a SCALE. The CODED VARIABLE measures a QUALITY whose values are coded by ITEMS of this SCALE. The CODED VARIABLE of the INSTRUMENT ‘Beck Depression Inventory’ which measures the QUALITY ‘Sadness’ has for scale the following 4-item SCALE: ('I do not feel sad', 'I feel sad much of the time', 'I am sad all the time', 'I am so sad or unhappy that I can't stand it').""" . @@ -135,7 +135,7 @@ The CODED VARIABLE of the INSTRUMENT ‘Beck Depression Inventory’ which measu ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#coded-variable-assessment :coded-variable-assessment rdf:type owl:Class ; - + rdfs:subClassOf :variable-assessment . 
@@ -143,7 +143,7 @@ The CODED VARIABLE of the INSTRUMENT ‘Beck Depression Inventory’ which measu ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#composite-instrument :composite-instrument rdf:type owl:Class ; - + rdfs:subClassOf :assessment-instrument . @@ -151,12 +151,12 @@ The CODED VARIABLE of the INSTRUMENT ‘Beck Depression Inventory’ which measu ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#corrected-score :corrected-score rdf:type owl:Class ; - + rdfs:subClassOf :numerical-score ; - + owl:disjointWith :raw-score , :standardized-score ; - + skos:definition "The corrected score is the raw score corrected according to the cultural skill, and/or the age, and/or the sex of the subject. The correction is obtained from a table of normative data provided in the manual of the test-based assessment." . @@ -164,7 +164,7 @@ The CODED VARIABLE of the INSTRUMENT ‘Beck Depression Inventory’ which measu ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#experimental-psychology-instrument :experimental-psychology-instrument rdf:type owl:Class ; - + rdfs:subClassOf :psychological-instrument . @@ -172,7 +172,7 @@ The CODED VARIABLE of the INSTRUMENT ‘Beck Depression Inventory’ which measu ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#experimental-psychology-test :experimental-psychology-test rdf:type owl:Class ; - + rdfs:subClassOf :psychological-test . @@ -180,8 +180,8 @@ The CODED VARIABLE of the INSTRUMENT ‘Beck Depression Inventory’ which measu ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#instrument-based-assessment :instrument-based-assessment rdf:type owl:Class ; - - skos:definition """An INSTRUMENT-BASED ASSESSMENT is a SUBJECT DATA ACQUISITION that captures some required information concerning the subject and involves the integration of data from instruments: TEST(-INSTRUMENTS) and/or QUESTIONNAIRES. When the purpose of the patient's examination is the assessment of her/his behavior, the examiner uses questionnaires rather than tests to rate the level of intensity/severity of a behavioral trait. Then, the appropriate action is a BEHAVIOURAL INTERVIEW rather than a BEHAVIOURAL TEST which is less adapted. + + skos:definition """An INSTRUMENT-BASED ASSESSMENT is a SUBJECT DATA ACQUISITION that captures some required information concerning the subject and involves the integration of data from instruments: TEST(-INSTRUMENTS) and/or QUESTIONNAIRES. When the purpose of the patient's examination is the assessment of her/his behavior, the examiner uses questionnaires rather than tests to rate the level of intensity/severity of a behavioral trait. Then, the appropriate action is a BEHAVIOURAL INTERVIEW rather than a BEHAVIOURAL TEST which is less adapted. INSTRUMENT-BASED ASSESSMENTS are divided among TEST-BASED ASSESSMENTS and QUESTIONNAIRE-BASED ASSESSMENTS according to the kind of instrument which is administrated and therefore to the specific roles played by the subject and the healthcare professional in the assessment.""" . @@ -189,7 +189,7 @@ INSTRUMENT-BASED ASSESSMENTS are divided among TEST-BASED ASSESSMENTS and QUESTI ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#instrument-variable :instrument-variable rdf:type owl:Class ; - + skos:definition """An Instrument variable is the part of an instrument that refers to the precise measurement of a specific quality of a subject’s capacity (function) or a subject’s trait (in a qualitative or quantitative way). 
Instrument variables specify how the registering of scores must be done, by characterizing the type of values of the scores. Several instrument variables may be necessary to characterize the subject’s function or trait explored by the instrument. INSTRUMENT VARIABLES are divided among MAIN and SECONDARY VARIABLES according to whether they explore the same domain than the ASSESSMENT INSTRUMENT. INSTRUMENT VARIABLES are divided among NUMERICAL and CODED VARIABLES according to the way the score they register is coded (i.e. by a NUMBER, a SCALAR QUALE or a SCALE ITEM).""" . @@ -199,11 +199,11 @@ INSTRUMENT VARIABLES are divided among NUMERICAL and CODED VARIABLES according t ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#main-variable :main-variable rdf:type owl:Class ; - + rdfs:subClassOf :instrument-variable ; - + owl:disjointWith :secondary-variable ; - + skos:definition "A main variable explores the same cognitive function or trait of the subject than the instrument." . @@ -211,9 +211,9 @@ INSTRUMENT VARIABLES are divided among NUMERICAL and CODED VARIABLES according t ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#mono-domain-instrument :mono-domain-instrument rdf:type owl:Class ; - + rdfs:subClassOf :assessment-instrument ; - + owl:disjointWith :multi-domains-instrument . @@ -221,7 +221,7 @@ INSTRUMENT VARIABLES are divided among NUMERICAL and CODED VARIABLES according t ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#multi-domains-instrument :multi-domains-instrument rdf:type owl:Class ; - + rdfs:subClassOf :assessment-instrument . @@ -229,12 +229,12 @@ INSTRUMENT VARIABLES are divided among NUMERICAL and CODED VARIABLES according t ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#neuroclinical-instrument :neuroclinical-instrument rdf:type owl:Class ; - + rdfs:subClassOf :assessment-instrument ; - + owl:disjointWith :psychological-instrument ; - - skos:definition """NEUROCLINICAL INSTRUMENTS are used in neurology, to precisely quantify symptoms and related patient disability. + + skos:definition """NEUROCLINICAL INSTRUMENTS are used in neurology, to precisely quantify symptoms and related patient disability. A neuroclinical instrument provides normative data, consistent measures about \"the strength, efficiency, reactivity, and appropriateness of the patient's responses to commands, questions, discrete stimulation of particular neural subsystems\" (Lezak et al., 2004).""" . @@ -242,9 +242,9 @@ A neuroclinical instrument provides normative data, consistent measures about \" ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#neuroclinical-interview :neuroclinical-interview rdf:type owl:Class ; - + rdfs:subClassOf :questionnaire-based-assessment ; - + skos:definition "A neuroclinical interview is a Questionnaire-based assessment carried out during a neurological examination." . @@ -252,11 +252,11 @@ A neuroclinical instrument provides normative data, consistent measures about \" ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#neuroclinical-test :neuroclinical-test rdf:type owl:Class ; - + rdfs:subClassOf :test-based-assessment ; - + owl:disjointWith :psychological-test ; - + skos:definition "“The Neuroclinical examination (Neurological test) is the usual clinical approach to the study of the brain functions. The Neurological test includes extensive study of the brain's chief product-behavior. [...] 
The neurologist examines the strength, efficiency, reactivity, and appropriateness of the patient's responses to commands, questions, discrete stimulation of particular neural subsystems, and challenges to specific muscle groups and motor patterns. […] In the neurological examination of behavior, the clinician reviews behavior patterns generated by neuroanatomical subsystems, measuring patients' responses in relatively coarse graduations or nothing their absence\" (Lezak et al., 2004)." . @@ -264,10 +264,10 @@ A neuroclinical instrument provides normative data, consistent measures about \" ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#neuropsychological-instrument :neuropsychological-instrument rdf:type owl:Class ; - + rdfs:subClassOf :psychological-instrument ; - - skos:definition """NEUROPSYCHOLOGICAL INSTRUMENTS are used as tools in NEUROPSYCHOLOGICAL EXAMINATIONS to measure a psychological function known to be linked to a particular brain structure or pathway. + + skos:definition """NEUROPSYCHOLOGICAL INSTRUMENTS are used as tools in NEUROPSYCHOLOGICAL EXAMINATIONS to measure a psychological function known to be linked to a particular brain structure or pathway. “Neuropsychological tests are specifically designed tasks used to measure a psychological function known to be linked to a particular brain structure or pathway. They usually involve the systematic administration of clearly defined procedures in a formal environment. Neuropsychological tests are typically administered to a single person working with an examiner in a quiet office environment, free from distractions. As such, it can be argued that neuropsychological tests at times offer an estimate of a person's peak level of cognitive performance. Neuropsychological tests are a core component of the process of conducting neuropsychological assessment” (Source: http://en.wikipedia.org). “Tests designed to assess neurological function associated with certain behaviors; used in diagnosing brain dysfunction or damage and central nervous system disorders or injury.” (Source: CSP/PT). (Concept: (CUI C0027902) Neuropsychological Tests, Semantic Type: Diagnostic Procedure).""" . @@ -276,9 +276,9 @@ A neuroclinical instrument provides normative data, consistent measures about \" ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#neuropsychological-test :neuropsychological-test rdf:type owl:Class ; - + rdfs:subClassOf :psychological-test ; - + skos:definition "\"Neuropsychological assessment is a method of examining the brain by studying its behavioural product. Since the subject matter of neuropsychological assessment is behavior, it relies on many of the same techniques, assumptions, and theories as does psychological assessment. The distinctive character of neuropsychological assessment lies in a conceptual frame of reference that takes brain function as its point of departure. Regardless of whether a behavioral study is undertaken for clinical or research purposes, it is neuropsychological so long as the questions that prompted it, the central issues, the findings, or the inferences drawn from them ultimately relate to brain function\" (Lezak et al., 2004)." . @@ -286,7 +286,7 @@ A neuroclinical instrument provides normative data, consistent measures about \" ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#numerical-score :numerical-score rdf:type owl:Class ; - + rdfs:subClassOf :score . 
@@ -294,9 +294,9 @@ A neuroclinical instrument provides normative data, consistent measures about \" ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#numerical-variable :numerical-variable rdf:type owl:Class ; - + rdfs:subClassOf :instrument-variable ; - + skos:definition """A NUMERICAL VARIABLE is an INSTRUMENT VARIABLE which measures a QUALITY which has for quales (during a TIME INTERVAL) NUMBERS or SCALAR QUALES (a NUMBER + a UNIT OF MEASURE). Example 1: The NUMERICAL VARIABLE ‘FCSRT-IR-3-1v3' of the INSTRUMENT ‘Free and Cued Selective Reminding Test with Immediate Recall 16-item-version-(FCSRT-IR)’ measures the QUALITY ‘Total number of words correctly retrieved on the three successive trials’ which has for quales NUMBERS. This NUMERICAL VARIABLE has for minimum numerical value 0 and has for maximum numerical value 48. Example 2: The NUMERICAL VARIABLE of the INSTRUMENT ‘Rey-Osterrieth-Complex-Figure-Test-(CFT)-Copy-administration’ measures the QUALITY ‘length of time needed by the subject to copy the figure’ which has for quales SCALAR QUALES. The test is not timed, but the length of time needed to copy the figure is observed, and the average time is about 180 seconds. This NUMERICAL VARIABLE has for minimum scalar value 0 second and has for maximum scalar value a SCALAR QUALE chosen by the examinator beyond 300 seconds.""" . @@ -306,7 +306,7 @@ Example 2: The NUMERICAL VARIABLE of the INSTRUMENT ‘Rey-Osterrieth-Complex-Fi ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#numerical-variable-assessment :numerical-variable-assessment rdf:type owl:Class ; - + rdfs:subClassOf :variable-assessment . @@ -314,9 +314,9 @@ Example 2: The NUMERICAL VARIABLE of the INSTRUMENT ‘Rey-Osterrieth-Complex-Fi ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#psychological-instrument :psychological-instrument rdf:type owl:Class ; - + rdfs:subClassOf :assessment-instrument ; - + skos:definition """PSYCHOLOGICAL INSTRUMENTS are divided among NEUROPSYCHOLOGICAL INSTRUMENTS and EXPERIMENTAL PSYCHOLOGY INSTRUMENTS according to the kind of DOMAINS they explore. A psychological instrument provides normative data, consistent measures about intensity/severity of a psychological and/or psychopathological trait. “Standardized tests designed to measure abilities, as in intelligence, aptitude, and achievement tests, or to evaluate personality traits”. (Source: MSH/MH). (Concept: (CUI C0033905) Psychological Tests (procedure), Semantic Type: Diagnostic Procedure).""" . @@ -326,9 +326,9 @@ A psychological instrument provides normative data, consistent measures about in ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#psychological-interview :psychological-interview rdf:type owl:Class ; - + rdfs:subClassOf :questionnaire-based-assessment ; - + skos:definition "A psychological interview is a Questionnaire-based assessment carried out during a psychological examination." . @@ -336,9 +336,9 @@ A psychological instrument provides normative data, consistent measures about in ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#psychological-test :psychological-test rdf:type owl:Class ; - + rdfs:subClassOf :test-based-assessment ; - + skos:definition """\"[…] psychological assessment […] involves the intensive study of behavior by means of interviews and standardized scaled tests and questionnaires that provide relatively precise and sensitive indices of behaviour.\" (Lezak et al., 2004). 
PSYCHOLOGICAL TESTS are divided among NEUROPSYCHOLOGICAL TESTS and EXPERIMENTAL PSYCHOLOGY TESTS according to the kind of instrument that is used.""" . @@ -347,7 +347,7 @@ PSYCHOLOGICAL TESTS are divided among NEUROPSYCHOLOGICAL TESTS and EXPERIMENTAL ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#psychophysical-instrument :psychophysical-instrument rdf:type owl:Class ; - + rdfs:subClassOf :experimental-psychology-instrument . @@ -355,7 +355,7 @@ PSYCHOLOGICAL TESTS are divided among NEUROPSYCHOLOGICAL TESTS and EXPERIMENTAL ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#psychophysical-test :psychophysical-test rdf:type owl:Class ; - + rdfs:subClassOf :experimental-psychology-test . @@ -363,10 +363,10 @@ PSYCHOLOGICAL TESTS are divided among NEUROPSYCHOLOGICAL TESTS and EXPERIMENTAL ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#qualitative-scale-item :qualitative-scale-item rdf:type owl:Class ; - + rdfs:subClassOf :scale-item ; - - skos:definition """A QUALITATIVE SCALE ITEM refers to a qualitative value of the measured QUALITY. Each QUALITATIVE SCALE ITEM from the SCALE is designed to report a qualitative attribute about the cognitive ability or the psychological trait or the behavior and therefore to reflect the pathological level/intensity/severity of the behavior or the disability by a qualitative way. + + skos:definition """A QUALITATIVE SCALE ITEM refers to a qualitative value of the measured QUALITY. Each QUALITATIVE SCALE ITEM from the SCALE is designed to report a qualitative attribute about the cognitive ability or the psychological trait or the behavior and therefore to reflect the pathological level/intensity/severity of the behavior or the disability by a qualitative way. Example: The INSTRUMENT ‘Beck Depression Inventory' can distinguish between different subtypes of depressive disorders, such as major depression and dysthymia (a less severe form of depression). The CODED VARIABLE of the INSTRUMENT ‘Beck Depression Inventory' which measures the QUALITY 'Intensity of depression' has for scale a SCALE in whom, each QUALITATIVE ITEM SCALE refers to a particular intensity of the depression by a qualitative way ('minimal depressive symptoms', 'mild depression', 'moderate depression, 'severe depression').""" . @@ -374,9 +374,9 @@ Example: The INSTRUMENT ‘Beck Depression Inventory' can distinguish between di ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#quantitative-scale-item :quantitative-scale-item rdf:type owl:Class ; - + rdfs:subClassOf :scale-item ; - + skos:definition """A QUANTITATIVE SCALE ITEM refers to a quantitative value of the measured quality. Each QUANTITATIVE SCALE ITEM from the SCALE is designed to report a quantitative attribute about the cognitive ability or the psychological trait or the behavior and therefore to reflect the pathological level/intensity of the behavior or the disability by a quantitative way. Example: The INSTRUMENT 'Montgomery-Asberg Depression Rating Scale' (MADRS) is a diagnostic questionnaire which psychiatrists use to measure the severity of depressive episodes in patients with mood disorders. For each CODED VARIABLE, associated to a SCALE, the rater must decide whether the rating lies on the defined QUANTITATIVE SCALE ITEMS (0, 1, 2, 3, 4, 5, 6).""" . 
@@ -385,11 +385,11 @@ Example: The INSTRUMENT 'Montgomery-Asberg Depression Rating Scale' (MADRS) is a ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#questionnaire :questionnaire rdf:type owl:Class ; - + rdfs:subClassOf :assessment-instrument ; - + owl:disjointWith :test-instrument ; - + skos:definition "Questionnaire (CDISC Clinical Research Glossary, v. 6.0): A set of questions or items shown to a respondent in order to get answers for research purposes." . @@ -397,9 +397,9 @@ Example: The INSTRUMENT 'Montgomery-Asberg Depression Rating Scale' (MADRS) is a ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#questionnaire-based-assessment :questionnaire-based-assessment rdf:type owl:Class ; - + rdfs:subClassOf :instrument-based-assessment ; - + skos:definition """A Questionnaire-based assessment is an Instrument-based assessment which involves the integration of data from questionnaires. A questionnaire-based assessment involves sets of questions (items), carried out in a structured way (known as structured interview) or semi-structured way, to provide normative data. Interview-based measures and rating scales are designed to be completed by clinicians, subjects, parents, caregivers. The scoring of each item contributes to the measure of the explored domain (e.g. Depression). “A directed conversation with the subject aimed at eliciting information for psychiatric diagnosis, evaluation, treatment planning, etc. The interview may be conducted by a social worker or psychologist.” (Source: MSH/MH). (Concept: [CUI C0021819] Interview, Psychological, Semantic Type: Diagnostic Procedure). QUESTIONNAIRE-BASED ASSESSMENTS are divided among PSYCHOLOGICAL INTERVIEWS, BEHAVIOURAL INTERVIEWS, and NEUROCLINICAL INTERVIEWS according to the kind of questionnaire that is used.""" . @@ -409,11 +409,11 @@ QUESTIONNAIRE-BASED ASSESSMENTS are divided among PSYCHOLOGICAL INTERVIEWS, BEHA ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#raw-score :raw-score rdf:type owl:Class ; - + rdfs:subClassOf :numerical-score ; - + owl:disjointWith :standardized-score ; - + skos:definition "(M.D. Lezak et al., 2004): “The raw score is the simple sum of correct answers or correct answers minus a portion of the incorrect ones. For in itself a raw score communicates nothing about its relative value”" . @@ -421,10 +421,10 @@ QUESTIONNAIRE-BASED ASSESSMENTS are divided among PSYCHOLOGICAL INTERVIEWS, BEHA ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#scale :scale rdf:type owl:Class ; - + skos:definition """“A rating scale is a set of categories designed to elicit information about a quantitative or a qualitative attribute in the social sciences” (Source: http://en.wikipedia.org). “Scales can focus on different aspects of mood, behavior, and functional abilities”. (Source: Lezak et al., 2004). -“The \"levels of measurement\", or scales of measure are expressions that typically refer to the theory of scale types developed by the psychologist Stanley Smith Stevens. Stevens proposed his theory in a 1946 Science article titled \"On the theory of scales of measurement\". In that article, Stevens claimed that all measurement in science was conducted using four different types of scales that he called \"nominal\", \"ordinal\", \"interval\" and \"ratio\". … As a matter of fact, most of the scales used widely and effectively by psychologists are ordinal scales. … Psychometricians like to theorise that psychometric tests produce interval scale measures of cognitive abilities (e.g. 
Lord & Novick, 1968; von Eye, 2005) but there is little prima facie evidence to suggest that such attributes are anything more than ordinal for most psychological data (Cliff, 1996; Cliff & Keats, 2003; Michell, 2008). … There has been, and continues to be, debate about the merits of the classifications, particularly in the cases of the nominal and ordinal classifications (Michell, 1986). Thus, while Stevens' classification is widely adopted, it is by no means universally accepted. … The theory of scale types is the intellectual handmaiden to Stevens' \"operational theory of measurement\", which was to become definitive within psychology and the behavioral sciences, despite Michell's characterization as its being quite at odds with Michell's understanding of measurement in the natural sciences (Michell, 1999). Essentially, the operational theory of measurement was a reaction to the conclusions of a committee established in 1932 by the British Association for the Advancement of Science to investigate the possibility of genuine scientific measurement in the psychological and behavioral sciences.” (Source: http://en.wikipedia.org). +“The \"levels of measurement\", or scales of measure are expressions that typically refer to the theory of scale types developed by the psychologist Stanley Smith Stevens. Stevens proposed his theory in a 1946 Science article titled \"On the theory of scales of measurement\". In that article, Stevens claimed that all measurement in science was conducted using four different types of scales that he called \"nominal\", \"ordinal\", \"interval\" and \"ratio\". … As a matter of fact, most of the scales used widely and effectively by psychologists are ordinal scales. … Psychometricians like to theorise that psychometric tests produce interval scale measures of cognitive abilities (e.g. Lord & Novick, 1968; von Eye, 2005) but there is little prima facie evidence to suggest that such attributes are anything more than ordinal for most psychological data (Cliff, 1996; Cliff & Keats, 2003; Michell, 2008). … There has been, and continues to be, debate about the merits of the classifications, particularly in the cases of the nominal and ordinal classifications (Michell, 1986). Thus, while Stevens' classification is widely adopted, it is by no means universally accepted. … The theory of scale types is the intellectual handmaiden to Stevens' \"operational theory of measurement\", which was to become definitive within psychology and the behavioral sciences, despite Michell's characterization as its being quite at odds with Michell's understanding of measurement in the natural sciences (Michell, 1999). Essentially, the operational theory of measurement was a reaction to the conclusions of a committee established in 1932 by the British Association for the Advancement of Science to investigate the possibility of genuine scientific measurement in the psychological and behavioral sciences.” (Source: http://en.wikipedia.org). Example: The CODED VARIABLE of the INSTRUMENT ‘Beck Depression Inventory’ which measures the QUALITY ‘Sadness’ has for scale the following 4-item SCALE: ('I do not feel sad', 'I feel sad much of the time', 'I am sad all the time', 'I am so sad or unhappy that I can't stand it'). This SCALE has for minimum scale item the SCALE ITEM: 'I do not feel sad' and has for maximum scale item the SCALE ITEM: 'I am so sad or unhappy that I can't stand it'.""" . 
@@ -432,7 +432,7 @@ Example: The CODED VARIABLE of the INSTRUMENT ‘Beck Depression Inventory’ wh ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#scale-item :scale-item rdf:type owl:Class ; - + skos:definition """During an observation, the process of approximating the observed QUALITY’s value (called QUALE in DOLCE) turns the continuous QUALE of the observed quality into a discrete approximation entity which is a SCALE ITEM. While the construction of the SCALE, a set of QUALITY's values are chosen and each of them is coded by a SCALE ITEM. Generally, a SCALE consists of 4 SCALE ITEMS (short version) or of 6 SCALE ITEMS (long version). Example: In the INSTRUMENT ‘Beck Depression Inventory', the SCALE ITEM 'I feel sad much of the time' belongs to the SCALE which is the scale of the CODED VARIABLE which measures the QUALITY ‘Sadness’. This SCALE ITEM refers to a specific level of sadness.""" . @@ -441,7 +441,7 @@ Example: In the INSTRUMENT ‘Beck Depression Inventory', the SCALE ITEM 'I feel ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#score :score rdf:type owl:Class ; - + skos:definition "SCORES are divided into CODED SCORES AND NUMERICAL SCORES according to whether they are coded by a LEXICAL ITEM or a NUMBER." . @@ -449,7 +449,7 @@ Example: In the INSTRUMENT ‘Beck Depression Inventory', the SCALE ITEM 'I feel ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#score-with-unit-of-measure :score-with-unit-of-measure rdf:type owl:Class ; - + rdfs:subClassOf :numerical-score . @@ -457,7 +457,7 @@ Example: In the INSTRUMENT ‘Beck Depression Inventory', the SCALE ITEM 'I feel ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#score-without-unit-of-measure :score-without-unit-of-measure rdf:type owl:Class ; - + rdfs:subClassOf :numerical-score . @@ -465,9 +465,9 @@ Example: In the INSTRUMENT ‘Beck Depression Inventory', the SCALE ITEM 'I feel ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#secondary-variable :secondary-variable rdf:type owl:Class ; - + rdfs:subClassOf :instrument-variable ; - + skos:definition "A secondary variable provides some additional information about a cognitive function or a trait of the subject." . @@ -475,9 +475,9 @@ Example: In the INSTRUMENT ‘Beck Depression Inventory', the SCALE ITEM 'I feel ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#standardized-score :standardized-score rdf:type owl:Class ; - + rdfs:subClassOf :numerical-score ; - + skos:definition """“To make the comparisons necessary for evaluating impairment, test-makers generally reports scores as values of a scale based on the raw scores made by a standardization population (the group of individual tested for the purpose of obtaining normative data on the test). The scale that is most meaningful statistically is one derived from the normal probability curved and based on the standard deviation unit. There are a variety of standard scores that are all translations of the same scale, based on the mean and the standard deviation. [...] The z-score is the basic, unelaborated standard score from which all others can be derived. The z-score represents, in standard deviation units, the amount a score deviates from the mean of the population from which the score is drawn. The mean of the normal curve is set at zero and the standard deviation unit has a value of one. Scores are stated in term of their distance from the mean as measured in standard deviation units. 
Scores above the mean have a positive value; those below the mean are negative. Neuropsychological test data can be handled very appropriately in a z-score format. Elaborations of the z-score are called derived scores. Among the most widely used derived scores are T-scores, which are very popular in educational testing. The T-score has a mean of 50 and a standard deviation of 10.” (Lezak et al., 2004)""" . @@ -486,7 +486,7 @@ Elaborations of the z-score are called derived scores. Among the most widely use ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#sub-instrument :sub-instrument rdf:type owl:Class ; - + rdfs:subClassOf :assessment-instrument . @@ -494,9 +494,9 @@ Elaborations of the z-score are called derived scores. Among the most widely use ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#test-based-assessment :test-based-assessment rdf:type owl:Class ; - + rdfs:subClassOf :instrument-based-assessment ; - + skos:definition """A Test-based assessment is an Instrument-based assessment which involves the integration of data from tests. A Test-based assessment is conducted as a formal testing session to provide normative data. TEST-BASED ASSESSMENTS are divided among PSYCHOLOGICAL TESTS, BEHAVIOURAL TESTS and NEUROCLINICAL TESTS according to the kind of instrument that is used.""" . @@ -505,9 +505,9 @@ TEST-BASED ASSESSMENTS are divided among PSYCHOLOGICAL TESTS, BEHAVIOURAL TESTS ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#test-instrument :test-instrument rdf:type owl:Class ; - + rdfs:subClassOf :assessment-instrument ; - + skos:definition "A test-instrument is an instrument that solicits an authentic production from the subject, e.g., a reflex, a performance (such as a drawing, or a 500-meter walk). This production is recorded under control of a healthcare professional, but remains objective." . @@ -515,10 +515,9 @@ TEST-BASED ASSESSMENTS are divided among PSYCHOLOGICAL TESTS, BEHAVIOURAL TESTS ### http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#variable-assessment :variable-assessment rdf:type owl:Class ; - + skos:definition "A variable assessment is an activity that results in a value assigned to a specific item on an assessment instrument question or test." . ### Generated by the OWL API (version 3.5.1) http://owlapi.sourceforge.net - diff --git a/nidm/terms/imports/pato_import.ttl b/nidm/terms/imports/pato_import.ttl index cf7f82fd..34acdbbd 100644 --- a/nidm/terms/imports/pato_import.ttl +++ b/nidm/terms/imports/pato_import.ttl @@ -9,7 +9,7 @@ @prefix oboInOwl: . @base . - rdf:type owl:Ontology ; + rdf:type owl:Ontology ; oboInOwl:date "07:02:2018 10:27"^^xsd:string ; oboInOwl:hasOBOFormatVersion "1.2"^^xsd:string ; oboInOwl:auto-generated-by "OBO-Edit 2.3.1"^^xsd:string ; @@ -27,15 +27,15 @@ ### http://purl.obolibrary.org/obo/PATO_0000001 obo:PATO_0000001 rdf:type owl:Class ; - + rdfs:label "quality"^^xsd:string ; - + obo:IAO_0000115 "A dependent entity that inheres in a bearer by virtue of how the bearer is related to other entities"^^xsd:string ; - + oboInOwl:id "PATO:0000001"^^xsd:string ; - + oboInOwl:hasOBONamespace "quality"^^xsd:string ; - + oboInOwl:hasExactSynonym "trait"^^xsd:string . 
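The standardized-score definition quoted from Lezak et al. in the instrument.owl hunk above amounts to two small formulas: z = (raw - mean) / SD, and T = 50 + 10 * z. A minimal Python sketch with made-up normative numbers, purely for illustration:

```python
from statistics import mean, stdev

# Hypothetical raw scores from a normative sample (made-up numbers).
norms = [12, 15, 9, 14, 11, 13, 10, 16]
mu, sigma = mean(norms), stdev(norms)

raw = 14
z = (raw - mu) / sigma   # z-score: deviation from the mean in SD units
t = 50 + 10 * z          # T-score: derived score with mean 50, SD 10

print(f"z = {z:.2f}, T = {t:.1f}")
# Scores above the normative mean give positive z (T > 50);
# scores below the mean give negative z (T < 50).
```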
[ rdf:type owl:Axiom ; owl:annotatedTarget "A dependent entity that inheres in a bearer by virtue of how the bearer is related to other entities"^^xsd:string ; @@ -49,17 +49,17 @@ obo:PATO_0000001 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/PATO_0000047 obo:PATO_0000047 rdf:type owl:Class ; - + rdfs:label "biological sex"^^xsd:string ; - + rdfs:subClassOf obo:PATO_0001995 ; - + obo:IAO_0000115 "An organismal quality inhering in a bearer by virtue of the bearer's ability to undergo sexual reproduction in order to differentiate the individuals or types involved."^^xsd:string ; - + oboInOwl:id "PATO:0000047"^^xsd:string ; - + oboInOwl:hasOBONamespace "quality"^^xsd:string ; - + oboInOwl:inSubset pato:attribute_slim . [ rdf:type owl:Axiom ; owl:annotatedTarget "An organismal quality inhering in a bearer by virtue of the bearer's ability to undergo sexual reproduction in order to differentiate the individuals or types involved."^^xsd:string ; @@ -73,17 +73,17 @@ obo:PATO_0000047 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/PATO_0000186 obo:PATO_0000186 rdf:type owl:Class ; - + rdfs:label "behavioral quality"^^xsd:string ; - + rdfs:subClassOf obo:PATO_0001995 ; - + obo:IAO_0000115 "An organismal quality inhering in a bearer by virtue of the bearer's behavior aggregate of the responses or reactions or movements in a given situation."^^xsd:string ; - + oboInOwl:id "PATO:0000186"^^xsd:string ; - + oboInOwl:hasExactSynonym "behavioural quality"^^xsd:string ; - + oboInOwl:hasOBONamespace "quality"^^xsd:string . [ rdf:type owl:Axiom ; owl:annotatedTarget "An organismal quality inhering in a bearer by virtue of the bearer's behavior aggregate of the responses or reactions or movements in a given situation."^^xsd:string ; @@ -97,17 +97,17 @@ obo:PATO_0000186 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/PATO_0000383 obo:PATO_0000383 rdf:type owl:Class ; - + rdfs:label "female"^^xsd:string ; - + rdfs:subClassOf obo:PATO_0001894 ; - + obo:IAO_0000115 "A biological sex quality inhering in an individual or a population that only produces gametes that can be fertilised by male gametes."^^xsd:string ; - + oboInOwl:id "PATO:0000383"^^xsd:string ; - + oboInOwl:hasOBONamespace "quality"^^xsd:string ; - + oboInOwl:inSubset pato:mpath_slim , pato:value_slim . [ rdf:type owl:Axiom ; @@ -122,17 +122,17 @@ obo:PATO_0000383 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/PATO_0000384 obo:PATO_0000384 rdf:type owl:Class ; - + rdfs:label "male"^^xsd:string ; - + rdfs:subClassOf obo:PATO_0001894 ; - + obo:IAO_0000115 "A biological sex quality inhering in an individual or a population whose sex organs contain only male gametes."^^xsd:string ; - + oboInOwl:id "PATO:0000384"^^xsd:string ; - + oboInOwl:hasOBONamespace "quality"^^xsd:string ; - + oboInOwl:inSubset pato:mpath_slim , pato:value_slim . [ rdf:type owl:Axiom ; @@ -147,31 +147,31 @@ obo:PATO_0000384 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/PATO_0001241 obo:PATO_0001241 rdf:type owl:Class ; - + rdfs:label "physical object quality"^^xsd:string ; - + rdfs:subClassOf obo:PATO_0000001 ; - + obo:IAO_0000115 "A quality which inheres in a continuant."^^xsd:string ; - + oboInOwl:id "PATO:0001241"^^xsd:string ; - + rdfs:comment "Relational qualities are qualities that hold between multiple entities. Normal (monadic) qualities such as the shape of a eyeball exist purely as a quality of that eyeball. 
A relational quality such as sensitivity to light is a quality of that eyeball (and connecting nervous system) as it relates to incoming light waves/particles."^^xsd:string ; - + oboInOwl:hasExactSynonym "monadic quality of a continuant"^^xsd:string ; - + oboInOwl:hasNarrowSynonym "monadic quality of an object"^^xsd:string , "monadic quality of continuant"^^xsd:string ; - + oboInOwl:hasExactSynonym "multiply inhering quality of a physical entity"^^xsd:string ; - + oboInOwl:hasOBONamespace "quality"^^xsd:string ; - + oboInOwl:hasExactSynonym "quality of a continuant"^^xsd:string , "quality of a single physical entity"^^xsd:string , "quality of an object"^^xsd:string , "quality of continuant"^^xsd:string ; - + oboInOwl:hasDbXref "snap:Quality"^^xsd:string . [ rdf:type owl:Axiom ; owl:annotatedTarget "A quality which inheres in a continuant."^^xsd:string ; @@ -185,19 +185,19 @@ obo:PATO_0001241 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/PATO_0001340 obo:PATO_0001340 rdf:type owl:Class ; - + rdfs:label "hermaphrodite"^^xsd:string ; - + rdfs:subClassOf obo:PATO_0001894 ; - + obo:IAO_0000115 "A biological sex quality inhering in an organism or a population with both male and female sexual organs in one individual."^^xsd:string ; - + oboInOwl:id "PATO:0001340"^^xsd:string ; - + oboInOwl:hasExactSynonym "intersex"^^xsd:string ; - + oboInOwl:hasOBONamespace "quality"^^xsd:string ; - + oboInOwl:inSubset pato:value_slim . [ rdf:type owl:Axiom ; owl:annotatedTarget "A biological sex quality inhering in an organism or a population with both male and female sexual organs in one individual."^^xsd:string ; @@ -211,17 +211,17 @@ obo:PATO_0001340 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/PATO_0001827 obo:PATO_0001827 rdf:type owl:Class ; - + rdfs:label "pseudohermaphrodite"^^xsd:string ; - + rdfs:subClassOf obo:PATO_0001894 ; - + obo:IAO_0000115 "A biological sex quality inhering in an individual or a population by virtue of having internal reproductive organs of one sex and external sexual characteristics of the other sex."^^xsd:string ; - + oboInOwl:id "PATO:0001827"^^xsd:string ; - + oboInOwl:hasOBONamespace "quality"^^xsd:string ; - + oboInOwl:inSubset pato:value_slim . [ rdf:type owl:Axiom ; owl:annotatedTarget "A biological sex quality inhering in an individual or a population by virtue of having internal reproductive organs of one sex and external sexual characteristics of the other sex."^^xsd:string ; @@ -235,17 +235,17 @@ obo:PATO_0001827 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/PATO_0001828 obo:PATO_0001828 rdf:type owl:Class ; - + rdfs:label "male pseudohermaphrodite"^^xsd:string ; - + rdfs:subClassOf obo:PATO_0001827 ; - + obo:IAO_0000115 "A biological sex quality inhering in an individual or a population by virtue of having internal reproductive organs of male and external sexual characteristics of female."^^xsd:string ; - + oboInOwl:id "PATO:0001828"^^xsd:string ; - + oboInOwl:hasOBONamespace "quality"^^xsd:string ; - + oboInOwl:inSubset pato:value_slim . 
[ rdf:type owl:Axiom ; owl:annotatedTarget "A biological sex quality inhering in an individual or a population by virtue of having internal reproductive organs of male and external sexual characteristics of female."^^xsd:string ; @@ -259,17 +259,17 @@ obo:PATO_0001828 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/PATO_0001829 obo:PATO_0001829 rdf:type owl:Class ; - + rdfs:label "female pseudohermaphrodite"^^xsd:string ; - + rdfs:subClassOf obo:PATO_0001827 ; - + obo:IAO_0000115 "A biological sex quality inhering in an individual or a population by virtue of having internal reproductive organs of female and external sexual characteristics of male."^^xsd:string ; - + oboInOwl:id "PATO:0001829"^^xsd:string ; - + oboInOwl:hasOBONamespace "quality"^^xsd:string ; - + oboInOwl:inSubset pato:value_slim . [ rdf:type owl:Axiom ; owl:annotatedTarget "A biological sex quality inhering in an individual or a population by virtue of having internal reproductive organs of female and external sexual characteristics of male."^^xsd:string ; @@ -284,15 +284,15 @@ obo:PATO_0001829 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/PATO_0001894 obo:PATO_0001894 rdf:type owl:Class ; - + rdfs:label "phenotypic sex"^^xsd:string ; - + rdfs:subClassOf obo:PATO_0000047 ; - + oboInOwl:id "PATO:0001894"^^xsd:string ; - + oboInOwl:hasOBONamespace "quality"^^xsd:string ; - + oboInOwl:inSubset pato:attribute_slim . @@ -300,15 +300,15 @@ obo:PATO_0001894 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/PATO_0001995 obo:PATO_0001995 rdf:type owl:Class ; - + rdfs:label "organismal quality"^^xsd:string ; - + rdfs:subClassOf obo:PATO_0001241 ; - + obo:IAO_0000115 "A quality that inheres in an entire organism or part of an organism."^^xsd:string ; - + oboInOwl:id "PATO:0001995"^^xsd:string ; - + oboInOwl:hasOBONamespace "quality"^^xsd:string . [ rdf:type owl:Axiom ; owl:annotatedTarget "A quality that inheres in an entire organism or part of an organism."^^xsd:string ; @@ -322,23 +322,23 @@ obo:PATO_0001995 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/PATO_0002201 obo:PATO_0002201 rdf:type owl:Class ; - + rdfs:label "handedness"^^xsd:string ; - + rdfs:subClassOf obo:PATO_0000186 ; - + oboInOwl:creation_date "2010-03-22T05:48:33Z"^^xsd:string ; - + obo:IAO_0000115 "A behavioral quality inhering ina bearer by virtue of the bearer's unequal distribution of fine motor skill between its left and right hands or feet."^^xsd:string ; - + rdfs:comment "Adapted from Wikipedia and the birnlex term that is dbxref'd. Added on behalf of OBI at March 2010 workshop."^^xsd:string ; - + oboInOwl:id "PATO:0002201"^^xsd:string ; - + oboInOwl:created_by "haendel"^^xsd:string ; - + oboInOwl:hasOBONamespace "quality"^^xsd:string . 
- + [ rdf:type owl:Axiom ; owl:annotatedTarget "A behavioral quality inhering ina bearer by virtue of the bearer's unequal distribution of fine motor skill between its left and right hands or feet."^^xsd:string ; oboInOwl:hasDbXref "birnlex:2178"^^xsd:string ; @@ -351,21 +351,21 @@ obo:PATO_0002201 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/PATO_0002202 obo:PATO_0002202 rdf:type owl:Class ; - + rdfs:label "left handedness"^^xsd:string ; - + rdfs:subClassOf obo:PATO_0002201 ; - + oboInOwl:creation_date "2010-03-22T05:49:06Z"^^xsd:string ; - + obo:IAO_0000115 "Handedness where the organism preferentially uses the left hand or foot for tasks requiring the use of a single hand or foot or a dominant hand or foot."^^xsd:string ; - + oboInOwl:id "PATO:0002202"^^xsd:string ; - + oboInOwl:created_by "haendel"^^xsd:string ; - + oboInOwl:hasOBONamespace "quality"^^xsd:string ; - + oboInOwl:inSubset pato:disposition_slim , pato:value_slim . [ rdf:type owl:Axiom ; @@ -380,21 +380,21 @@ obo:PATO_0002202 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/PATO_0002203 obo:PATO_0002203 rdf:type owl:Class ; - + rdfs:label "right handedness"^^xsd:string ; - + rdfs:subClassOf obo:PATO_0002201 ; - + oboInOwl:creation_date "2010-03-22T05:49:19Z"^^xsd:string ; - + obo:IAO_0000115 "Handedness where the organism preferentially uses the right hand or foot for tasks requiring the use of a single hand or foot or a dominant hand or foot."^^xsd:string ; - + oboInOwl:id "PATO:0002203"^^xsd:string ; - + oboInOwl:created_by "haendel"^^xsd:string ; - + oboInOwl:hasOBONamespace "quality"^^xsd:string ; - + oboInOwl:inSubset pato:disposition_slim , pato:value_slim . [ rdf:type owl:Axiom ; @@ -409,25 +409,24 @@ obo:PATO_0002203 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/PATO_0002204 obo:PATO_0002204 rdf:type owl:Class ; - + rdfs:label "ambidextrous handedness"^^xsd:string ; - + rdfs:subClassOf obo:PATO_0002201 ; - + oboInOwl:creation_date "2010-03-22T05:49:30Z"^^xsd:string ; - + obo:IAO_0000115 "Handedness where the organism exhibits no overall dominance in the use of right or left hand or foot in the performance of tasks that require one hand or foot or a dominant hand or foot."^^xsd:string ; - + oboInOwl:id "PATO:0002204"^^xsd:string ; - + oboInOwl:created_by "haendel"^^xsd:string ; - + oboInOwl:hasOBONamespace "quality"^^xsd:string . - + [ rdf:type owl:Axiom ; owl:annotatedTarget "Handedness where the organism exhibits no overall dominance in the use of right or left hand or foot in the performance of tasks that require one hand or foot or a dominant hand or foot."^^xsd:string ; oboInOwl:hasDbXref "birnlex:2042"^^xsd:string ; owl:annotatedProperty obo:IAO_0000115 ; owl:annotatedSource obo:PATO_0002204 ] . - diff --git a/nidm/terms/imports/prov-o b/nidm/terms/imports/prov-o index 8a4aa1c3..92243dbf 100644 --- a/nidm/terms/imports/prov-o +++ b/nidm/terms/imports/prov-o @@ -835,7 +835,7 @@ A primary source relation is a particular case of derivation of secondary materi :qualifiedForm a owl:AnnotationProperty ; - rdfs:comment """This annotation property links a subproperty of prov:wasInfluencedBy with the subclass of prov:Influence and the qualifying property that are used to qualify it. + rdfs:comment """This annotation property links a subproperty of prov:wasInfluencedBy with the subclass of prov:Influence and the qualifying property that are used to qualify it. 
Example annotation: @@ -848,7 +848,7 @@ Then this unqualified assertion: can be qualified by adding: :entity1 prov:qualifiedGeneration :entity1Gen . - :entity1Gen + :entity1Gen a prov:Generation, prov:Influence; prov:activity :activity1; :customValue 1337 . @@ -1149,7 +1149,7 @@ Note how the value of the unqualified influence (prov:wasGeneratedBy :activity1) Subproperties of prov:wasInfluencedBy may also be asserted directly without being qualified. -prov:wasInfluencedBy should not be used without also using one of its subproperties. +prov:wasInfluencedBy should not be used without also using one of its subproperties. """@en ; :inverse "influenced" ; :qualifiedForm :Influence, :qualifiedInfluence ; @@ -1236,7 +1236,7 @@ prov:wasInfluencedBy should not be used without also using one of its subpropert a owl:Ontology ; - rdfs:comment """This document is published by the Provenance Working Group (http://www.w3.org/2011/prov/wiki/Main_Page). + rdfs:comment """This document is published by the Provenance Working Group (http://www.w3.org/2011/prov/wiki/Main_Page). If you wish to make comments regarding this document, please send them to public-prov-comments@w3.org (subscribe public-prov-comments-request@w3.org, archives http://lists.w3.org/Archives/Public/public-prov-comments/). All feedback is welcome."""@en ; rdfs:label "W3C PROVenance Interchange Ontology (PROV-O)"@en ; @@ -1313,9 +1313,8 @@ If you wish to make comments regarding this document, please send them to public [] a owl:Axiom ; - rdfs:comment """Revision is a derivation (see http://www.w3.org/TR/prov-dm/#term-Revision). Moreover, according to + rdfs:comment """Revision is a derivation (see http://www.w3.org/TR/prov-dm/#term-Revision). Moreover, according to http://www.w3.org/TR/2013/REC-prov-constraints-20130430/#term-Revision 23 April 2012 'wasRevisionOf is a strict sub-relation of wasDerivedFrom since two entities e2 and e1 may satisfy wasDerivedFrom(e2,e1) without being a variant of each other.'""" ; owl:annotatedProperty rdfs:subPropertyOf ; owl:annotatedSource :wasRevisionOf ; owl:annotatedTarget :wasDerivedFrom . - diff --git a/nidm/terms/imports/prv_import.ttl b/nidm/terms/imports/prv_import.ttl index e65b6ca2..d605df19 100644 --- a/nidm/terms/imports/prv_import.ttl +++ b/nidm/terms/imports/prv_import.ttl @@ -26,19 +26,19 @@ ### http://purl.org/dc/elements/1.1/creator dc:creator rdfs:label "Creator"@en ; - + "1999-07-02"^^xsd:date ; - + "2008-01-14"^^xsd:date ; - + "A second property with the same name as this property has been declared in the dcterms: namespace (http://purl.org/dc/terms/). See the Introduction to the document \"DCMI Metadata Terms\" (http://dublincore.org/documents/dcmi-terms/) for an explanation."@en ; - + rdfs:comment "An entity primarily responsible for making the resource."@en ; - + "Examples of a Creator include a person, an organization, or a service. Typically, the name of a Creator should be used to indicate the entity."@en ; - + ; - + rdfs:isDefinedBy dc: . @@ -46,19 +46,19 @@ dc:creator rdfs:label "Creator"@en ; ### http://purl.org/dc/elements/1.1/date dc:date rdfs:label "Date"@en ; - + "1999-07-02"^^xsd:date ; - + "2008-01-14"^^xsd:date ; - + rdfs:comment "A point or period of time associated with an event in the lifecycle of the resource."@en ; - + "A second property with the same name as this property has been declared in the dcterms: namespace (http://purl.org/dc/terms/). 
See the Introduction to the document \"DCMI Metadata Terms\" (http://dublincore.org/documents/dcmi-terms/) for an explanation."@en ; - + "Date may be used to express temporal information at any level of granularity. Recommended best practice is to use an encoding scheme, such as the W3CDTF profile of ISO 8601 [W3CDTF]."@en ; - + ; - + rdfs:isDefinedBy dc: . @@ -66,19 +66,19 @@ dc:date rdfs:label "Date"@en ; ### http://purl.org/dc/elements/1.1/description dc:description rdfs:label "Description"@en ; - + "1999-07-02"^^xsd:date ; - + "2008-01-14"^^xsd:date ; - + "A second property with the same name as this property has been declared in the dcterms: namespace (http://purl.org/dc/terms/). See the Introduction to the document \"DCMI Metadata Terms\" (http://dublincore.org/documents/dcmi-terms/) for an explanation."@en ; - + rdfs:comment "An account of the resource."@en ; - + "Description may include but is not limited to: an abstract, a table of contents, a graphical representation, or a free-text account of the resource."@en ; - + ; - + rdfs:isDefinedBy dc: . @@ -86,17 +86,17 @@ dc:description rdfs:label "Description"@en ; ### http://purl.org/dc/elements/1.1/title dc:title rdfs:label "Title"@en ; - + "1999-07-02"^^xsd:date ; - + "2008-01-14"^^xsd:date ; - + rdfs:comment "A name given to the resource."@en ; - + "A second property with the same name as this property has been declared in the dcterms: namespace (http://purl.org/dc/terms/). See the Introduction to the document \"DCMI Metadata Terms\" (http://dublincore.org/documents/dcmi-terms/) for an explanation."@en ; - + ; - + rdfs:isDefinedBy dc: . @@ -170,17 +170,17 @@ xsd:date rdf:type rdfs:Datatype . ### http://purl.org/ontology/prv/core#object_property prv:object_property rdf:type owl:ObjectProperty ; - + rdfs:label "has object property"@en ; - + rdfs:comment "Relates to the property of the reification class, which relates to the object of the \"shortcut relation\"."@en ; - + status:term_status "testing"@en ; - + rdfs:isDefinedBy ; - + rdfs:domain prv:PropertyReification ; - + rdfs:range rdf:Property . @@ -188,34 +188,34 @@ prv:object_property rdf:type owl:ObjectProperty ; ### http://purl.org/ontology/prv/core#reification_class prv:reification_class rdf:type owl:ObjectProperty ; - + rdfs:label "has reification class"@en ; - - rdfs:comment """Relates to the reification class, which can be related to the object and subject property to be able to associate the -object and subject of the \"shortcut relation\". The reification class should provide detailed descriptions of the relationship that is + + rdfs:comment """Relates to the reification class, which can be related to the object and subject property to be able to associate the +object and subject of the \"shortcut relation\". The reification class should provide detailed descriptions of the relationship that is described in a simple form by the \"shortcut relation\"."""@en ; - + status:term_status "testing"@en ; - + rdfs:isDefinedBy ; - + rdfs:domain prv:PropertyReification . ### http://purl.org/ontology/prv/core#shortcut prv:shortcut rdf:type owl:ObjectProperty ; - + rdfs:label "has shortcut"@en ; - + rdfs:comment "Relates to the property of the \"shortcut relation\" (its predicate)."@en ; - + status:term_status "testing"@en ; - + rdfs:isDefinedBy ; - + rdfs:domain prv:PropertyReification ; - + rdfs:range rdf:Property . 
@@ -223,19 +223,19 @@ prv:shortcut rdf:type owl:ObjectProperty ; ### http://purl.org/ontology/prv/core#shortcut_property prv:shortcut_property rdf:type owl:ObjectProperty ; - + rdfs:label "has shortcut property"@en ; - - rdfs:comment """Relates to the property of the reification class, which relates to the predicate of the \"shortcut relation\". So -that the specific property reification can be \"verified\" (the property of the prv:shortcut relation and this one associated by the + + rdfs:comment """Relates to the property of the reification class, which relates to the predicate of the \"shortcut relation\". So +that the specific property reification can be \"verified\" (the property of the prv:shortcut relation and this one associated by the referred shortcut property should be equal)."""@en ; - + status:term_status "testing"@en ; - + rdfs:isDefinedBy ; - + rdfs:domain prv:PropertyReification ; - + rdfs:range rdf:Property . @@ -243,17 +243,17 @@ referred shortcut property should be equal)."""@en ; ### http://purl.org/ontology/prv/core#subject_property prv:subject_property rdf:type owl:ObjectProperty ; - + rdfs:label "has subject property"@en ; - + rdfs:comment "Relates to the property of the reification class, which relates to the subject of the \"shortcut relation\"."@en ; - + status:term_status "testing"@en ; - + rdfs:isDefinedBy ; - + rdfs:domain prv:PropertyReification ; - + rdfs:range rdf:Property . @@ -270,14 +270,14 @@ prv:subject_property rdf:type owl:ObjectProperty ; ### http://purl.org/ontology/prv/core#PropertyReification prv:PropertyReification rdf:type owl:Class ; - + rdfs:label "Property Reification"@en ; - - rdfs:comment """The class, which describes the relations of a property reification. That means, its \"shortcut relation\", its + + rdfs:comment """The class, which describes the relations of a property reification. That means, its \"shortcut relation\", its reification class, and the properties that are relating to the subject and object of the \"shortcut relation\"."""@en ; - + status:term_status "testing"@en ; - + rdfs:isDefinedBy . @@ -285,5 +285,3 @@ reification class, and the properties that are relating to the subject and objec ### http://www.w3.org/1999/02/22-rdf-syntax-ns#Property rdf:Property rdf:type owl:Class . - - diff --git a/nidm/terms/imports/qibo_import.ttl b/nidm/terms/imports/qibo_import.ttl index 3228ca29..1ba383e2 100644 --- a/nidm/terms/imports/qibo_import.ttl +++ b/nidm/terms/imports/qibo_import.ttl @@ -39,7 +39,7 @@ rdf:type rdf:type owl:AnnotationProperty . ### http://www.owl-ontologies.com/Ontology1298855822.owl#Ex_vivo_subject qibo:Ex_vivo_subject rdf:type owl:Class ; - + rdfs:label "Ex-vivo Subject"^^xsd:string . @@ -47,7 +47,7 @@ qibo:Ex_vivo_subject rdf:type owl:Class ; ### http://www.owl-ontologies.com/Ontology1298855822.owl#In_vitro_subject qibo:In_vitro_subject rdf:type owl:Class ; - + rdfs:label "In-vitro Subject"^^xsd:string . @@ -55,7 +55,7 @@ qibo:In_vitro_subject rdf:type owl:Class ; ### http://www.owl-ontologies.com/Ontology1298855822.owl#In_vivo_subject qibo:In_vivo_subject rdf:type owl:Class ; - + rdfs:label "In-vivo Subject"^^xsd:string . @@ -63,11 +63,10 @@ qibo:In_vivo_subject rdf:type owl:Class ; ### http://www.owl-ontologies.com/Ontology1298855822.owl#Phantom_experimental_subject qibo:Phantom_experimental_subject rdf:type owl:Class ; - + rdfs:label "Phantom Experimental Subject"^^xsd:string . 
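The prv_import.ttl hunks above describe the Property Reification Vocabulary pattern in prose: a "shortcut relation" is linked to a reification class through prv:shortcut, prv:reification_class, prv:subject_property, prv:object_property and prv:shortcut_property. A minimal rdflib sketch of that pattern; every term in the ex: namespace is hypothetical and used only for illustration:

```python
from rdflib import Graph, Namespace, RDF

PRV = Namespace("http://purl.org/ontology/prv/core#")
EX = Namespace("http://example.org/")   # hypothetical namespace for this sketch

g = Graph()
g.bind("prv", PRV)
g.bind("ex", EX)

# Shortcut relation: ex:doc1 ex:assessedBy ex:clinician1 .
g.add((EX.doc1, EX.assessedBy, EX.clinician1))

# Property reification describing how ex:assessedBy can be unfolded into a
# detailed ex:Assessment node (all ex: terms are made up for this sketch).
pr = EX.assessedByReification
g.add((pr, RDF.type, PRV.PropertyReification))
g.add((pr, PRV.shortcut, EX.assessedBy))              # predicate of the shortcut relation
g.add((pr, PRV.reification_class, EX.Assessment))     # class carrying the detailed description
g.add((pr, PRV.subject_property, EX.assessedThing))   # relates an ex:Assessment to the shortcut's subject
g.add((pr, PRV.object_property, EX.assessor))         # relates an ex:Assessment to the shortcut's object
g.add((pr, PRV.shortcut_property, EX.reifiedProperty))# records which property was reified

print(g.serialize(format="turtle"))
```

Serializing the graph back to Turtle makes it easy to check, by eye, that the reified description lines up with the original shortcut triple.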
### Generated by the OWL API (version 3.5.1) http://owlapi.sourceforge.net - diff --git a/nidm/terms/imports/sio_import.ttl b/nidm/terms/imports/sio_import.ttl index 92116e5f..718acca9 100644 --- a/nidm/terms/imports/sio_import.ttl +++ b/nidm/terms/imports/sio_import.ttl @@ -13,13 +13,13 @@ @base . rdf:type ns2:Ontology ; - + ns0:license "http://creativecommons.org/licenses/by/4.0/"^^ns5:anyURI ; - + ns1:defaultLanguage "en" ; - + ns2:versionInfo "1.0.13" ; - + ns0:contributor """Contributors are those that engage in discussions in the context of SIO (in alphabetical order): christopher baker joachim baran @@ -47,32 +47,32 @@ robert stevens mark wilkinson karin verspoor natalia villanueva-rosales"""@en ; - + ns0:creator "Michel Dumontier"@en ; - + ns0:title "Semanticscience Integrated Ontology (SIO)"@en ; - + ns0:description """The semanticscience integrated ontology (SIO) provides a simple, integrated ontology (types, relations) for objects, processes and their attributes. -This project provides foundational support for the Bio2RDF (http://bio2rdf.org) and SADI (http://sadiframework.org) projects. +This project provides foundational support for the Bio2RDF (http://bio2rdf.org) and SADI (http://sadiframework.org) projects. website: http://semanticscience.org email: sio-ontology@googlegroups.com mailing list: http://groups.google.com/group/sio-ontology """@en ; - + ns0:identifier "sio.owl" ; - + ns0:rights "free to use,share,modify. modify with attribution [http://creativecommons.org/licenses/by/4.0/]." ; - + ns3:comment """general class inclusion axioms: 'is part of' some 'physical entity' subClassOf 'is located in' some 'physical entity' role chains: 'has capability' o 'is realized in' -> 'is participant in'"""@en ; - + ns2:versionIRI ; - + ns3:isDefinedBy . @@ -132,19 +132,19 @@ ns4:subset rdf:type ns2:AnnotationProperty . ns4:SIO_000001 rdf:type ns2:ObjectProperty , ns2:SymmetricProperty ; - + ns3:label "is related to"@en ; - + ns4:subset "relations+" ; - + ns0:identifier "SIO_000001" ; - + ns3:comment "'is related to' is the top level relation in SIO" ; - + ns0:description "A is related to B iff there is some relation between A and B."@en ; - + ns4:subset "core" ; - + ns3:isDefinedBy . @@ -152,17 +152,17 @@ ns4:SIO_000001 rdf:type ns2:ObjectProperty , ### http://semanticscience.org/resource/SIO_000008 ns4:SIO_000008 rdf:type ns2:ObjectProperty ; - + ns3:label "has attribute"@en ; - + ns4:subset "core" ; - + ns0:identifier "SIO_000008" ; - + ns0:description "has attribute is a relation that associates a entity with an attribute where an attribute is an intrinsic characteristic such as a quality, capability, disposition, function, or is an externally derived attribute determined from some descriptor (e.g. a quantity, position, label/identifier) either directly or indirectly through generalization of entities of the same type."@en ; - + ns3:isDefinedBy ; - + ns3:subPropertyOf ns4:SIO_000001 . @@ -171,19 +171,19 @@ ns4:SIO_000008 rdf:type ns2:ObjectProperty ; ns4:SIO_000059 rdf:type ns2:IrreflexiveProperty , ns2:ObjectProperty ; - + ns3:label "has member"@en ; - + ns0:identifier "SIO_000059" ; - + ns4:subset "core" ; - + ns4:example "a collection of cars has as a car as a member"@en ; - + ns0:description "has member is a mereological relation between a collection and an item."@en ; - + ns3:isDefinedBy ; - + ns3:subPropertyOf ns4:SIO_000008 . 
@@ -191,15 +191,15 @@ ns4:SIO_000059 rdf:type ns2:IrreflexiveProperty , ### http://semanticscience.org/resource/SIO_000210 ns4:SIO_000210 rdf:type ns2:ObjectProperty ; - + ns3:label "represents"@en ; - + ns4:subset "core" ; - + ns0:identifier "SIO_000210" ; - + ns0:description "a represents b when a serves as a sign, symbol or model of b."@en ; - + ns3:isDefinedBy . @@ -207,19 +207,19 @@ ns4:SIO_000210 rdf:type ns2:ObjectProperty ; ### http://semanticscience.org/resource/SIO_000632 ns4:SIO_000632 rdf:type ns2:ObjectProperty ; - + ns3:label "is model of"@en ; - + ns0:identifier "SIO_000632" ; - + ns0:description "is model of is a relation between a model (an artifact) and the entity it purports to represent."@en ; - + ns4:example "the architect builds a model that represents the building she envisions."@en ; - + ns3:isDefinedBy ; - + ns3:subPropertyOf ns4:SIO_000210 ; - + ns2:inverseOf ns4:SIO_000633 . @@ -236,19 +236,19 @@ ns4:SIO_000632 rdf:type ns2:ObjectProperty ; ns4:SIO_000300 rdf:type ns2:DatatypeProperty , ns2:FunctionalProperty ; - + ns3:label "has value"@en ; - + ns4:subset "nlp" , "core" ; - + ns0:identifier "SIO_000300" ; - + ns0:description "A relation between a informational entity and its actual value (numeric, date, text, etc)."@en ; - + ns4:subset "relations" , "sadi"@en ; - + ns3:isDefinedBy . @@ -265,15 +265,15 @@ ns4:SIO_000300 rdf:type ns2:DatatypeProperty , ### http://semanticscience.org/resource/SIO_000004 ns4:SIO_000004 rdf:type ns2:Class ; - + ns3:label "material entity"@en ; - + ns4:subset "core" ; - + ns0:identifier "SIO_000004" ; - + ns0:description "A material entity is a physical entity that is spatially extended, exists as a whole at any point in time and has mass."@en ; - + ns3:isDefinedBy . @@ -281,15 +281,15 @@ ns4:SIO_000004 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000009 ns4:SIO_000009 rdf:type ns2:Class ; - + ns3:label "social entity"@en ; - + ns3:subClassOf ns4:SIO_000015 ; - + ns0:identifier "SIO_000009" ; - + ns0:description "A social entity pertains to the interaction among individuals and groups."@en ; - + ns3:isDefinedBy . @@ -297,16 +297,16 @@ ns4:SIO_000009 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000015 ns4:SIO_000015 rdf:type ns2:Class ; - + ns3:label "information content entity"@en ; - + ns4:subset "ice+" , "core" ; - + ns0:identifier "SIO_000015" ; - + ns0:description "information content entity is an object that requires some background knowledge or procedure to correctly interpret."@en ; - + ns3:isDefinedBy . @@ -316,15 +316,15 @@ ns4:SIO_000015 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000052 ns4:SIO_000052 rdf:type ns2:Class ; - + ns3:label "quantity"@en ; - + ns3:subClassOf ns4:SIO_000070 ; - + ns0:identifier "SIO_000052" ; - + ns0:description "A quantity is an informational entity that gives the magnitude of a property."@en ; - + ns3:isDefinedBy . @@ -332,15 +332,15 @@ ns4:SIO_000052 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000070 ns4:SIO_000070 rdf:type ns2:Class ; - + ns3:label "measurement value"@en ; - + ns3:subClassOf ns4:SIO_000366 ; - + ns0:identifier "SIO_000070" ; - + ns0:description "A measurement value is a quantitative description that reflects the magnitude of some attribute."@en ; - + ns3:isDefinedBy . 
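The SIO terms quoted above ('has attribute', 'has value', 'quantity', 'measurement value') are typically combined into a small attribute/value pattern. A minimal rdflib sketch of that pattern, assuming hypothetical ex: resources:

```python
from rdflib import Graph, Namespace, Literal, RDF
from rdflib.namespace import XSD

SIO = Namespace("http://semanticscience.org/resource/")
EX = Namespace("http://example.org/")   # hypothetical resources for this sketch

g = Graph()
g.bind("sio", SIO)
g.bind("ex", EX)

# ex:subject01 has an attribute that is a measurement value with a literal value:
#   sio:SIO_000008 = 'has attribute', sio:SIO_000070 = 'measurement value',
#   sio:SIO_000300 = 'has value' (functional datatype property).
g.add((EX.subject01, SIO.SIO_000008, EX.subject01_height))
g.add((EX.subject01_height, RDF.type, SIO.SIO_000070))
g.add((EX.subject01_height, SIO.SIO_000300, Literal("1.75", datatype=XSD.float)))

print(g.serialize(format="turtle"))
```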
@@ -348,17 +348,17 @@ ns4:SIO_000070 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000075 ns4:SIO_000075 rdf:type ns2:Class ; - + ns3:label "mathematical entity"@en ; - + ns3:subClassOf ns4:SIO_000015 ; - + ns0:description "A mathematical entity is an information content entity that are components of a mathematical system or can be defined in mathematical terms."@en ; - + ns0:identifier "SIO_000075" ; - + ns4:subset "math+" ; - + ns3:isDefinedBy . @@ -367,15 +367,15 @@ ns4:SIO_000075 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000077 ns4:SIO_000077 rdf:type ns2:Class ; - + ns3:label "generic name"@en ; - + ns3:subClassOf ns4:SIO_000116 ; - + ns0:description "A generic name is the preferred name provided by manufacturer."@en ; - + ns0:identifier "SIO_000077" ; - + ns3:isDefinedBy . @@ -384,17 +384,17 @@ ns4:SIO_000077 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000078 ns4:SIO_000078 rdf:type ns2:Class ; - + ns3:label "language entity"@en ; - + ns3:subClassOf ns4:SIO_000015 ; - + ns0:identifier "SIO_000078" ; - + ns4:subset "nlp+" ; - + ns0:description "A language entity implements some language specification for the visual interpretation and is part of some document."@en ; - + ns3:isDefinedBy . @@ -403,15 +403,15 @@ ns4:SIO_000078 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000079 ns4:SIO_000079 rdf:type ns2:Class ; - + ns3:label "visual language entity"@en ; - + ns3:subClassOf ns4:SIO_000078 ; - + ns0:description "A visual language entity is a language entity that is expressed through physical expression of manual."@en ; - + ns0:identifier "SIO_000079" ; - + ns3:isDefinedBy . @@ -420,15 +420,15 @@ ns4:SIO_000079 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000090 ns4:SIO_000090 rdf:type ns2:Class ; - + ns3:label "specification"@en ; - + ns3:subClassOf ns4:SIO_000136 ; - + ns0:description "A specification is a description of the essential technical attributes/requirements for an object or procedure, and may be used to determine that the object / procedure meets its requirements/attributes."@en ; - + ns0:identifier "SIO_000090" ; - + ns3:isDefinedBy . @@ -436,17 +436,17 @@ ns4:SIO_000090 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000091 ns4:SIO_000091 rdf:type ns2:Class ; - + ns3:label "action specification"@en ; - + ns3:subClassOf ns4:SIO_000090 ; - + ns4:hasSynonym "effective specification" ; - + ns0:identifier "SIO_000091" ; - + ns0:description "An action specification is a specification composed of a sequence of instructions to achieve some objective."@en ; - + ns3:isDefinedBy . @@ -454,15 +454,15 @@ ns4:SIO_000091 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000094 ns4:SIO_000094 rdf:type ns2:Class ; - + ns3:label "algorithm"@en ; - + ns3:subClassOf ns4:SIO_000075 ; - + ns0:identifier "SIO_000094" ; - + ns0:description "An algorithm is an effective method expressed as a finite list of well-defined instructions for calculating a function."@en ; - + ns3:isDefinedBy . @@ -470,15 +470,15 @@ ns4:SIO_000094 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000097 ns4:SIO_000097 rdf:type ns2:Class ; - + ns3:label "software entity"@en ; - + ns3:subClassOf ns4:SIO_000602 ; - + ns0:identifier "SIO_000097" ; - + ns0:description "A software entity is a computational entity that can be interpreted by or directly executed by a processing unit."@en ; - + ns3:isDefinedBy . 
@@ -486,15 +486,15 @@ ns4:SIO_000097 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000115 ns4:SIO_000115 rdf:type ns2:Class ; - + ns3:label "identifier"@en ; - + ns3:subClassOf ns4:SIO_000179 ; - + ns0:description "An identifier is a label that specifically refers to (identifies) an entity (instance/type)."@en ; - + ns0:identifier "SIO_000115" ; - + ns3:isDefinedBy . @@ -502,15 +502,15 @@ ns4:SIO_000115 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000116 ns4:SIO_000116 rdf:type ns2:Class ; - + ns3:label "name"@en ; - + ns3:subClassOf ns4:SIO_000179 ; - + ns0:identifier "SIO_000116" ; - + ns0:description "A name is a label used to identify an entity."@en ; - + ns3:isDefinedBy . @@ -519,15 +519,15 @@ ns4:SIO_000116 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000117 ns4:SIO_000117 rdf:type ns2:Class ; - + ns3:label "preferred name"@en ; - + ns3:subClassOf ns4:SIO_000116 ; - + ns0:description "A preferred name is the name that is generally used by some organization. "@en ; - + ns0:identifier "SIO_000117" ; - + ns3:isDefinedBy . @@ -535,15 +535,15 @@ ns4:SIO_000117 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000118 ns4:SIO_000118 rdf:type ns2:Class ; - + ns3:label "common name"@en ; - + ns3:subClassOf ns4:SIO_000116 ; - + ns0:description "A common name is a name that is commonly used."@en ; - + ns0:identifier "SIO_000118" ; - + ns3:isDefinedBy . @@ -551,15 +551,15 @@ ns4:SIO_000118 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000119 ns4:SIO_000119 rdf:type ns2:Class ; - + ns3:label "brand name"@en ; - + ns3:subClassOf ns4:SIO_000116 ; - + ns0:identifier "SIO_000119" ; - + ns0:description "A brand name is a trademarked and marketed name of a product."@en ; - + ns3:isDefinedBy . @@ -567,15 +567,15 @@ ns4:SIO_000119 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000120 ns4:SIO_000120 rdf:type ns2:Class ; - + ns3:label "scientific name"@en ; - + ns3:subClassOf ns4:SIO_000116 ; - + ns0:description "A scientific name is a name given through scientific nomenclature."@en ; - + ns0:identifier "SIO_000120" ; - + ns3:isDefinedBy . @@ -584,15 +584,15 @@ ns4:SIO_000120 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000127 ns4:SIO_000127 rdf:type ns2:Class ; - + ns3:label "workflow"@en ; - + ns3:subClassOf ns4:SIO_000094 ; - + ns0:identifier "SIO_000127" ; - + ns0:description "A workflow is an algorithm that is is a depiction of a sequence of operations to achieve one or more objectives."@en ; - + ns3:isDefinedBy . @@ -600,15 +600,15 @@ ns4:SIO_000127 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000136 ns4:SIO_000136 rdf:type ns2:Class ; - + ns3:label "description"@en ; - + ns3:subClassOf ns4:SIO_000078 ; - + ns0:identifier "SIO_000136" ; - + ns0:description "A description is language entity in which elements of a language (formal or natural) are used to characterize an entity."@en ; - + ns3:isDefinedBy . @@ -616,15 +616,15 @@ ns4:SIO_000136 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000144 ns4:SIO_000144 rdf:type ns2:Class ; - + ns3:label "parameter"@en ; - + ns3:subClassOf ns4:SIO_000920 ; - + ns0:identifier "SIO_000144" ; - + ns0:description "A parameter is variable whose value changes the characteristics of a system or a function."@en ; - + ns3:isDefinedBy . 
@@ -632,15 +632,15 @@ ns4:SIO_000144 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000148 ns4:SIO_000148 rdf:type ns2:Class ; - + ns3:label "document"@en ; - + ns3:subClassOf ns4:SIO_000651 ; - + ns0:identifier "SIO_000148" ; - + ns0:description "A document is a bounded physical or digital representation of a body of information designed with the capacity (and usually intent) to communicate."@en ; - + ns3:isDefinedBy . @@ -648,16 +648,16 @@ ns4:SIO_000148 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000179 ns4:SIO_000179 rdf:type ns2:Class ; - + ns3:label "label"@en ; - + ns3:subClassOf ns4:SIO_000179 , ns4:SIO_000651 ; - + ns0:identifier "SIO_000179" ; - + ns0:description "a label is a term that is associated with some entity"@en ; - + ns3:isDefinedBy . @@ -665,15 +665,15 @@ ns4:SIO_000179 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000181 ns4:SIO_000181 rdf:type ns2:Class ; - + ns3:label "first name"@en ; - + ns3:subClassOf ns4:SIO_000116 ; - + ns0:identifier "SIO_000181" ; - + ns0:description "A first name is a name that denotes a specific individual between members of a group of individuals, whose members usually share the same surname."@en ; - + ns3:isDefinedBy . @@ -681,15 +681,15 @@ ns4:SIO_000181 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000182 ns4:SIO_000182 rdf:type ns2:Class ; - + ns3:label "last name"@en ; - + ns3:subClassOf ns4:SIO_000116 ; - + ns0:description "A last name (surname) is a name added to a given name and is part of a personal name and is often the family name."@en ; - + ns0:identifier "SIO_000182" ; - + ns3:isDefinedBy . @@ -697,15 +697,15 @@ ns4:SIO_000182 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000183 ns4:SIO_000183 rdf:type ns2:Class ; - + ns3:label "personal name"@en ; - + ns3:subClassOf ns4:SIO_000116 ; - + ns0:description "A personal name is a name to identify an individual person and usually comprises of a first name and a last name."@en ; - + ns0:identifier "SIO_000183" ; - + ns3:isDefinedBy . @@ -713,15 +713,15 @@ ns4:SIO_000183 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000184 ns4:SIO_000184 rdf:type ns2:Class ; - + ns3:label "legal name"@en ; - + ns3:subClassOf ns4:SIO_000183 ; - + ns0:description "A legal name is a name given at birth, or which appears on their birth certificate, marriage certificate, or change of name certificate."@en ; - + ns0:identifier "SIO_000184" ; - + ns3:isDefinedBy . @@ -729,15 +729,15 @@ ns4:SIO_000184 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000186 ns4:SIO_000186 rdf:type ns2:Class ; - + ns3:label "document version"@en ; - + ns3:subClassOf ns4:SIO_000653 ; - + ns0:identifier "SIO_000186" ; - + ns0:description "A document version is a version of a work in some sequence of derivative works."@en ; - + ns3:isDefinedBy . @@ -745,15 +745,15 @@ ns4:SIO_000186 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000256 ns4:SIO_000256 rdf:type ns2:Class ; - + ns3:label "proposition"@en ; - + ns3:subClassOf ns4:SIO_000136 ; - + ns0:description "A proposition is a sentence expressing something true or false."@en ; - + ns0:identifier "SIO_000256" ; - + ns3:isDefinedBy . 
@@ -762,17 +762,17 @@ ns4:SIO_000256 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000257 ns4:SIO_000257 rdf:type ns2:Class ; - + ns3:label "dimensionless quantity"@en ; - + ns3:subClassOf ns4:SIO_000052 ; - + ns2:disjointWith ns4:SIO_000258 ; - + ns0:description "A dimensionless quantity is a quantity that has no associated unit."@en ; - + ns0:identifier "SIO_000257" ; - + ns3:isDefinedBy . @@ -780,15 +780,15 @@ ns4:SIO_000257 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000258 ns4:SIO_000258 rdf:type ns2:Class ; - + ns3:label "dimensional quantity"@en ; - + ns3:subClassOf ns4:SIO_000052 ; - + ns0:identifier "SIO_000258" ; - + ns0:description "A dimensional quantity is a quantity that has an associated unit."@en ; - + ns3:isDefinedBy . @@ -796,15 +796,15 @@ ns4:SIO_000258 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000360 ns4:SIO_000360 rdf:type ns2:Class ; - + ns3:label "belief"@en ; - + ns3:subClassOf ns4:SIO_000256 ; ns0:description "A belief is a proposition that is believed to be true."@en ; - + ns0:identifier "SIO_000360" ; - + ns3:isDefinedBy . @@ -812,15 +812,15 @@ ns4:SIO_000360 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000366 ns4:SIO_000366 rdf:type ns2:Class ; - + ns3:label "number"@en ; - + ns3:subClassOf ns4:SIO_000075 ; - + ns0:identifier "SIO_000366" ; - + ns0:description "A number is a mathematical object used to count, label, and measure."@en ; - + ns3:isDefinedBy . @@ -828,15 +828,15 @@ ns4:SIO_000366 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000367 ns4:SIO_000367 rdf:type ns2:Class ; - + ns3:label "variable"@en ; - + ns3:subClassOf ns4:SIO_000075 ; - + ns0:description "A variable is a value that may change within the scope of a given problem or set of operations."@en ; - + ns0:identifier "SIO_000367" ; - + ns3:isDefinedBy . @@ -844,15 +844,15 @@ ns4:SIO_000367 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000396 ns4:SIO_000396 rdf:type ns2:Class ; - + ns3:label "file"@en ; - + ns3:subClassOf ns4:SIO_000602 ; - + ns0:description "A file is an information-bearing object that contains a physical embodiment of some information using a particular character encoding."@en ; - + ns0:identifier "SIO_000396" ; - + ns3:isDefinedBy . @@ -862,15 +862,15 @@ ns4:SIO_000396 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000485 ns4:SIO_000485 rdf:type ns2:Class ; - + ns3:label "human"@en ; - + ns3:subClassOf ns4:SIO_010375 ; - + ns0:identifier "SIO_000485" ; - + ns0:description "A human is a primates of the family Hominidae and are characterized by having a large brain relative to body size, with a well developed neocortex, prefrontal cortex and temporal lobes, making them capable of abstract reasoning, language, introspection, problem solving and culture through social learning."@en ; - + ns3:isDefinedBy . @@ -878,15 +878,15 @@ ns4:SIO_000485 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000486 ns4:SIO_000486 rdf:type ns2:Class ; - + ns3:label "mouse"@en ; - + ns3:subClassOf ns4:SIO_010375 ; - + ns0:description "A mouse is a small mammal belonging to the order of rodents, characteristically having a pointed snout, small rounded ears, and a long naked or almost hairless tail. "@en ; - + ns0:identifier "SIO_000486" ; - + ns3:isDefinedBy . 
@@ -894,15 +894,15 @@ ns4:SIO_000486 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000487 ns4:SIO_000487 rdf:type ns2:Class ; - + ns3:label "rat"@en ; - + ns3:subClassOf ns4:SIO_010375 ; - + ns0:identifier "SIO_000487" ; - + ns0:description "A rat is a medium-sized, long-tailed rodent of the superfamily Muroidea."@en ; - + ns3:isDefinedBy . @@ -910,15 +910,15 @@ ns4:SIO_000487 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000510 ns4:SIO_000510 rdf:type ns2:Class ; - + ns3:label "model"@en ; - + ns3:subClassOf ns4:SIO_000612 ; - + ns0:identifier "SIO_000510" ; - + ns0:description "A model is a representation of some thing."@en ; - + ns3:isDefinedBy . @@ -927,15 +927,15 @@ ns4:SIO_000510 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000596 ns4:SIO_000596 rdf:type ns2:Class ; - + ns3:label "data storage device"@en ; - + ns3:subClassOf ns4:SIO_000956 ; - + ns0:identifier "SIO_000596" ; - + ns0:description "A data storage device is a device that is capable of storing information."@en ; - + ns3:isDefinedBy . @@ -943,15 +943,15 @@ ns4:SIO_000596 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000602 ns4:SIO_000602 rdf:type ns2:Class ; - + ns3:label "computational entity"@en ; - + ns3:subClassOf ns4:SIO_000015 ; - + ns0:description "A computational entity is an information content entity operated on using some computational system."@en ; - + ns0:identifier "SIO_000602" ; - + ns3:isDefinedBy . @@ -959,15 +959,15 @@ ns4:SIO_000602 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000612 ns4:SIO_000612 rdf:type ns2:Class ; - + ns3:label "representation"@en ; - + ns3:subClassOf ns4:SIO_000015 ; - + ns0:identifier "SIO_000612" ; - + ns0:description "A representation is a entity that in some way represents another entity (or attribute thereof)."@en ; - + ns3:isDefinedBy . @@ -975,15 +975,15 @@ ns4:SIO_000612 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000614 ns4:SIO_000614 rdf:type ns2:Class ; - + ns3:label "attribute"@en ; - + ns0:description "An attribute is a characteristic of some entity."@en ; - + ns4:subset "core" ; - + ns0:identifier "SIO_000614" ; - + ns3:isDefinedBy . @@ -992,15 +992,15 @@ ns4:SIO_000614 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000651 ns4:SIO_000651 rdf:type ns2:Class ; - + ns3:label "textual entity"@en ; - + ns3:subClassOf ns4:SIO_000079 ; - + ns0:description "A textual entity is language entity that is manifested as one or more distinct characters."@en ; - + ns0:identifier "SIO_000651" ; - + ns3:isDefinedBy . @@ -1008,15 +1008,15 @@ ns4:SIO_000651 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000653 ns4:SIO_000653 rdf:type ns2:Class ; - + ns3:label "version label"@en ; - + ns3:subClassOf ns4:SIO_000115 ; - + ns0:identifier "SIO_000653" ; - + ns0:description "A version label is a label for a particular form or variation of an earlier or original type." ; - + ns3:isDefinedBy . @@ -1024,17 +1024,17 @@ ns4:SIO_000653 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000654 ns4:SIO_000654 rdf:type ns2:Class ; - + ns3:label "software version label"@en ; - + ns3:subClassOf ns4:SIO_000653 ; - + ns0:identifier "SIO_000654" ; - + ns0:description "A software version label is a version label for a piece of software."@en ; - + ns4:example "major.minor[.build[.revision]]"@en ; - + ns3:isDefinedBy . 
@@ -1042,15 +1042,15 @@ ns4:SIO_000654 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000675 ns4:SIO_000675 rdf:type ns2:Class ; - + ns3:label "unique identifier"@en ; - + ns3:subClassOf ns4:SIO_000115 ; - + ns0:identifier "SIO_000675" ; - + ns0:description "A unique identifier is an identifier that uniquely identifies some thing."@en ; - + ns3:isDefinedBy . @@ -1058,15 +1058,15 @@ ns4:SIO_000675 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000729 ns4:SIO_000729 rdf:type ns2:Class ; - + ns3:label "record identifier"@en ; - + ns3:subClassOf ns4:SIO_000731 ; - + ns0:description "A record identifier is an identifier for a database entry."@en ; - + ns0:identifier "SIO_000729" ; - + ns3:isDefinedBy . @@ -1074,15 +1074,15 @@ ns4:SIO_000729 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000730 ns4:SIO_000730 rdf:type ns2:Class ; - + ns3:label "physical entity identifier"@en ; - + ns3:subClassOf ns4:SIO_000115 ; - + ns0:identifier "SIO_000730" ; - + ns0:description "A physical entity identifier is an identifier for a physical entity."@en ; - + ns3:isDefinedBy . @@ -1090,15 +1090,15 @@ ns4:SIO_000730 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000731 ns4:SIO_000731 rdf:type ns2:Class ; - + ns3:label "informational entity identifier"@en ; - + ns3:subClassOf ns4:SIO_000115 ; - + ns0:description "An informational entity identifier is an identifier for an informational entity."@en ; - + ns0:identifier "SIO_000731" ; - + ns3:isDefinedBy . @@ -1108,17 +1108,17 @@ ns4:SIO_000731 rdf:type ns2:Class ; ns4:SIO_000736 rdf:type ns2:ObjectProperty , ns2:SymmetricProperty ; - + ns3:label "is comparable to"@en ; - + ns0:identifier "SIO_000736" ; - + ns4:subset "core" ; - + ns0:description "is comparable to is a relation between two entities that share at least one feature whose value can be compared."@en ; - + ns3:isDefinedBy ; - + ns3:subPropertyOf ns4:SIO_000001 . @@ -1126,15 +1126,15 @@ ns4:SIO_000736 rdf:type ns2:ObjectProperty , ### http://semanticscience.org/resource/SIO_000794 ns4:SIO_000794 rdf:type ns2:Class ; - + ns3:label "count"@en ; - + ns3:subClassOf ns4:SIO_000257 ; - + ns0:identifier "SIO_000794" ; - + ns0:description "The number of elements of a finite set of objects."@en ; - + ns3:isDefinedBy . @@ -1142,17 +1142,17 @@ ns4:SIO_000794 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000811 ns4:SIO_000811 rdf:type ns2:Class ; - + ns3:label "URL"@en ; - + ns3:subClassOf ns4:SIO_000731 ; - + ns0:description "A Uniform Resource Locator or Universal Resource Locator (URL) is a specific character string that constitutes a reference to an Internet resource."@en ; - + ns0:identifier "SIO_000811" ; - + ns4:hasSynonym "Uniform Resource Locator"@en ; - + ns3:isDefinedBy . @@ -1160,15 +1160,15 @@ ns4:SIO_000811 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000920 ns4:SIO_000920 rdf:type ns2:Class ; - + ns3:label "independent variable"@en ; - + ns3:subClassOf ns4:SIO_000367 ; - + ns0:description "An independent variable is a variable that may take on different values independent of other elements in a system."@en ; - + ns0:identifier "SIO_000920" ; - + ns3:isDefinedBy . 
@@ -1176,15 +1176,15 @@ ns4:SIO_000920 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000921 ns4:SIO_000921 rdf:type ns2:Class ; - + ns3:label "dependent variable"@en ; - + ns3:subClassOf ns4:SIO_000367 ; - + ns0:description "A dependent variable is one whose value changes as a consequence of changes in other values in the system."@en ; - + ns0:identifier "SIO_000921" ; - + ns3:isDefinedBy . @@ -1192,15 +1192,15 @@ ns4:SIO_000921 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_000956 ns4:SIO_000956 rdf:type ns2:Class ; - + ns3:label "device"@en ; - + ns3:subClassOf ns4:SIO_010462 ; - + ns0:description "A device is usually a constructed tool."@en ; - + ns0:identifier "SIO_000956" ; - + ns3:isDefinedBy . @@ -1209,13 +1209,13 @@ ns4:SIO_000956 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_001000 ns4:SIO_001000 rdf:type ns2:Class ; - + ns3:label "clinical trial"@en ; - + ns0:identifier "SIO_001000" ; - + ns0:description "A clinical trial is an intervention trial to determine the safety and efficacy of medical interventions (e.g., drugs, diagnostics, devices, therapy protocols). "@en ; - + ns3:isDefinedBy . @@ -1223,15 +1223,15 @@ ns4:SIO_001000 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_001003 ns4:SIO_001003 rdf:type ns2:Class ; - + ns3:label "diagnostic opinion"@en ; - + ns3:subClassOf ns4:SIO_001004 ; - + ns0:description "A diagnostic opinion is an opinion resulting from a medical diagnostic procedure."@en ; - + ns0:identifier "SIO_001003" ; - + ns3:isDefinedBy . @@ -1239,15 +1239,15 @@ ns4:SIO_001003 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_001004 ns4:SIO_001004 rdf:type ns2:Class ; - + ns3:label "opinion"@en ; - + ns3:subClassOf ns4:SIO_000360 ; - + ns0:description "An opinion is a belief that is the result of emotion or interpretation of facts. "@en ; - + ns0:identifier "SIO_001004" ; - + ns3:isDefinedBy . @@ -1255,15 +1255,15 @@ ns4:SIO_001004 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_001013 ns4:SIO_001013 rdf:type ns2:Class ; - + ns3:label "age"@en ; - + ns3:subClassOf ns4:SIO_000794 ; - + ns0:identifier "SIO_001013" ; - + ns0:description "age is the length of time that a person has lived or a thing has existed."@en ; - + ns3:isDefinedBy . @@ -1271,13 +1271,13 @@ ns4:SIO_001013 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_001014 ns4:SIO_001014 rdf:type ns2:Class ; - + ns3:label "ethnicity"@en ; - + ns0:identifier "SIO_001014" ; - + ns0:description "ethnicity is the biological quality of membership in a social group based on a common heritage."@en ; - + ns3:isDefinedBy . @@ -1285,13 +1285,13 @@ ns4:SIO_001014 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_001015 ns4:SIO_001015 rdf:type ns2:Class ; - + ns3:label "race"@en ; - + ns0:identifier "SIO_001015" ; - + ns0:description "race is a characteristic of an individual by heritable phenotypic characteristics, geographic ancestry, physical appearance, ethnicity, and social status."@en ; - + ns3:isDefinedBy . @@ -1300,15 +1300,15 @@ ns4:SIO_001015 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_001031 ns4:SIO_001031 rdf:type ns2:Class ; - + ns3:label "minor version number"@en ; - + ns3:subClassOf ns4:SIO_000654 ; - + ns0:description "A minor version number is a version of a software that exhibits minor features or significant fix from a prior version."@en ; - + ns0:identifier "SIO_001031" ; - + ns3:isDefinedBy . 
@@ -1316,15 +1316,15 @@ ns4:SIO_001031 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_001032 ns4:SIO_001032 rdf:type ns2:Class ; - + ns3:label "revision number"@en ; - + ns3:subClassOf ns4:SIO_000654 ; - + ns0:description "A revision number is a version of a software in which bugs have been fixed from a prior version."@en ; - + ns0:identifier "SIO_001032" ; - + ns3:isDefinedBy . @@ -1333,14 +1333,14 @@ ns4:SIO_001032 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_001068 ns4:SIO_001068 rdf:type ns2:Class ; - + ns3:label "control group"@en ; - - + + ns0:description "A control group is a group of individuals that are not subject to an intervention of interest, but rather serve as a baseline to compare the outcomes in the intervention group."@en ; - + ns0:identifier "SIO_001068" ; - + ns3:isDefinedBy . @@ -1348,13 +1348,13 @@ ns4:SIO_001068 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_001069 ns4:SIO_001069 rdf:type ns2:Class ; - + ns3:label "intervention group"@en ; - + ns0:description "An intervention group is a group of individuals that are subject to an intervention."@en ; - + ns0:identifier "SIO_001069" ; - + ns3:isDefinedBy . @@ -1362,15 +1362,15 @@ ns4:SIO_001069 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_001196 ns4:SIO_001196 rdf:type ns2:Class ; - + ns3:label "history"@en ; - + ns3:subClassOf ns4:SIO_000136 ; - + ns0:identifier "SIO_001196" ; - + ns0:description "history is a sequence of past events."@en ; - + ns3:isDefinedBy . @@ -1378,15 +1378,15 @@ ns4:SIO_001196 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_001236 ns4:SIO_001236 rdf:type ns2:Class ; - + ns3:label "data collection device"@en ; - + ns3:subClassOf ns4:SIO_000956 ; - + ns0:description "A data collection device is a device that collects information about one or more objects."@en ; - + ns0:identifier "SIO_001236" ; - + ns3:isDefinedBy . @@ -1394,19 +1394,19 @@ ns4:SIO_001236 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_001240 ns4:SIO_001240 rdf:type ns2:Class ; - + ns3:label "nmr device"@en ; - + ns3:subClassOf ns4:SIO_001236 ; - + ns4:equivalentTo "http://purl.obolibrary.org/obo/OBI_0000566"^^ns5:anyURI ; - + ns0:identifier "SIO_001240" ; - + ns0:description "A nuclear magnetic resonance (NMR) device is a device that applies a magnetic field to perturb nuclei with an odd number of protons and/or of neutrons in order to hav them absort and re-emit electromagnetic radiation. "@en ; - + ns4:hasSynonym "nuclear magnetic resonance device"@en ; - + ns3:isDefinedBy . @@ -1415,15 +1415,15 @@ ns4:SIO_001240 rdf:type ns2:Class ; ns4:SIO_001242 rdf:type ns2:ObjectProperty , ns2:SymmetricProperty ; - + ns3:label "is identical to"@en ; - + ns0:identifier "SIO_001242" ; - + ns0:description "is identical to is a relation between two objects that are conceptually the same notwithstanding provenance or other non-intrinsic attributes."@en ; - + ns3:isDefinedBy ; - + ns3:subPropertyOf ns4:SIO_000736 . @@ -1432,15 +1432,15 @@ ns4:SIO_001242 rdf:type ns2:ObjectProperty , ### http://semanticscience.org/resource/SIO_010000 ns4:SIO_010000 rdf:type ns2:Class ; - + ns3:label "organism"@en ; - + ns3:subClassOf ns4:SIO_010046 ; - + ns0:description "A biological organism is a biological entity that consists of one or more cells and is capable of genomic replication (independently or not)."@en ; - + ns0:identifier "SIO_010000" ; - + ns3:isDefinedBy . 
@@ -1448,17 +1448,17 @@ ns4:SIO_010000 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_010046 ns4:SIO_010046 rdf:type ns2:Class ; - + ns3:label "biological entity"@en ; - + ns3:subClassOf ns4:SIO_010462 ; - + ns0:identifier "SIO_010046" ; - + ns4:subset "chemical-" ; - + ns0:description "A biological entity is a heterogeneous substance that contains genomic material or is the product of a biological process."@en ; - + ns3:isDefinedBy . @@ -1467,15 +1467,15 @@ ns4:SIO_010046 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_010060 ns4:SIO_010060 rdf:type ns2:Class ; - + ns3:label "family history"@en ; - + ns3:subClassOf ns4:SIO_001196 ; - + ns0:identifier "SIO_010060" ; - + ns0:description "family history is the systematic narrative and research of past events relating to a specific family, or specific families."@en ; - + ns3:isDefinedBy . @@ -1483,15 +1483,15 @@ ns4:SIO_010060 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_010375 ns4:SIO_010375 rdf:type ns2:Class ; - + ns3:label "multicellular organism"@en ; - + ns3:subClassOf ns4:SIO_010377 ; - + ns0:identifier "SIO_010375" ; - + ns0:description "A multi-cellular organism is an organism that consists of more than one cell."@en ; - + ns3:isDefinedBy . @@ -1499,15 +1499,15 @@ ns4:SIO_010375 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_010377 ns4:SIO_010377 rdf:type ns2:Class ; - + ns3:label "cellular organism"@en ; - + ns3:subClassOf ns4:SIO_010000 ; - + ns0:description "A cellular organism is an organism that contains one or more cells."@en ; - + ns0:identifier "SIO_010377" ; - + ns3:isDefinedBy . @@ -1515,19 +1515,18 @@ ns4:SIO_010377 rdf:type ns2:Class ; ### http://semanticscience.org/resource/SIO_010462 ns4:SIO_010462 rdf:type ns2:Class ; - + ns3:label "heterogeneous substance"@en ; - + ns3:subClassOf ns4:SIO_000004 ; - + ns0:description "A heterogeneous substance is a chemical substance that is composed of more than one different kind of component."@en ; - + ns0:identifier "SIO_010462" ; - + ns3:isDefinedBy . ### Generated by the OWL API (version 3.5.1) http://owlapi.sourceforge.net - diff --git a/nidm/terms/imports/stato_import.ttl b/nidm/terms/imports/stato_import.ttl index 893af6c2..7d39ac59 100644 --- a/nidm/terms/imports/stato_import.ttl +++ b/nidm/terms/imports/stato_import.ttl @@ -19,19 +19,19 @@ ### http://purl.obolibrary.org/obo/STATO_0000193 obo:STATO_0000193 rdf:type owl:Class ; - + rdfs:label "study group population" ; - + "Alejandra Gonzalez-Beltran"@en , "Orlaith Burke" , "Philippe Rocca-Serra" ; - + "STATO"@en ; - + "is a population whose individual members realize (may be expressed as) a combination of inclusion rule values specifications or resulting from a sampling process (e.g. recruitment followed by randomization to group) on which a number of measurements will be carried out, which may be used as input to statistical tests and statistical inference." ; - + "statistical sample"@en ; - + . ### http://purl.obolibrary.org/obo/IAO_0000112 @@ -113,20 +113,20 @@ obo:IAO_0000136 rdf:type owl:ObjectProperty . 
### http://purl.obolibrary.org/obo/STATO_0000030 obo:STATO_0000030 rdf:type owl:Class ; - + rdfs:label "Chi-Squared statistic"@en ; - + rdfs:subClassOf obo:STATO_0000039 ; - + obo:IAO_0000117 "Orlaith Burke"^^xsd:string , "Alejandra Gonzalez-Beltran"@en ; - + obo:IAO_0000115 "Chi-squared statistic is a statistic computed from observations and used to produce a p-value in statistical test when compared to a Chi-Squared distribution."@en ; - + obo:IAO_0000117 "Philippe Rocca-Serra"@en ; - + obo:IAO_0000119 "STATO"@en ; - + obo:IAO_0000114 obo:IAO_0000122 . @@ -134,22 +134,22 @@ obo:STATO_0000030 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/STATO_0000039 obo:STATO_0000039 rdf:type owl:Class ; - + rdfs:label "statistic"@en ; - + obo:IAO_0000117 "Orlaith Burke"^^xsd:string ; - + obo:IAO_0000112 ""@en ; - + obo:IAO_0000117 "Alejandra Gonzalez-Beltran"@en , "Philippe Rocca-Serra"@en ; - + obo:IAO_0000119 "STATO, adapted from wikipedia (http://en.wikipedia.org/wiki/Statistic)."@en ; - + obo:IAO_0000115 "a statistic is a measurement datum to describe a dataset or a variable. It is generated by a calculation on set of observed data."@en ; - + obo:STATO_0000032 "statistic"@en ; - + obo:IAO_0000114 obo:IAO_0000122 . @@ -157,21 +157,21 @@ obo:STATO_0000039 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/STATO_0000119 obo:STATO_0000119 rdf:type owl:Class ; - + rdfs:label "model parameter estimation"@en ; - + obo:IAO_0000117 "Orlaith Burke"^^xsd:string ; - + obo:IAO_0000119 ""@en ; - + obo:IAO_0000117 "Alejandra Gonzalez-Beltran"@en , "Philippe Rocca-Serra"@en ; - + obo:IAO_0000115 "model parameter estimation is a data transformation that finds parameter values (the model parameter estimates) most compatible with the data as judged by the model."@en ; - + obo:IAO_0000116 """textual definition modified following contributiong by Thomas Nichols: https://github.com/ISA-tools/stato/issues/18"""@en ; - + obo:IAO_0000114 obo:IAO_0000125 . @@ -179,22 +179,22 @@ https://github.com/ISA-tools/stato/issues/18"""@en ; ### http://purl.obolibrary.org/obo/STATO_0000176 obo:STATO_0000176 rdf:type owl:Class ; - + rdfs:label "t-statistic"@en ; - + rdfs:subClassOf obo:STATO_0000039 ; - + obo:IAO_0000117 "Orlaith Burke"^^xsd:string ; - + obo:STATO_0000032 ""@en ; - + obo:IAO_0000117 "Alejandra Gonzalez-Beltran"@en , "Philippe Rocca-Serra"@en ; - + obo:IAO_0000119 "STATO"@en ; - + obo:IAO_0000115 "t-statistic is a statistic computed from observations and used to produce a p-value in statistical test when compared to a Student's t distribution."@en ; - + obo:IAO_0000114 obo:IAO_0000122 . @@ -202,20 +202,20 @@ obo:STATO_0000176 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/STATO_0000282 obo:STATO_0000282 rdf:type owl:Class ; - + rdfs:label "F-statistic"@en ; - + rdfs:subClassOf obo:STATO_0000039 ; - + obo:IAO_0000117 "Orlaith Burke"^^xsd:string , "Alejandra Gonzalez-Beltran"@en ; - + obo:IAO_0000115 "F statistic is a statistic computed from observations and used to produce a p-value in statistical test when compared to a F distribution. the F statistic is the ratio of two scaled sums of squares reflecting different sources of variability"@en ; - + obo:IAO_0000117 "Philippe Rocca-Serra"@en ; - + obo:IAO_0000119 "STATO"@en ; - + obo:IAO_0000114 obo:IAO_0000122 . 
@@ -223,22 +223,22 @@ obo:STATO_0000282 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/STATO_0000323 obo:STATO_0000323 rdf:type owl:Class ; - + rdfs:label "contrast weight matrix"@en ; - + obo:IAO_0000117 "Alejandra Gonzalez-Beltran"@en , "Camille Maumet"@en , "Orlaith Burke"@en , "Philippe Rocca-Serra"@en ; - + obo:IAO_0000119 "STATO"@en ; - + obo:IAO_0000117 "Tom Nichols"@en ; - + obo:IAO_0000112 "[1,0,0]"@en ; - + obo:IAO_0000115 "a contrast weight matrix is a information content entity which holds a set of contrast weight, coefficient used in a weighting sum of means defining a contrast"@en ; - + obo:STATO_0000032 "contrast weights"@en . @@ -246,19 +246,19 @@ obo:STATO_0000323 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/STATO_0000346 obo:STATO_0000346 rdf:type owl:Class ; - + rdfs:label "covariance structure"@en ; - + obo:IAO_0000117 "Camille Maumet" , "Orlaith Burke" , "Tom Nichols" , "Alejandra Gonzalez-Beltran" , "Philippe Rocca-Serra" ; - + obo:IAO_0000115 "a covariance structure is a data item which is part of a regression model and which indicates a pattern in the covariance matrix. The nature of covariance structure is specified before the regression analysis and various covariance structure may be tested and evaluated using information criteria to help choose the most suiteable model"@en ; - + obo:IAO_0000119 "http://www3.nd.edu/~kyuan/courses/sem/readpapers/benter.pdf" ; - + obo:IAO_0000114 obo:IAO_0000120 . @@ -266,27 +266,27 @@ obo:STATO_0000346 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/STATO_0000357 obo:STATO_0000357 rdf:type owl:Class ; - + rdfs:label "Toeplitz covariance structure"@en ; - + rdfs:subClassOf obo:STATO_0000346 ; - + obo:IAO_0000117 "Orlaith Burke" ; - + obo:IAO_0000115 ""@en ; - + obo:STATO_0000041 ""@en ; - + obo:IAO_0000117 "Alejandra Gonzalez-Beltran"@en , "Camille Maumet"@en , "Philippe Rocca-Serra"@en ; - + obo:IAO_0000118 "TOEP"@en ; - + obo:IAO_0000117 "Tom Nichols"@en ; - + obo:IAO_0000119 "http://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_mixed_sect019.htm"@en ; - + obo:IAO_0000114 obo:IAO_0000120 . @@ -294,26 +294,26 @@ obo:STATO_0000357 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/STATO_0000362 obo:STATO_0000362 rdf:type owl:Class ; - + rdfs:label "compound symmetry covariance structure"@en ; - + rdfs:subClassOf obo:STATO_0000346 ; - + obo:IAO_0000117 "Alejandra Gonzalez-Beltran"@en ; - + obo:IAO_0000118 "CS"@en ; - + obo:IAO_0000117 "Camille Maumet"@en , "Orlaith Burke" , "Philippe Rocca-Serra"@en , "Tom Nichols"@en ; - + obo:IAO_0000115 "compound symmetry covariance structure is a covariance structure which means that all the variances are equal and all the covariances are equal."@en ; - + obo:STATO_0000041 "http://stat.ethz.ch/R-manual/R-devel/library/nlme/html/corCompSymm.html"@en ; - + obo:IAO_0000119 "http://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_mixed_sect019.htm"@en ; - + obo:IAO_0000114 obo:IAO_0000120 . 
@@ -321,25 +321,25 @@ obo:STATO_0000362 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/STATO_0000370 obo:STATO_0000370 rdf:type owl:Class ; - + rdfs:label "ordinary least squares estimation"@en ; - + rdfs:subClassOf obo:STATO_0000119 ; - + obo:STATO_0000032 "OLS estimation" ; - + obo:STATO_0000041 ""@en , "https://stat.ethz.ch/R-manual/R-patched/library/stats/html/lm.html" ; - + obo:IAO_0000117 "Alejandra Gonzalez-Beltran"@en , "Camille Maumet"@en , "Philippe Rocca-Serra"@en , "Tom Nichols"@en ; - + obo:IAO_0000119 "http://en.wikipedia.org/wiki/Ordinary_least_squares and Tom Nichols"@en ; - + obo:IAO_0000115 "the ordinary least squares estimation is a model parameter estimation for a linear regression model when the errors are uncorrelated and equal in variance. Is the Best Linear Unbiased Estimation (BLUE) method under these assumptions, Uniformly Minimum-Variance Unbiased Estimator (UMVUE) with addition of a Gaussian assumption."@en ; - + obo:IAO_0000114 obo:IAO_0000120 . @@ -347,25 +347,25 @@ obo:STATO_0000370 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/STATO_0000371 obo:STATO_0000371 rdf:type owl:Class ; - + rdfs:label "weighted least squares estimation"@en ; - + rdfs:subClassOf obo:STATO_0000119 ; - + obo:STATO_0000032 "WLS estimation" ; - + obo:IAO_0000117 "Alejandra Gonzalez-Beltran"@en , "Camille Maumet"@en , "Orlaith Burke"@en , "Philippe Rocca-Serra"@en , "Tom Nichols"@en ; - + obo:IAO_0000119 "http://en.wikipedia.org/wiki/Least_squares#Weighted_least_squares and Tom Nichols"@en ; - + obo:STATO_0000041 "https://stat.ethz.ch/R-manual/R-patched/library/stats/html/lm.html"@en ; - + obo:IAO_0000115 "the weighted least squares estimation is a model parameter estimation for a linear regression model with errors that independent but have heterogeneous variance. Difficult to use use in practice, as weights must be set based on the variance which is usually unknown. If true variance is known, it is the Best Linear Unbiased Estimation (BLUE) method under these assumptions, Uniformly Minimum-Variance Unbiased Estimator (UMVUE) with addition of a Gaussian assumption."@en ; - + obo:IAO_0000114 obo:IAO_0000120 . @@ -373,22 +373,22 @@ obo:STATO_0000371 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/STATO_0000372 obo:STATO_0000372 rdf:type owl:Class ; - + rdfs:label "generalized least squares estimation"@en ; - + rdfs:subClassOf obo:STATO_0000119 ; - + obo:STATO_0000032 "GLS estimation" ; - + obo:IAO_0000117 "Philippe Rocca-Serra"@en , "Tom Nichols"@en ; - + obo:IAO_0000119 "http://en.wikipedia.org/wiki/Generalized_least_squares and Tom Nichols"@en ; - + obo:STATO_0000041 "http://stat.ethz.ch/R-manual/R-devel/library/nlme/html/gls.html"@en ; - + obo:IAO_0000115 "the generalized least squares estimation is a model parameter estimation for a linear regression model with errors that are dependent and (possibly) have heterogeneous variance. Difficult to use use in practice, as covariance matrix of the errors must known to \"whiten\" data and model. If true covariance is known, it is the Best Linear Unbiased Estimation (BLUE) method under these assumptions, Uniformly Minimum-Variance Unbiased Estimator (UMVUE) with addition of a Gaussian assumption."@en ; - + obo:IAO_0000114 obo:IAO_0000120 . 
@@ -396,25 +396,25 @@ obo:STATO_0000372 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/STATO_0000373 obo:STATO_0000373 rdf:type owl:Class ; - + rdfs:label "iteratively reweighted least squares estimation"@en ; - + rdfs:subClassOf obo:STATO_0000119 ; - + obo:IAO_0000117 "Alejandra Gonzalez-Beltran" , "Camille Maumet" , "Orlaith Burke" ; - + obo:STATO_0000041 ""@en ; - + obo:IAO_0000117 "Philippe Rocca-Serra"@en ; - + obo:IAO_0000119 "Tom Nichols"@en ; - + obo:IAO_0000117 "Tom Nichols"@en ; - + obo:IAO_0000115 "the iteratively reweighted least squares estimation is a model parameter estimation which is a practical implementation of Weighted Least Squares, where the heterogeneous variances of the errors are estimated from the residuals of the regression model, providing an estimate for the weights. Each successive estimate of the weights improves the estimation of the regression parameters, which in turn are used to compute residuals and update the weights"@en ; - + obo:IAO_0000114 obo:IAO_0000120 . @@ -422,22 +422,22 @@ obo:STATO_0000373 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/STATO_0000374 obo:STATO_0000374 rdf:type owl:Class ; - + rdfs:label "feasible generalized least squares estimation"@en ; - + rdfs:subClassOf obo:STATO_0000372 ; - + obo:IAO_0000115 "the feasible generalized least squares estimation is a model parameter estimation which is a practical implementation of Generalised Least Squares, where the covariance of the errors is estimated from the residuals of the regression model, providing the information needed to whiten the data and model. Each successive estimate of the whitening matrix improves the estimation of the regression parameters, which in turn are used to compute residuals and update the whitening matrix." ; - + obo:IAO_0000117 "Alejandra Gonzalez-Beltran"@en , "Orlaith Burke"@en ; - + obo:IAO_0000119 "Tom Nichols" ; - + obo:IAO_0000117 "Camille Maumet" , "Philippe Rocca-Serra"@en , "Tom Nichols"@en ; - + obo:IAO_0000114 obo:IAO_0000120 . @@ -445,20 +445,20 @@ obo:STATO_0000374 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/STATO_0000376 obo:STATO_0000376 rdf:type owl:Class ; - + rdfs:label "Z-statistic" ; - + rdfs:subClassOf obo:STATO_0000039 ; - + obo:IAO_0000117 "Alejandra Gonzalez-Beltran" , "Camille Maument" , "Philippe Rocca-Serra" , "Thomas Nichols" ; - + obo:IAO_0000119 "http://en.wikipedia.org/wiki/Z-test" ; - + obo:IAO_0000115 "Z-statistic is a statistic computed from observations and used to produce a p-value when compared to a Standard Normal Distribution in a statistical test called the Z-test." ; - + obo:IAO_0000114 obo:IAO_0000120 . @@ -470,84 +470,84 @@ obo:STATO_0000376 rdf:type owl:Class ; ### http://purl.obolibrary.org/obo/STATO_0000225 rdf:type owl:Class ; - + rdfs:label "probability distribution"@en ; - + "Orlaith Burke"^^xsd:string , "Alejandra Gonzalez-Beltran" ; - + """A probability distribution is a information content entity specifies the probability of the value of a random variable. For a discrete random variable, a mathematical formula that gives the probability of each value of the variable. For a continuous random variable, a curve described by a mathematical formula which specifies, by way of areas under the curve, the probability that the variable falls within a particular interval.""" ; - + "Philippe Rocca-Serra" ; - + . 
### http://purl.obolibrary.org/obo/STATO_0000067 rdf:type owl:Class ; - + rdfs:label "continuous probability distribution"@en ; - + rdfs:subClassOf ; - + "Orlaith Burke"^^xsd:string ; - + ""@en ; - + "Alejandra Gonzalez-Beltran"@en , "Philippe Rocca-Serra"@en ; - + "a continuousprobability distribution is a probability distribution which is defined by a probability density function"@en ; - + """adapted from Wikipedia http://en.wikipedia.org/wiki/Probability_distribution#Continuous_probability_distribution -last accessed: +last accessed: 14/01/2014"""@en ; - + . ### http://purl.obolibrary.org/obo/STATO_0000117 rdf:type owl:Class ; - + rdfs:label "discrete probability distribution"@en ; - + rdfs:subClassOf ; - + "Orlaith Burke"^^xsd:string , "Alejandra Gonzalez-Beltran"@en , "Philippe Rocca-Serra"@en ; - + "a discrete probability distribution is a probability distribution which is defined by a probability mass function where the random variable can only assume a finite number of values or infinitely countable values"@en ; - + """adapted from Wikipedia http://en.wikipedia.org/wiki/Probability_distribution#Discrete_probability_distribution -last accessed: +last accessed: 14/01/2014"""@en ; - + . ### http://purl.obolibrary.org/obo/STATO_0000276 rdf:type owl:Class ; - + rdfs:label "binomial distribution"@en ; - + rdfs:subClassOf ; - + "Orlaith Burke"^^xsd:string ; - + ""@en ; - + "Alejandra Gonzalez-Beltran"@en , "Philippe Rocca-Serra"@en ; - + """The binomial distribution is a discrete probability distribution which describes the probability of k successes in n draws with replacement from a finite population of size N. The binomial distribution is frequently used to model the number of successes in a sample of size n drawn with replacement from a population of size N. @@ -559,13 +559,13 @@ notation: B(n,p) The mean is N*p The variance is N*p*q"""@en ; - + """dbinom(x, size, prob, log = FALSE) http://stat.ethz.ch/R-manual/R-patched/library/stats/html/Binomial.html"""@en ; - + "http://en.wikipedia.org/wiki/Binomial_distribution"@en ; - + . ### http://purl.obolibrary.org/obo/STATO_0000019 @@ -574,86 +574,86 @@ http://stat.ethz.ch/R-manual/R-patched/library/stats/html/Binomial.html"""@en ; ### http://purl.obolibrary.org/obo/STATO_0000227 rdf:type owl:Class ; - + rdfs:label "normal distribution"@en ; - + rdfs:subClassOf ; - + "Orlaith Burke"^^xsd:string ; - + ""@en ; - + "Alejandra Gonzalez-Beltran"@en ; - + "Gaussian distribution"@en ; - + "Philippe Rocca-Serra"@en ; - + """a normal distribution is a continuous probability distribution described by a probability distribution function described here: http://mathworld.wolfram.com/NormalDistribution.html"""@en ; - + "http://mathworld.wolfram.com/NormalDistribution.html"@en ; - + . ### http://purl.obolibrary.org/obo/STATO_0000051 rdf:type owl:Class ; - + rdfs:label "Poisson distribution"@en ; - + rdfs:subClassOf ; - + "Orlaith Burke"^^xsd:string ; - + ""@en ; - + "Alejandra Gonzalez-Beltran"@en ; - + """NIST: http://www.itl.nist.gov/div898/handbook/eda/section3/eda366j.htm """@en ; - + "Philippe Rocca-Serra"@en ; - + """Poisson distribution is a probability distribution used to model the number of events occurring within a given time interval. It is defined by a real number (λ) and an integer k representing the number of events and a function. 
The expected value of a Poisson-distributed random variable is equal to λ and so is its variance."""@en ; - + """dpois(x, lambda, log = FALSE) http://stat.ethz.ch/R-manual/R-patched/library/stats/html/Poisson.html"""@en ; - + . ### http://purl.obolibrary.org/obo/STATO_0000405 rdf:type owl:Class ; - + rdfs:label "unstructured covariance structure"@en ; - + rdfs:subClassOf ; - + "Philippe Rocca-Serra" , "Alejandra Gonzalez-Beltran" , "Thomas Nichols" ; - + "A covariance structure where no restrictions are made on the covariance between any pair of measurements."@en ; - + "http://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_mixed_sect019.htm#statug.mixed.mixedcovstruct" ; - - "Camille Maumet" . - + + "Camille Maumet" . + ### http://purl.obolibrary.org/obo/STATO_0000129 rdf:type owl:DatatypeProperty ; - + rdfs:label "has value"@en ; - + "Alejandra Gonzalez-Beltran"^^xsd:string , "Orlaith Burke"^^xsd:string , "Philippe Rocca-Serra"^^xsd:string ; - - "A relationship (data property) between an entity and its value."@en . - \ No newline at end of file + "A relationship (data property) between an entity and its value."@en . + + diff --git a/nidm/workflows/ProcessExecution.py b/nidm/workflows/ProcessExecution.py index 269152a5..e752e0d8 100644 --- a/nidm/workflows/ProcessExecution.py +++ b/nidm/workflows/ProcessExecution.py @@ -1,9 +1,6 @@ import prov.model as pm - from ..core import Constants - -from ..experiment.Core import Core -from ..experiment.Core import getUUID +from ..experiment.Core import Core, getUUID class ProcessExecution(pm.ProvActivity, Core): @@ -15,23 +12,24 @@ class ProcessExecution(pm.ProvActivity, Core): and user-supplied graph and namespaces """ + def __init__(self, parentDoc=None, attributes=None): """ Default constructor, creates document and adds Process activity to graph with optional attributes - + :param parentDoc: optional ProvDocument :param attributes: optional dictionary of attributes to add """ - #set graph document - if (parentDoc): + # set graph document + if parentDoc: self.graph = parentDoc else: self.graph = Constants.NIDMDocument(namespaces=Constants.namespaces) - #execute default parent class constructor - super(ProcessExecution, self).__init__(self.graph, - pm.PROV[getUUID()], - attributes) + # execute default parent class constructor + super(ProcessExecution, self).__init__( + self.graph, pm.PROV[getUUID()], attributes + ) self.graph._add_record(self) diff --git a/nidm/workflows/ProcessSpecification.py b/nidm/workflows/ProcessSpecification.py index febd6ec9..49ec27d8 100644 --- a/nidm/workflows/ProcessSpecification.py +++ b/nidm/workflows/ProcessSpecification.py @@ -1,9 +1,6 @@ import prov.model as pm - from ..core import Constants - -from ..experiment.Core import Core -from ..experiment.Core import getUUID +from ..experiment.Core import Core, getUUID class ProcessSpecification(pm.ProvEntity, Core): @@ -20,21 +17,21 @@ def __init__(self, parentdoc=None, attributes=None): """ Default constructor, creates document and adds Process activity to graph with optional attributes - + :param parentDoc: optional ProvDocument :param attributes: optional dictionary of attributes to add - + """ - #set graph document - if (parentdoc): + # set graph document + if parentdoc: self.graph = parentdoc else: self.graph = Constants.NIDMDocument(namespaces=Constants.namespaces) - #execute default parent class constructor - super(ProcessSpecification,self).__init__(self.graph, - pm.PROV[getUUID()], - attributes) + # execute default parent class 
constructor + super(ProcessSpecification, self).__init__( + self.graph, pm.PROV[getUUID()], attributes + ) self.add_attributes({pm.PROV_TYPE: pm.PROV_ATTR_PLAN}) self.graph._add_record(self) diff --git a/nidm/workflows/README.md b/nidm/workflows/README.md index ed99ad1c..81f319db 100644 --- a/nidm/workflows/README.md +++ b/nidm/workflows/README.md @@ -1,3 +1,2 @@ # NIDM-Workflows Python API Python API to create, query, read, and write NIDM-Workflow documents. - diff --git a/nidm/workflows/__init__.py b/nidm/workflows/__init__.py index 023d5c95..ba25efd0 100644 --- a/nidm/workflows/__init__.py +++ b/nidm/workflows/__init__.py @@ -1,2 +1,2 @@ +from .ProcessExecution import ProcessExecution from .ProcessSpecification import ProcessSpecification -from .ProcessExecution import ProcessExecution \ No newline at end of file diff --git a/profiler.py b/profiler.py index 02109701..91525179 100644 --- a/profiler.py +++ b/profiler.py @@ -1,10 +1,15 @@ -from nidm.experiment.tools.rest import RestParser import cProfile +from nidm.experiment.tools.rest import RestParser + def go(): restParser = RestParser(output_format=RestParser.OBJECT_FORMAT, verbosity_level=5) - result = restParser.run(["ttl/caltech.ttl"], '/projects/e059fc5e-67aa-11ea-84b4-003ee1ce9545/subjects?filter=instruments.ADOS_MODULE gt 2') - print (result) + result = restParser.run( + ["ttl/caltech.ttl"], + "/projects/e059fc5e-67aa-11ea-84b4-003ee1ce9545/subjects?filter=instruments.ADOS_MODULE gt 2", + ) + print(result) + -if __name__ == '__main__': - cProfile.run('go()', filename='profile.output.txt') +if __name__ == "__main__": + cProfile.run("go()", filename="profile.output.txt") diff --git a/rest-server.py b/rest-server.py index 6130b4e1..4f1d5ebe 100644 --- a/rest-server.py +++ b/rest-server.py @@ -1,19 +1,20 @@ -from flask import Flask, request -from flask_restful import Resource, Api import glob -from nidm.experiment.tools.rest import RestParser +from flask import Flask, request from flask_cors import CORS +from flask_restful import Api, Resource +from nidm.experiment.tools.rest import RestParser import simplejson + def getTTLFiles(): files = [] - for filename in glob.glob('/opt/project/ttl/**/*.ttl', recursive=True): + for filename in glob.glob("/opt/project/ttl/**/*.ttl", recursive=True): files.append(filename) return files + class NIDMRest(Resource): def get(self, all): - query_bits = [] for a in request.args.keys(): query_bits.append("{}={}".format(a, request.args.get(a))) @@ -21,26 +22,37 @@ def get(self, all): files = getTTLFiles() if len(files) == 0: - return ({'error' : 'No NIDM files found. You may need to add NIDM ttl files to ~/PyNIDM/ttl'}) - restParser = RestParser(output_format=RestParser.OBJECT_FORMAT, verbosity_level=5) - - json_str = simplejson.dumps(restParser.run(files, "{}?{}".format(all, query)), indent=2) - response = app.response_class(response=json_str, status=200, mimetype='application/json') + return { + "error": "No NIDM files found. 
You may need to add NIDM ttl files to ~/PyNIDM/ttl" + } + restParser = RestParser( + output_format=RestParser.OBJECT_FORMAT, verbosity_level=5 + ) + + json_str = simplejson.dumps( + restParser.run(files, "{}?{}".format(all, query)), indent=2 + ) + response = app.response_class( + response=json_str, status=200, mimetype="application/json" + ) return response + class Instructions(Resource): def get(self): - - return ({'message' : 'You probably want to start at {}projects See instructions at PyNIDM/docker/README.md for details on the API and loading data.'.format(request.url_root)}) - + return { + "message": "You probably want to start at {}projects See instructions at PyNIDM/docker/README.md for details on the API and loading data.".format( + request.url_root + ) + } app = Flask(__name__) CORS(app) api = Api(app) -api.add_resource(Instructions, '/') -api.add_resource(NIDMRest, '/') +api.add_resource(Instructions, "/") +api.add_resource(NIDMRest, "/") -if __name__ == '__main__': - app.run(debug=True, host='0.0.0.0') \ No newline at end of file +if __name__ == "__main__": + app.run(debug=True, host="0.0.0.0") diff --git a/working_notes.md b/working_notes.md index 8e9e1583..25e9c907 100755 --- a/working_notes.md +++ b/working_notes.md @@ -9,6 +9,6 @@ Use Apache Jena: ## Run one test example: -pytest -k CMU_GetProjectsComputedMetadata +pytest -k CMU_GetProjectsComputedMetadata -- \ No newline at end of file +-
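For orientation only (this is editor commentary, not part of the changeset above): a minimal sketch of how the two workflow classes re-exported by `nidm/workflows/__init__.py` might be used together, based solely on the constructor signatures visible in the `ProcessSpecification`/`ProcessExecution` hunks. It assumes `Constants.NIDMDocument` behaves like a standard `prov.model.ProvDocument`; nothing here is taken from the PyNIDM test suite.

```python
# Hedged sketch, not part of the diff: exercise the refactored workflow
# classes using only the constructor signatures shown in the hunks above.
from nidm.workflows import ProcessExecution, ProcessSpecification

# With no parent document, ProcessSpecification builds its own NIDMDocument.
spec = ProcessSpecification()

# A ProcessExecution can be attached to that same graph via parentDoc.
execution = ProcessExecution(parentDoc=spec.graph)

# Both records register themselves on the shared document; assuming
# NIDMDocument subclasses prov.model.ProvDocument, it can be inspected
# with the standard prov API.
print(spec.graph.get_provn())
```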
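Likewise, a hedged sketch of querying the Flask service defined in `rest-server.py` once it is running locally. Flask's default port (5000) and the `/projects` starting point suggested by the `Instructions` message are assumptions, and the project UUID is simply the one `profiler.py` uses, kept only for illustration.

```python
# Hedged sketch, not part of the diff: query the running REST server with the
# standard library only. The UUID below is copied from profiler.py purely as
# an example and may not exist in your own NIDM files.
import json
from urllib.parse import quote
from urllib.request import urlopen

BASE = "http://localhost:5000"  # assumes Flask's default port

# The Instructions resource suggests starting at /projects.
print(json.load(urlopen(BASE + "/projects")))

# The same filtered subject query that profiler.py issues through RestParser.
path = "/projects/e059fc5e-67aa-11ea-84b4-003ee1ce9545/subjects?filter=" + quote(
    "instruments.ADOS_MODULE gt 2"
)
print(json.load(urlopen(BASE + path)))
```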