diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..432e42b --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +#Ignore cache and DS_Store files +*.DS_Store +*.pyc + +#ignore notebooks +*.ipynb + +#Ignore all local volume and test files +volumes/ + +#ignore DAGs except template +conduit/dags +!conduit/dags/__init__.py +!conduit/dags/template_dag \ No newline at end of file diff --git a/conduit/CWLDockerfile b/CWLDockerfile similarity index 91% rename from conduit/CWLDockerfile rename to CWLDockerfile index 434bc05..6132609 100644 --- a/conduit/CWLDockerfile +++ b/CWLDockerfile @@ -15,7 +15,7 @@ #### CWL parser dockerfile #### FROM python:3.6-slim -ARG AIRFLOW_VERSION=1.10.0 +ARG AIRFLOW_VERSION=1.10.4 ENV SLUGIFY_USES_TEXT_UNIDECODE=yes RUN set -ex \ && buildDeps=' \ @@ -36,13 +36,13 @@ RUN set -ex \ $buildDeps \ python3-pip \ python3-requests \ - mysql-client \ - mysql-server \ default-libmysqlclient-dev \ apt-utils \ curl \ rsync \ netcat \ + default-mysql-server \ + default-mysql-client \ locales RUN apt-get update \ && apt-get install -y python3-pip python3-dev \ @@ -51,9 +51,6 @@ RUN apt-get update \ RUN pip3 install apache-airflow==$AIRFLOW_VERSION RUN pip install apache-airflow==$AIRFLOW_VERSION -COPY requirements.txt / -RUN pip install -r /requirements.txt -RUN pip3 install -r /requirements.txt # COPY awsbatch_operator.py \ # cwl-to-dag.py \ # create_job_definitions.py \ @@ -63,4 +60,7 @@ RUN pip3 install -r /requirements.txt # s3wrap \ # cwl_monitor /scripts/ # ENTRYPOINT [ "cwl_monitor" ] +COPY ./conduit /conduit +COPY ./setup.py / +RUN pip install -e / ENV PATH="/conduit:${PATH}" diff --git a/Contributing.md b/Contributing.md new file mode 100644 index 0000000..a760f05 --- /dev/null +++ b/Contributing.md @@ -0,0 +1,89 @@ +# Contributing + +When contributing to SABER, please first discuss the change you wish to make via issue, +email, or any other method with the owners of this repository before making a change. + +Please note we have a code of conduct, please follow it in all your interactions with the project. + +## Pull Request Process + +1. Ensure any install or build dependencies are removed before the end of the layer when doing a + build. Pin versions where appropriate. +2. Update the README.md in the appropriate folder with details of changes to the interface or tools. +3. You may merge the Pull Request in once you have the sign-off of two other developers, or if you + do not have permission to do that, you may request the second reviewer to merge it for you. + +## Code of Conduct + +### Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, gender identity and expression, level of experience, +nationality, personal appearance, race, religion, or sexual identity and +orientation. 
+ +### Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or +advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +### Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +### Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. + +### Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at [INSERT EMAIL ADDRESS]. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +### Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at [http://contributor-covenant.org/version/1/4][version] + +[homepage]: http://contributor-covenant.org +[version]: http://contributor-covenant.org/version/1/4/ \ No newline at end of file diff --git a/README.md b/README.md index 6c2034c..738f19e 100644 --- a/README.md +++ b/README.md @@ -23,9 +23,9 @@ Please see our [wiki](https://github.com/aplbrain/saber/wiki/Data-Access) for mo ## Legal -Use or redistribution of the Boss system in source and/or binary forms, with or without modification, are permitted provided that the following conditions are met: +Use or redistribution of the SABER system in source and/or binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code or binary forms must adhere to the terms and conditions of any applicable software licenses. -2. 
End-user documentation or notices, whether included as part of a redistribution or disseminated as part of a legal or scientific disclosure (e.g. publication) or advertisement, must include the following acknowledgement: The Boss software system was designed and developed by the Johns Hopkins University Applied Physics Laboratory (JHU/APL). -3. The names "The Boss", "JHU/APL", "Johns Hopkins University", "Applied Physics Laboratory", "MICrONS", or "IARPA" must not be used to endorse or promote products derived from this software without prior written permission. For written permission, please contact BossAdmin@jhuapl.edu. +2. End-user documentation or notices, whether included as part of a redistribution or disseminated as part of a legal or scientific disclosure (e.g. publication) or advertisement, must include the following acknowledgement: The SABER software system was designed and developed by the Johns Hopkins University Applied Physics Laboratory (JHU/APL). +3. The names "SABER", "JHU/APL", "Johns Hopkins University", "Applied Physics Laboratory" must not be used to endorse or promote products derived from this software without prior written permission. For written permission, please contact BossAdmin@jhuapl.edu. 4. This source code and library is distributed in the hope that it will be useful, but is provided without any warranty of any kind. diff --git a/conduit/WebDockerfile b/WebDockerfile similarity index 78% rename from conduit/WebDockerfile rename to WebDockerfile index 224c13f..86c15bd 100644 --- a/conduit/WebDockerfile +++ b/WebDockerfile @@ -9,7 +9,7 @@ ENV DEBIAN_FRONTEND noninteractive ENV TERM linux # Airflow -ARG AIRFLOW_VERSION=1.10.0 +ARG AIRFLOW_VERSION=1.10.4 ARG AIRFLOW_HOME=/root # Define en_US. @@ -38,31 +38,29 @@ RUN set -ex \ $buildDeps \ python3-pip \ python3-requests \ - mysql-client \ - mysql-server \ default-libmysqlclient-dev \ apt-utils \ curl \ rsync \ netcat \ locales \ + default-mysql-server \ + default-mysql-client \ && apt-get install -y git \ && sed -i 's/^# en_US.UTF-8 UTF-8$/en_US.UTF-8 UTF-8/g' /etc/locale.gen \ && locale-gen \ && update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 \ && useradd -ms /bin/bash -d ${AIRFLOW_HOME} airflow \ && pip install -U pip setuptools wheel \ + && pip install werkzeug==0.16.0 \ + && pip install SQLAlchemy==1.3.15 \ && pip install Cython \ && pip install pytz \ && pip install pyOpenSSL \ && pip install ndg-httpsclient \ && pip install pyasn1 \ && pip install apache-airflow[crypto,celery,postgres,hive,jdbc,mysql]==$AIRFLOW_VERSION \ - && pip install celery[redis]==4.1.1 \ - && pip install boto3==1.9.79 \ - && pip install datajoint==0.11.1 \ - && pip install parse==1.9.0 \ - && pip install docker==3.7.0 + && pip install celery[redis]==4.1.1 # && apt-get purge --auto-remove -yqq $buildDeps \ # && apt-get autoremove -yqq --purge \ # && apt-get clean \ @@ -74,13 +72,13 @@ RUN set -ex \ # /usr/share/doc \ # /usr/share/doc-base -RUN git clone https://github.com/aplbrain/cwl-airflow-parser.git \ - && cd cwl-airflow-parser \ - && pip install -U . \ - && cd ../ +# RUN git clone https://github.com/aplbrain/cwl-airflow-parser.git \ +# && cd cwl-airflow-parser \ +# && pip install -U . 
\ +# && cd ../ -COPY ./scripts/entrypoint.sh /entrypoint.sh -COPY ./config/airflow.cfg ${AIRFLOW_HOME}/airflow.cfg +COPY ./conduit/scripts/entrypoint.sh /entrypoint.sh +COPY ./conduit/config/airflow.cfg ${AIRFLOW_HOME}/airflow.cfg RUN chown -R airflow: ${AIRFLOW_HOME} @@ -89,4 +87,6 @@ EXPOSE 8080 5555 8793 # USER airflow WORKDIR /home ENTRYPOINT ["/entrypoint.sh"] -CMD ["webserver"] # set default arg for entrypoint \ No newline at end of file +COPY ./conduit /conduit +COPY ./setup.py / +RUN pip install -e / diff --git a/conduit/conduit b/conduit/conduit index c486816..8c15e94 100755 --- a/conduit/conduit +++ b/conduit/conduit @@ -16,29 +16,35 @@ import os import argparse -import yaml +import yaml import logging import csv -from utils.parameterization import parameterize, RandomSampler +from utils.parameterization import * import requests -import cwltool.main as cwltool +import cwltool.main as cwltool import time + + def init(args): raise NotImplementedError + + def construct_parser(args): - if cwltool.main(argsl=['--validate', args.cwl, args.job]) != 0: - raise RuntimeError('CWL failed to validate') - + if cwltool.main(argsl=["--validate", args.cwl, args.job]) != 0: + raise RuntimeError("CWL failed to validate") + with open(args.config) as fp: - config = yaml.load(fp) + config = yaml.full_load(fp) from utils.cwlparser import CwlParser + return CwlParser(args.cwl, config) + + def build(args): p = construct_parser(args) p.create_job_definitions() p.build_docker_images() - - + def parse(args): p = construct_parser(args) @@ -47,142 +53,224 @@ def parse(args): p.build_docker_images() if args.parameterize: with open(args.parameterize) as fp: - pm = yaml.load(fp) + pm = yaml.full_load(fp) p.set_parameterization(pm) - dag = p.generate_dag(args.job) - - log.info('Generating Airflow DAG from workflow CWL') + write_dag(p, args.job) + - log.info('Pickling and writing DAG {} to dag folder'.format(dag.dag_id)) - p.dag_write(dag) - -def collect(args): - p = construct_parser(args) - results = p.collect_results() - write_results(results,args) -def write_results(results, args): - with open(args.output,'w') as fp: - w = csv.DictWriter(fp, fieldnames=results[0].keys()) - w.writeheader() - for row in results: - w.writerow(row) - log.info('Wrote results to {}'.format(args.output)) def optimize(args): + # TODO: Create optimize samping mappings to load classes from + sample_mapping = { + "random": RandomSampler, + "grid": GridSampler, + "batch-grid": BatchGridSampler, + } p = construct_parser(args) p.create_job_definitions() with open(args.job) as fp: - job = yaml.load(fp) + job = yaml.full_load(fp) with open(args.parameterize) as fp: - pm = yaml.load(fp) - if args.sampling_strategy == 'random': - sampler = RandomSampler(pm, job, args.max_iterations) - else: - raise NotImplementedError('Other samplers are not supported yet!') - for i,iteration in enumerate(sampler.sample()): - print('Executing iteration {} of random sampling'.format(i)) - print(iteration) + pm = yaml.full_load(fp) + + try: + sampler_params = pm.pop("sampler") + method_key = sampler_params.pop("method") + method = sample_mapping[method_key] + except KeyError: + raise KeyError( + "Method not found. 
Available methods: {}".format( + list(sample_mapping.keys()) + ) + ) + + sampler = method(pm, job, **sampler_params) + for i, iteration in enumerate(sampler.sample()): + log.info("Executing iteration {} of {} sampling".format(i, method_key)) + log.info("Parameters: " + str(iteration)) + p.optimization_iteration = i # keeps track of job iterations for subDAG if type(iteration) == list: - p.set_parameterization(iteration) + p.parameterization = iteration elif type(iteration) == dict: - p.set_parameterization([iteration]) - dag = p.generate_dag(args.job) - p.dag_write(dag) - r = requests.get("http://webserver:8080/dagbag/fill") - if r.status_code != 200: - print('Warning: Dag bag was unable to be filled manually, waiting 60s for the scheduler to do it automatically') - time.sleep(60) - wait_for_success(p) + p.parameterization = [iteration] + + # Runs job with parameters from iteration + write_dag(p, args.job) + wait_for_success(p, args.retry, args.continue_on_failure) + + # Obtains results (iterable of ordered dictionary of all completed iterations) and updates sampler results = p.collect_results() - sampler.update(results) + write_results(results, args) - print('Done') -def wait_for_success(parser): - dag_id = parser.dag_id - r = requests.post('http://webserver:8080/dags/{}/run'.format(dag_id), json={}) + print("Done") + + +def collect(args): + p = construct_parser(args) + results = p.collect_results() + write_results(results, args) + + +def write_dag(parser, job): + dag = parser.generate_dag(job) + log.info("Generating Airflow DAG from workflow CWL") + log.info("Pickling and writing DAG {} to dag folder".format(dag.dag_id)) + parser.dag_write(dag) + r = requests.get("http://webserver:8080/dagbag/fill") + if r.status_code != 200: + log.warning( + "Warning: Dag bag was unable to be filled manually, waiting 60s for the scheduler to do it automatically" + ) + time.sleep(60) + + +def run_dag(dag_id): + r = requests.post("http://webserver:8080/dags/{}/run".format(dag_id), json={}) if r.status_code != 200: - raise ValueError('Dag did not launch successfully! Error {}'.format(r.status_code)) + raise ValueError( + "Dag did not launch successfully! Error {}".format(r.status_code) + ) else: - print('Dag launched sucessfully') - r = requests.get('http://webserver:8080/latest_runs') + log.warning("Dag launched sucessfully") + r = requests.get("http://webserver:8080/latest_runs") if r.status_code != 200: - raise ValueError('Failed to get latest runs... Error {}'.format(r.status_code)) + raise ValueError("Failed to get latest runs... 
Error {}".format(r.status_code)) r = r.json() execution_date = None try: - for dagrun in r['items']: - if dagrun['dag_id'] == dag_id: - execution_date = dagrun['execution_date'] + for dagrun in r["items"]: + if dagrun["dag_id"] == dag_id: + execution_date = dagrun["execution_date"] break if execution_date == None: - raise ValueError('This dag was not found in the latest runs!') + raise ValueError("This dag was not found in the latest runs!") except KeyError: - raise ValueError('Something went wrong...') + raise ValueError("Something went wrong...") + return execution_date + + +def wait_for_success(parser, retry=False, continue_on_failure=False): + dag_id = parser.dag_id + execution_date = run_dag(dag_id) done = False - while done == False: - r = requests.get('http://webserver:8080/dags/{}/dag_runs/{}'.format(dag_id, execution_date)) + while not done: + r = requests.get( + "http://webserver:8080/dags/{}/dag_runs/{}".format(dag_id, execution_date) + ) if r.status_code != 200: - raise ValueError('Unable to get latest run of dag... error {}'.format(r.status_code)) - state = r.json()['state'] - if state == 'success': + raise ValueError( + "Unable to get latest run of dag... error {}".format(r.status_code) + ) + state = r.json()["state"] + if state == "success": done = True - elif state == 'failed': - raise ValueError('Iteration failed! Check logs...') + elif state == "failed": + if continue_on_failure: + log.warning("Failed tasks in DAG {}. Continuing run...".format(dag_id)) + done = True + else: + if retry is True: + log.warning("DAG {} failed. Retrying...".format(dag_id)) + # retry_failed_tasks(dag_id) + execution_date = run_dag(dag_id) + retry = False + else: + raise ValueError("Iteration failed! Check logs...") else: - done = False time.sleep(5) - -if __name__ == '__main__': - + + +def write_results(results, args): + with open(args.output, "w") as fp: + w = csv.DictWriter(fp, fieldnames=results[0].keys()) + w.writeheader() + for row in results: + w.writerow(row) + log.info("Wrote results to {}".format(args.output)) + + +if __name__ == "__main__": + # Arguments - parent_parser = argparse.ArgumentParser(description='Parses a CWL file and generates dags and AWS batch job definitions') - default_config_location = os.path.join(os.path.dirname(__file__), 'config', 'aws_config.yml') + parent_parser = argparse.ArgumentParser( + description="Parses a CWL file and generates dags and AWS batch job definitions" + ) + default_config_location = os.path.join( + os.path.dirname(__file__), "config", "aws_config.yml" + ) - parent_parser.add_argument('--config', '-c', help='Config file', default=default_config_location) - parent_parser.add_argument('--logLevel', help='Set logging level', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], default='WARNING') + parent_parser.add_argument( + "--config", "-c", help="Config file", default=default_config_location + ) + parent_parser.add_argument( + "--logLevel", + help="Set logging level", + choices=["DEBUG", "INFO", "WARNING", "ERROR"], + default="WARNING", + ) - subparsers = parent_parser.add_subparsers(dest='command') + subparsers = parent_parser.add_subparsers(dest="command") subparsers.required = True - init_parser = subparsers.add_parser('init', help='Starts an AWS cloudformation designed to run SABER') + init_parser = subparsers.add_parser( + "init", help="Starts an AWS cloudformation designed to run SABER" + ) init_parser.set_defaults(func=init) - build_parser = subparsers.add_parser('build', help='Builds the relevant docker containers for the workflow') - 
build_parser.add_argument('cwl', help='CWL file') - build_parser.add_argument('job', help='CWL job file') + build_parser = subparsers.add_parser( + "build", help="Builds the relevant docker containers for the workflow" + ) + build_parser.add_argument("cwl", help="CWL file") + build_parser.add_argument("job", help="CWL job file") build_parser.set_defaults(func=build) - parse_parser = subparsers.add_parser('parse', help='Construct an Airflow workflow from a cwl file') - parse_parser.add_argument('cwl', help='CWL file') - parse_parser.add_argument('job', help='CWL job file') - parse_parser.add_argument('--parameterize','-p', help='Parameterization file') - parse_parser.add_argument('--build', help='Build the containers as well as write the dag', action='store_true') + parse_parser = subparsers.add_parser( + "parse", help="Construct an Airflow workflow from a cwl file" + ) + parse_parser.add_argument("cwl", help="CWL file") + parse_parser.add_argument("job", help="CWL job file") + parse_parser.add_argument("--parameterize", "-p", help="Parameterization file") + parse_parser.add_argument( + "--build", + help="Build the containers as well as write the dag", + action="store_true", + ) parse_parser.set_defaults(func=parse) - collect_parser = subparsers.add_parser('collect', help='Collect results and write to a csv file') - collect_parser.add_argument('cwl', help='CWL file') - collect_parser.add_argument('job', help='CWL job file') - collect_parser.add_argument('output', help='CSV file to write to') + collect_parser = subparsers.add_parser( + "collect", help="Collect results and write to a csv file" + ) + collect_parser.add_argument("cwl", help="CWL file") + collect_parser.add_argument("job", help="CWL job file") + collect_parser.add_argument("output", help="CSV file to write to") collect_parser.set_defaults(func=collect) - optimize_parser = subparsers.add_parser('optimize', help='optimize results and write to a csv file') - optimize_parser.add_argument('cwl', help='CWL file') - optimize_parser.add_argument('job', help='CWL job file') - optimize_parser.add_argument('parameterize', help='Parameterization file') - optimize_parser.add_argument('--output', help='CSV file to write to', default='optiout.csv') - optimize_parser.add_argument('--sampling-strategy', help='Sampling strategy to use', default='random') - optimize_parser.add_argument('--max-iterations', help='Max iterations', type=int) + optimize_parser = subparsers.add_parser( + "optimize", help="optimize results and write to a csv file" + ) + optimize_parser.add_argument("cwl", help="CWL file") + optimize_parser.add_argument("job", help="CWL job file") + optimize_parser.add_argument("parameterize", help="Parameterization file") + optimize_parser.add_argument( + "--output", help="CSV file to write to", default="optiout.csv" + ) + optimize_parser.add_argument( + "--retry", "-r", help="Retry failed runs once", action="store_true" + ) + optimize_parser.add_argument( + "--continue_on_failure", + "-c", + help="Continue running iterations regardless of failed states", + action="store_true", + ) optimize_parser.set_defaults(func=optimize) args = parent_parser.parse_args() - loglevel = getattr(logging, args.logLevel) logging.basicConfig(level=loglevel) log = logging.getLogger(__name__) log.setLevel(loglevel) - log.debug('Debug enabled') + log.debug("Debug enabled") args.func(args) - - \ No newline at end of file diff --git a/conduit/config/airflow.cfg b/conduit/config/airflow.cfg index 75393e3..c847fa1 100644 --- a/conduit/config/airflow.cfg +++ 
b/conduit/config/airflow.cfg @@ -1,7 +1,4 @@ [core] -# The home folder for airflow, default is ~/airflow -airflow_home = /root - # The folder where your airflow pipelines live, most likely a # subfolder in a code repository # This path must be absolute @@ -51,10 +48,10 @@ sql_alchemy_pool_recycle = 3600 # The amount of parallelism as a setting to the executor. This defines # the max number of task instances that should run simultaneously # on this airflow installation -parallelism = 200 +parallelism = 8 # The number of task instances allowed to run concurrently by the scheduler -dag_concurrency = 200 +dag_concurrency = 8 # Are DAGs paused by default at creation dags_are_paused_at_creation = False @@ -64,7 +61,7 @@ dags_are_paused_at_creation = False non_pooled_task_slot_count = 128 # The maximum number of active DAG runs per DAG -max_active_runs_per_dag = 200 +max_active_runs_per_dag = 8 # Whether to load the examples that ship with Airflow. It's good to # get started, but you probably want to set this to False in a production diff --git a/conduit/config/aws_config.yml b/conduit/config/aws_config.yml index 955538e..05911a3 100644 --- a/conduit/config/aws_config.yml +++ b/conduit/config/aws_config.yml @@ -15,7 +15,7 @@ # Configuration options for AWS # IAM configuration iam: - RoleName: SaberAirflowBatchWF-ecsTaskWithS3-FM8X2T2J3PXJ + RoleName: Saber-Airflow-Workflow-ecsTaskWithS3-45XRWHEJJ8DK # Must start with file:// or be a url (eg. http://) AssumeRolePolicyDocument: file://ec2-trust-policy.json Description: Allows ECS tasks to have full access to S3 @@ -65,7 +65,7 @@ security-group: Description: Default VPC security group GroupName: default job-queue: - jobQueueName: saber-gen-queue + jobQueueName: saber-gpu-queue-enhanced-memory state: ENABLED priority: 1 computeEnvironmentOrder: @@ -77,9 +77,9 @@ job-definitions: retryStrategy: attempts: 1 containerProperties: - vcpus: 4 - memory: 40000 - jobRoleArn: arn:aws:iam::256215146792:role/SaberAirflowBatchWF-ecsTaskWithS3-FM8X2T2J3PXJ + vcpus: 2 + memory: 4000 + jobRoleArn: arn:aws:iam::438004392447:role/Saber-Airflow-Workflow-BatchInstanceRole-1TQSRWFR81Y5O volumes: - name: saber-home host: @@ -89,3 +89,7 @@ job-definitions: - sourceVolume: saber-home containerPath: /saber-home ulimits: [] +datajoint: + host: 'datajoint:3306' + user: root + password: airflow diff --git a/conduit/config/dockerfile_local_template b/conduit/config/dockerfile_local_template new file mode 100644 index 0000000..394ae8b --- /dev/null +++ b/conduit/config/dockerfile_local_template @@ -0,0 +1,20 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
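+
+# Intended to be appended to a tool image for local execution (inferred from
+# the localwrap wrapper added in this change): copies the localwrap script
+# into /app, installs `parse` for both the python and python3 interpreters,
+# prepends /app to PATH, and clears the entrypoint so the wrapper command can
+# be invoked directly.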
+ +COPY ./localwrap /app/ +RUN python -m pip install parse +RUN python3 -m pip install parse + +ENV PATH="/app:${PATH}" +ENTRYPOINT [] \ No newline at end of file diff --git a/conduit/config/dockerfile_template b/conduit/config/dockerfile_s3_template similarity index 100% rename from conduit/config/dockerfile_template rename to conduit/config/dockerfile_s3_template diff --git a/conduit/config/pool_config.json b/conduit/config/pool_config.json new file mode 100644 index 0000000..44496a0 --- /dev/null +++ b/conduit/config/pool_config.json @@ -0,0 +1,10 @@ +{ + "Local": { + "description": "Local Execution Pool", + "slots": 4 + }, + "Batch": { + "description": "AWS Batch Execution Pool", + "slots": 100 + } +} diff --git a/conduit/dags/template_dag b/conduit/dags/template_dag index 61215c7..a2471f5 100644 --- a/conduit/dags/template_dag +++ b/conduit/dags/template_dag @@ -2,14 +2,15 @@ import pickle from airflow import DAG import os import sys -from pathlib import Path # if you haven't already done so +from pathlib import Path # if you haven't already done so + file = Path(__file__).resolve() parent, root = file.parent, file.parents[1] sys.path.append(str(root)) from utils.awsbatch_operator import AWSBatchOperator from utils.datajoint_hook import * -with open(os.path.join(os.path.dirname(__file__),'{}_dag.pickle'),'rb') as fp: +with open(os.path.join(os.path.dirname(__file__), "{}_dag.pickle"), "rb") as fp: print(fp) - dag = pickle.load(fp) -dagdag = dag \ No newline at end of file + dag = pickle.load(fp) +dagdag = dag diff --git a/conduit/plugins/customapi.py b/conduit/plugins/customapi.py index 381bc19..ebe7ba2 100644 --- a/conduit/plugins/customapi.py +++ b/conduit/plugins/customapi.py @@ -27,10 +27,11 @@ from flask import g, Blueprint, jsonify, request, url_for -api_custom = Blueprint('api_custom', __name__) +api_custom = Blueprint("api_custom", __name__) + @csrf.exempt -@api_custom.route('/dags//run', methods=['POST']) +@api_custom.route("/dags//run", methods=["POST"]) def trigger_dag(dag_id): """ Trigger a new dag run for a Dag with an execution date of now unless @@ -39,26 +40,28 @@ def trigger_dag(dag_id): data = request.get_json(force=True) run_id = None - if 'run_id' in data: - run_id = data['run_id'] + if "run_id" in data: + run_id = data["run_id"] conf = None - if 'conf' in data: - conf = data['conf'] + if "conf" in data: + conf = data["conf"] execution_date = None - if 'execution_date' in data and data['execution_date'] is not None: - execution_date = data['execution_date'] + if "execution_date" in data and data["execution_date"] is not None: + execution_date = data["execution_date"] # Convert string datetime into actual datetime try: execution_date = timezone.parse(execution_date) except ValueError: error_message = ( - 'Given execution date, {}, could not be identified ' - 'as a date. Example date format: 2015-11-16T14:34:15+00:00' - .format(execution_date)) - response = jsonify({'error': error_message}) + "Given execution date, {}, could not be identified " + "as a date. 
Example date format: 2015-11-16T14:34:15+00:00".format( + execution_date + ) + ) + response = jsonify({"error": error_message}) response.status_code = 400 return response @@ -70,20 +73,19 @@ def trigger_dag(dag_id): response.status_code = err.status_code return response - - response = jsonify(message="Created {}".format(dr)) return response + + @csrf.exempt -@api_custom.route('/dagbag/fill', methods=['GET']) +@api_custom.route("/dagbag/fill", methods=["GET"]) def fill_dagbag(): db = DagBag() db.collect_dags() - return jsonify(message='Done') + return jsonify(message="Done") - -@api_custom.route('/dags//dag_runs', methods=['GET']) +@api_custom.route("/dags//dag_runs", methods=["GET"]) def dag_runs(dag_id): """ Returns a list of Dag Runs for a specific DAG ID. @@ -93,7 +95,7 @@ def dag_runs(dag_id): or all runs if the state is not specified """ try: - state = request.args.get('state') + state = request.args.get("state") dagruns = get_dag_runs(dag_id, state) except AirflowException as err: response = jsonify(error="{}".format(err)) @@ -103,16 +105,14 @@ def dag_runs(dag_id): return jsonify(dagruns) -@api_custom.route('/test', methods=['GET']) - +@api_custom.route("/test", methods=["GET"]) def test(): - return jsonify(status='OK') - + return jsonify(status="OK") @api_custom.route( - '/dags//dag_runs/', - methods=['GET']) + "/dags//dag_runs/", methods=["GET"] +) def dag_run_status(dag_id, execution_date): """ Returns a JSON with a dag_run's public instance variables. @@ -126,10 +126,12 @@ def dag_run_status(dag_id, execution_date): execution_date = timezone.parse(execution_date) except ValueError: error_message = ( - 'Given execution date, {}, could not be identified ' - 'as a date. Example date format: 2015-11-16T14:34:15+00:00'.format( - execution_date)) - response = jsonify({'error': error_message}) + "Given execution date, {}, could not be identified " + "as a date. Example date format: 2015-11-16T14:34:15+00:00".format( + execution_date + ) + ) + response = jsonify({"error": error_message}) response.status_code = 400 return response @@ -144,26 +146,32 @@ def dag_run_status(dag_id, execution_date): return jsonify(info) -@api_custom.route('/latest_runs', methods=['GET']) +@api_custom.route("/latest_runs", methods=["GET"]) def latest_dag_runs(): """Returns the latest DagRun for each DAG formatted for the UI. 
""" from airflow.models import DagRun + dagruns = DagRun.get_latest_runs() payload = [] for dagrun in dagruns: if dagrun.execution_date: - payload.append({ - 'dag_id': dagrun.dag_id, - 'execution_date': dagrun.execution_date.isoformat(), - 'start_date': ((dagrun.start_date or '') and - dagrun.start_date.isoformat()), - 'dag_run_url': url_for('airflow.graph', dag_id=dagrun.dag_id, - execution_date=dagrun.execution_date) - }) + payload.append( + { + "dag_id": dagrun.dag_id, + "execution_date": dagrun.execution_date.isoformat(), + "start_date": ( + (dagrun.start_date or "") and dagrun.start_date.isoformat() + ), + "dag_run_url": url_for( + "airflow.graph", + dag_id=dagrun.dag_id, + execution_date=dagrun.execution_date, + ), + } + ) return jsonify(items=payload) # old flask versions dont support jsonifying arrays - class AirflowCustomApiPlugin(AirflowPlugin): name = "custom_api" operators = [] diff --git a/conduit/requirements.txt b/conduit/requirements.txt index ca2c284..91798ff 100644 --- a/conduit/requirements.txt +++ b/conduit/requirements.txt @@ -2,5 +2,5 @@ watchdog==0.9.0 parse==1.9.0 boto3==1.9.79 docker==3.7.0 -datajoint==0.11.1 +datajoint==0.11.3 cwltool==1.0.20181217162649 \ No newline at end of file diff --git a/conduit/scripts/entrypoint.sh b/conduit/scripts/entrypoint.sh index 508e1be..4288681 100755 --- a/conduit/scripts/entrypoint.sh +++ b/conduit/scripts/entrypoint.sh @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +set -m TRY_LOOP="20" : "${REDIS_HOST:="redis"}" @@ -37,6 +37,7 @@ export \ AIRFLOW__CORE__LOAD_EXAMPLES \ AIRFLOW__CORE__SQL_ALCHEMY_CONN \ +export AIRFLOW_HOME=/root # Load DAGs exemples (default: Yes) if [[ -z "$AIRFLOW__CORE__LOAD_EXAMPLES" && "${LOAD_EX:=n}" == n ]] @@ -79,7 +80,7 @@ if [ "$AIRFLOW__CORE__EXECUTOR" = "CeleryExecutor" ]; then AIRFLOW__CELERY__BROKER_URL="redis://$REDIS_PREFIX$REDIS_HOST:$REDIS_PORT/1" wait_for_port "Redis" "$REDIS_HOST" "$REDIS_PORT" fi -airflow pool -s Local 4 + case "$1" in webserver) airflow initdb @@ -87,7 +88,11 @@ case "$1" in # With the "Local" executor it should all run in one container. airflow scheduler & fi - exec airflow webserver + # exec airflow webserver + airflow webserver & + sleep 10 + airflow pool -i /conduit/config/pool_config.json + fg ;; worker|scheduler) # To give the webserver time to run initdb. diff --git a/conduit/scripts/localwrap b/conduit/scripts/localwrap new file mode 100644 index 0000000..feece51 --- /dev/null +++ b/conduit/scripts/localwrap @@ -0,0 +1,114 @@ +#!/usr/bin/env python +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import argparse +import shutil +import os +import subprocess +import parse +import resource + +""" +Local Execution Wrapper for SABER + +Input files are mounted from CWL file_path hint to saber/volumes/data/local. 
Output files are placed in the same volume under the corresponding +workflow and step name. Using the cache hint causes the tool to be skipped entirely and assumes output files already exists under file_path/workflow_name/step_name/. + +TODO: Cache option should assert if output files exist. +""" + + +parser = argparse.ArgumentParser() + +parser.add_argument( + "--input", + help="Comma delimited list of files to download in the form /directory/file", +) +parser.add_argument( + "--output", + help="Comma delimited list of files to upload in the form /directory/file", +) +parser.add_argument( + "--wf", help="Workflow stepname in the form of workflow_name/step_name" +) +parser.add_argument("--use_cache", help="True/False: Uses existing outputs") +parser.add_argument( + "command", + nargs=argparse.REMAINDER, + help="The actual command being run form of command args ...", +) + +args = parser.parse_args() +VOLUME_PATH = "/volumes/data/local/" + +if args.use_cache == "True": + print("Using files in {}".format(args.wf)) +else: + if args.input: + input_path = os.path.join(VOLUME_PATH, os.path.split(args.wf)[0]) + infiles = args.input.split(",") + for f in infiles: + # Split directory from file name. + fs = os.path.split(f) + fn = fs[-1] + if fs[0] != "": + target = os.path.join(*fs[:-1]) + target = target.split(".")[0] + if not os.path.exists(target) and target != "": + os.makedirs(target) + else: + target = fn + + source = os.path.join(input_path, f) + if not os.path.exists(source): + # File is not located under workflow name (outside directory) + source = os.path.join(VOLUME_PATH, f) + + try: + print("Filename: " + fn) + print("Source: " + source) + print("Target: " + target) + shutil.copy(source, target) + except OSError as e: + if os.path.exists(fn): + pass + else: + print("Error {} while copying file {}".format(e, fn)) + raise e + + proc = subprocess.call(args.command, shell=False) + if proc != 0: + raise SystemError( + "Child process failed with exit code {}... exiting...".format(proc) + ) + + if args.output: + output_path = os.path.join(VOLUME_PATH, args.wf) + if not os.path.exists(output_path): + # Creates the output volume path if it doesn't exist already + os.makedirs(output_path) + # List output files with parent directorys e.g. ['boss_pull_raw.0/pull_output.npy'] + outfiles = args.output.split(",") + for f in outfiles: + fn = os.path.split(f)[-1] + try: + shutil.copy(fn, output_path) + except OSError as e: + print("Got error {} while copying file {}".format(e, fn)) + raise e + print("Deleting file to free space...") + os.remove(fn) + print("TOTAL MEMORY USED: {}".format(resource.getrusage(0).ru_maxrss)) diff --git a/conduit/scripts/s3wrap b/conduit/scripts/s3wrap index b065545..b147ea9 100644 --- a/conduit/scripts/s3wrap +++ b/conduit/scripts/s3wrap @@ -12,101 +12,122 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+""" +S3 Execution Wrapper for SABER +Inputs will be of the form +TO -> --bucket test_bucket + --directory 12f2/stepname + f1 f2 f3 f4 +Should upload f1..4 to S3://test_bucket/12f2/stepname + +FROM -> --bucket test_bucket + --directory 12f2/stepname + f1 f2 f3 f4 +Should download S3://test_bucket/12f2/stepname/f1..4 to current dir + +TODO fix dependence on last file being in the same bucket +As an implementation note, this will fail if the last download file is from another bucket +""" + import argparse import boto3 import os import subprocess import botocore import parse -import resource -# Inputs will be of the form -# TO -> --bucket test_bucket -# --directory 12f2/stepname -# f1 f2 f3 f4 -# Should upload f1..4 to S3://test_bucket/12f2/stepname -# FROM -> --bucket test_bucket -# --directory 12f2/stepname -# f1 f2 f3 f4 -# Should download S3://test_bucket/12f2/stepname/f1..4 to current dir +import resource -parser = argparse.ArgumentParser( -) -# TODO fix dependence on last file being in the same bucket -# As an implementation note, this will fail if the last download file is from another bucket +parser = argparse.ArgumentParser() -parser.add_argument('--download', help='Comma delimited list of files to download in the form bucket/directory/file') -parser.add_argument('--upload', help='Comma delimited list of files to upload in the form bucket/directory/file') -parser.add_argument('--to', required=True, help='Where to upload to. Should be bucket/wf_hash/stepname') -parser.add_argument('--fr', required=True, help='Where to download from. Should be bucket/wf_hash') -parser.add_argument('command', nargs=argparse.REMAINDER, help='The actual command being run form of command args ...') -args = parser.parse_args() - -s3 = boto3.resource('s3') -if args.download: - infiles = args.download.split(',') - for f in infiles: - fs = f.split('/') - param_parse = parse.parse('{}.{}',fs[0]) +parser.add_argument( + "--download", + help="Comma delimited list of files to download in the form bucket/directory/file", +) +parser.add_argument( + "--upload", + help="Comma delimited list of files to upload in the form bucket/directory/file", +) +parser.add_argument( + "--to", required=True, help="Where to upload to. Should be bucket/wf_hash/stepname" +) +parser.add_argument( + "--fr", required=True, help="Where to download from. Should be bucket/wf_hash" +) +parser.add_argument( + "--use_cache", required=True, help="whether s3 cache should be used " +) +parser.add_argument( + "command", + nargs=argparse.REMAINDER, + help="The actual command being run form of command args ...", +) - if len(fs) > 1 and param_parse: - # The workflow has been parameterized - fs[0] = param_parse[0] - bwf = args.fr.split('/') - b=bwf[0] - wf=bwf[1] - # Path is of the form bucket/wf_hash/...dirs../filename - fp = '/'.join(fs[:-1]) - # If the filepath needs to be made - if fp: - if not os.path.exists(fp): - os.makedirs(fp, exist_ok=True) - k = '/'.join([wf,f]) - else: - # This means the file should just be downloaded from the root bucket - # to the current directory - fp = '.' - k = f - try: - print('Downloading {} to {}'.format(k,'/'.join(fs))) - s3.meta.client.download_file( - Bucket=b, - Filename='/'.join(fs), - Key=k - ) - except botocore.exceptions.ClientError as e: - print('Got error {} while trying to download file {}'.format(e, k)) - raise e +args = parser.parse_args() -proc = subprocess.call(args.command, shell=False) -if proc != 0: - raise SystemError('Child process failed with exit code {}... 
exiting...'.format(proc)) -if args.upload: - outfiles = args.upload.split(',') - for f in outfiles: - fs = f.split('/') - - # Path is of the form /...dirs../filename - fn = fs[-1] - - bs = args.to.split(':') - b = bs[0] - d = bs[1] - fp = '/'.join([d,fn]) - try: - print('Uploading {} to {}'.format(f,fp)) +if args.use_cache == "True": + print("Using Cached S3 File located in {}".format(args.fr)) +else: + s3 = boto3.resource("s3") + if args.download: + infiles = args.download.split(",") + for f in infiles: + fs = f.split("/") + fn = fs[-1] + # Check if file already copied into tool container + if os.path.exists(fn): + pass + else: + param_parse = parse.parse("{}.{}", fs[0]) + if len(fs) > 1 and param_parse: + # The workflow has been parameterized + fs[0] = param_parse[0] + bwf = args.fr.split("/") + b = bwf[0] + wf = bwf[1] + # Path is of the form bucket/wf_hash/...dirs../filename + fp = "/".join(fs[:-1]) + # If the filepath needs to be made + if fp: + if not os.path.exists(fp): + os.makedirs(fp, exist_ok=True) + k = "/".join([wf, f]) + else: + # This means the file should just be downloaded from the + # root bucket to the current directory + fp = "." + k = f + try: + print("Downloading {} to {}".format(k, "/".join(fs))) + s3.meta.client.download_file(Bucket=b, Filename="/".join(fs), Key=k) + except botocore.exceptions.ClientError as e: + print("Got error {} while trying to download file {}".format(e, k)) + raise e - s3.meta.client.upload_file( - Bucket=b, - Filename=f, - Key=fp - ) - except botocore.exceptions.ClientError as e: - print('Got error {} while trying to upload file {}'.format(e, k)) - raise e - print('Deleting file to free space...') - os.remove(f) -print('TOTAL MEMORY USED: {}'.format(resource.getrusage(0).ru_maxrss)) + proc = subprocess.call(args.command, shell=False) + if proc != 0: + raise SystemError( + "Child process failed with exit code {}... exiting...".format(proc) + ) + if args.upload: + outfiles = args.upload.split(",") + for f in outfiles: + fs = f.split("/") + # Path is of the form /...dirs../filename + fn = fs[-1] + bs = args.to.split(":") + b = bs[0] + d = bs[1] + fp = "/".join([d, fn]) + try: + print("Uploading {} to {}".format(f, fp)) + s3.meta.client.upload_file(Bucket=b, Filename=f, Key=fp) + except botocore.exceptions.ClientError as e: + print("Got error {} while trying to upload file {}".format(e, k)) + raise e + print("Deleting file to free space...") + os.remove(f) + print("TOTAL MEMORY USED: {}".format(resource.getrusage(0).ru_maxrss)) diff --git a/conduit/tests/__init__.py b/conduit/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/conduit/tests/test_awsbatch_operator.py b/conduit/tests/test_awsbatch_operator.py new file mode 100644 index 0000000..db0a7fb --- /dev/null +++ b/conduit/tests/test_awsbatch_operator.py @@ -0,0 +1,10 @@ +import unittest +class TestAwsBatchOperator(unittest.TestCase): + def setUp(self): + pass + def test_execute(self): + # Test that execution works properly + # Perhaps use a fake AWS server somehow? + # Initialize object and set operator.client to some fake fixture? 
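+        # One possible approach (sketch only): construct the operator with
+        # dummy arguments, replace self.client with a unittest.mock.MagicMock
+        # whose submit_job/describe_jobs return canned SUCCEEDED responses,
+        # and assert that execute() records the expected job metadata row.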
+ pass + \ No newline at end of file diff --git a/conduit/tests/test_commandlist.py b/conduit/tests/test_commandlist.py new file mode 100644 index 0000000..aaf7a1e --- /dev/null +++ b/conduit/tests/test_commandlist.py @@ -0,0 +1,33 @@ +import unittest +from conduit.utils.command_list import generate_command_list, generate_io_strings, sub_params +import yaml + +class TestCommandlist(unittest.TestCase): + # TODO needs to be class + + def setUp(self): + pass + def test_generate_command_list(self): + # Test cases: + # 1. Single input, single output (SISO) tool + # 2. Multi input, multi output (MIMO) tool + # 3. MIMO tool with local + # 4. MIMO tool with iteration parameters + # 5. MIMO tool with no file path + # 6. MIMO tool with file path + pass + def test_sub_params(self): + # Test cases: + # 1. Single input + # 2. Multi input + # 3. Edge case + pass + def test_generate_io_strings(self): + # Test cases: + # 1. Empty input + # 2. Single input + # 3. Multi input + # 4. Edge case + pass + + \ No newline at end of file diff --git a/conduit/tests/test_cwlparser.py b/conduit/tests/test_cwlparser.py new file mode 100644 index 0000000..3dddba0 --- /dev/null +++ b/conduit/tests/test_cwlparser.py @@ -0,0 +1,72 @@ +import unittest +from conduit.utils.cwlparser import CwlParser + +class TestCwlParser(unittest.TestCase): + def setUp(self): + pass + def test_resolve_tools(self): + # Test cases: + # 1. Single tool + # 2. Multiple tools + # 3. Workflow CWL not in current directory + pass + def test_generate_volume_list(self): + # Test cases: + # 1. Test no outputs + # 2. Single output + # 3. Multiple output + # 4. Empty local path + pass + def test_create_job_definitions(self): + # Test cases: + # 1. Single tool + # 2. Multiple tools + # 3. Workflow CWL not in current directory + pass + def test_build_docker_images(self): + # Test cases: + # 1. Single tool + # 2. Multiple tools + # 3. Multiple tools using same image + pass + def test_create_subdag(self): + # Test cases: + # 1. Single tool + # 2. Multiple tools + # 3. No iterations + # 4. Multiple iterations + # 5. Empty update dict + + pass + + def test_generate_dag(self): + # Test cases: + # 1. Single tool + # 2. Multiple tools + # 3. No iterations + # 4. Multiple iterations + # 5. Empty update dict + # 6. Subdag = False + pass + def test_resolve_args(self): + # Test cases: + # 1. Single tool + # 2. Multiple tools + # 3. No iterations + # 4. Multiple iterations + # 5. Empty update dict + pass + def test_resolve_dependencies(self): + # Test cases: + # 1. Single tool + # 2. Multiple tools + # 3. Single tool, no outputs + # 4. Multiple iterations + # 5. Empty update dict + pass + def test_resolve_glob(self): + # Test cases: + # 1. Undefined tool + # 2. Non-parseable glob + # 3. Parseable but non-input glob + pass diff --git a/conduit/tests/test_data/test_parameterization.yml b/conduit/tests/test_data/test_parameterization.yml new file mode 100644 index 0000000..181e8cd --- /dev/null +++ b/conduit/tests/test_data/test_parameterization.yml @@ -0,0 +1,63 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +metaparam1: + range: + start: 0.0 + stop: 1 + step: 0.1 + parameters: + abs: param1 + steps: + - step1 + - step2 + - step3 +metaparam2: + range: + start: 0.0 + stop: 0.2 + step: 0.1 + parameters: + abs: param2 + steps: + - step1 +metaparam3: + range: + start: 0.0 + stop: 1 + step: 0.1 + parameters: + abs: param2 + steps: + - step1 + +metaparam4: + range: + start: 0.0 + stop: 1 + step: 0.1 + parameters: + abs: param2 + steps: + - step1 + +metaparam5: + range: + start: 0.0 + stop: 1 + step: 0.1 + parameters: + abs: param2 + steps: + - step1 \ No newline at end of file diff --git a/conduit/tests/test_datajoint_hook.py b/conduit/tests/test_datajoint_hook.py new file mode 100644 index 0000000..7c36488 --- /dev/null +++ b/conduit/tests/test_datajoint_hook.py @@ -0,0 +1,8 @@ +import unittest +# Wait until refactor here +# from conduit.utils.datajoint_hook import * + +class TestDatajointHook(unittest.TestCase): + def setUp(self): + pass + \ No newline at end of file diff --git a/conduit/tests/test_job_definitions.py b/conduit/tests/test_job_definitions.py new file mode 100644 index 0000000..56e458f --- /dev/null +++ b/conduit/tests/test_job_definitions.py @@ -0,0 +1,37 @@ +import unittest + +from conduit.utils.job_definitions import (create_and_push_docker_image, + create_job_definition, + create_job_definitions, docker_auth, + docker_login, docker_registry_login, + extract, generate_job_definition, + get_original_docker_name, + make_build_context, make_tag) +class TestJobDefinitions(unittest.TestCase): + # TODO need to make job definitions into a class in order to test properly + def setUp(self): + pass + def test_create_and_push_docker_image(self): + pass + def test_create_job_definition(self): + pass + def test_create_job_definitions(self): + pass + def test_docker_auth(self): + pass + def test_docker_login(self): + pass + def test_docker_registry_login(self): + pass + def test_extract(self): + pass + def test_generate_job_definition(self): + pass + def test_get_original_docker_name(self): + pass + def test_make_build_context(self): + pass + def test_make_tag(self): + pass + + \ No newline at end of file diff --git a/conduit/tests/test_parameterization.py b/conduit/tests/test_parameterization.py new file mode 100644 index 0000000..73e6784 --- /dev/null +++ b/conduit/tests/test_parameterization.py @@ -0,0 +1,70 @@ + +import unittest +import yaml +import json +import os +import itertools +import numpy as np +from conduit.utils.parameterization import parameterize +from conduit.tests.testing_utils import load_test_data +class TestParameterization(unittest.TestCase): + def setUp(self): + self._test_data = load_test_data('test_parameterization.yml') + + def test_parameterize_single(self): + data = self._test_data['metaparam1'] + data = {"metaparam1" : data} + p = parameterize(data) + expected_dict_format = { + "step1" : { + "param1" : "{a}" + }, + "step2" : { + "param1" : "{a}" + }, + "step3" : { + "param1" : "{a}" + } + } + for i,step in enumerate(p): + self.assertDictLike(expected_dict_format, step, a=0.1*i) + def test_parameterize_multiple(self): + data = { + "metaparam1" : 
self._test_data['metaparam1'], + "metaparam2" : self._test_data['metaparam2'], + } + p = parameterize(data) + expected_dict_format = { + "step1" : { + "param1" : "{a}", + "param2" : "{b}" + }, + "step2" : { + "param1" : "{a}", + + }, + "step3" : { + "param1" : "{a}", + + } + } + vals = list(itertools.product(np.arange(0.0, 1, 0.1),np.arange(0.0, 0.2, 0.1))) + self.assertEqual(len(p), len(vals)) + for step,(a,b) in zip(p,vals): + + self.assertDictLike(expected_dict_format,step, a=a, b=b) + + + def assertDictLike(self, d1, d2, *args, **kwargs): + yaml.Dumper.ignore_aliases = lambda *args : True + d1str = yaml.dump(d1, default_flow_style=False) + d2str = yaml.dump(d2, default_flow_style=False) + + d1str = d1str.format(*args, **kwargs) + # print(d1str, d2str) + d1l = yaml.load(d1str) + d2l = yaml.load(d2str) + self.assertEqual(d1l,d2l) + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/conduit/tests/test_saber_docker_operator.py b/conduit/tests/test_saber_docker_operator.py new file mode 100644 index 0000000..a30506d --- /dev/null +++ b/conduit/tests/test_saber_docker_operator.py @@ -0,0 +1,7 @@ +import unittest +class TestSaberDockerOperator(unittest.TestCase): + def setUp(self): + pass + def test_execute(self): + pass + \ No newline at end of file diff --git a/conduit/tests/testing_utils.py b/conduit/tests/testing_utils.py new file mode 100644 index 0000000..7335e92 --- /dev/null +++ b/conduit/tests/testing_utils.py @@ -0,0 +1,7 @@ +import yaml +def load_test_data(filename): + fileloc = os.path.dirname(__file__) + fn = os.path.join(fileloc, 'test_data', filename) + with open(fn) as fp: + test_data = yaml.load(fp) + return test_data \ No newline at end of file diff --git a/conduit/utils/awsbatch_operator.py b/conduit/utils/awsbatch_operator.py index 672c4da..b8c4664 100644 --- a/conduit/utils/awsbatch_operator.py +++ b/conduit/utils/awsbatch_operator.py @@ -15,15 +15,16 @@ import sys import parse - from math import log1p from time import sleep, time from airflow.exceptions import AirflowException from airflow.models import BaseOperator from airflow.utils import apply_defaults -from utils.datajoint_hook import JobMetadata +from conduit.utils.datajoint_hook import DatajointHook, JobMetadata from airflow.contrib.hooks.aws_hook import AwsHook from datajoint.errors import DuplicateError + + class AWSBatchOperator(BaseOperator): """ Execute a job on AWS Batch Service @@ -45,14 +46,26 @@ class AWSBatchOperator(BaseOperator): :param region_name: region name to use in AWS Hook. 
Override the region_name in connection (if provided) """ - ui_color = '#c3dae0' + ui_color = "#c3dae0" client = None arn = None - template_fields = ('overrides',) + template_fields = ("overrides",) @apply_defaults - def __init__(self, job_name, job_definition, queue, overrides, workflow_id, max_retries=288, - aws_conn_id=None, region_name=None, job_parameters={}, score_format='', **kwargs): + def __init__( + self, + job_name, + job_definition, + queue, + overrides, + workflow_id, + max_retries=288, + aws_conn_id=None, + region_name=None, + job_parameters={}, + score_format="", + **kwargs + ): super(AWSBatchOperator, self).__init__(**kwargs) self.job_name = job_name @@ -65,55 +78,55 @@ def __init__(self, job_name, job_definition, queue, overrides, workflow_id, max_ self.jobParameters = job_parameters self.jobId = None self.jobName = None + self.dj_hook = DatajointHook() + self.workflow_id = workflow_id self.jobmetadata_db = JobMetadata() self.hook = self.get_hook() self.score_format = score_format + def execute(self, context): self.log.info( - 'Running AWS Batch Job - Job definition: %s - on queue %s', - self.job_definition, self.queue - ) - self.log.info('AWSBatchOperator overrides: %s', self.overrides) - - self.client = self.hook.get_client_type( - 'batch', - region_name=self.region_name + "Running AWS Batch Job - Job definition: %s - on queue %s", + self.job_definition, + self.queue, ) + self.log.info("AWSBatchOperator overrides: %s", self.overrides) + self.client = self.hook.get_client_type("batch", region_name=self.region_name) try: response = self.client.submit_job( jobName=self.job_name, jobQueue=self.queue, jobDefinition=self.job_definition, containerOverrides=self.overrides, - parameters=self.jobParameters) - - self.log.info('AWS Batch Job started: %s', response) - - self.jobId = response['jobId'] - self.jobName = response['jobName'] + parameters=self.jobParameters, + ) + self.log.info("AWS Batch Job started: %s", response) + self.jobId = response["jobId"] + self.jobName = response["jobName"] self._wait_for_task_ended() - self._check_success_task() - task_time, score = self._get_score() - iteration = self.task_id.split('.')[1] - real_task_id = self.task_id.split('.')[0] - self.log.info('Inserting {} {} {} {} {} into job metadata database'.format(self.workflow_id, iteration, real_task_id, task_time, score)) - try: - self.jobmetadata_db.insert1({ - 'iteration' : iteration, - 'workflow_id' : self.workflow_id, - 'job_id' : real_task_id, - 'cost' : task_time, - 'score' : score - }) - except DuplicateError: - pass - - self.log.info('AWS Batch Job has been successfully executed: %s', response) + iteration = self.task_id.split(".")[1] + real_task_id = self.task_id.split(".")[0] + self.log.info( + "Inserting {} {} {} {} {} into job metadata database".format( + self.workflow_id, iteration, real_task_id, task_time, score + ) + ) + self.dj_hook.insert1( + { + "iteration": iteration, + "workflow_id": self.workflow_id, + "job_id": real_task_id, + "cost": task_time, + "score": score, + }, + JobMetadata, + ) + self.log.info("AWS Batch Job has been successfully executed: %s", response) except Exception as e: - self.log.info('AWS Batch Job has failed executed') + self.log.info("AWS Batch Job has failed executed") raise AirflowException(e) def _wait_for_task_ended(self): @@ -128,7 +141,7 @@ def _wait_for_task_ended(self): """ # TODO improve this? Checking every 5s doesn't seem like too often... 
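        # Preferred path: use a registered boto3 waiter for the Batch client;
        # get_waiter raises ValueError when no such waiter exists, in which
        # case we fall back to polling describe_jobs with a slowly growing
        # (log1p-scaled) sleep between attempts.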
try: - waiter = self.client.get_waiter('job_execution_complete') + waiter = self.client.get_waiter("job_execution_complete") waiter.config.max_attempts = sys.maxsize # timeout is managed by airflow waiter.wait(jobs=[self.jobId]) except ValueError: @@ -137,67 +150,65 @@ def _wait_for_task_ended(self): retries = 0 while (retries < self.max_retries or self.max_retries <= 0) and retry: - response = self.client.describe_jobs( - jobs=[self.jobId] - ) - if response['jobs'][-1]['status'] in ['SUCCEEDED', 'FAILED']: + response = self.client.describe_jobs(jobs=[self.jobId]) + if response["jobs"][-1]["status"] in ["SUCCEEDED", "FAILED"]: retry = False sleep(log1p(retries) * 30) retries += 1 def _check_success_task(self): - response = self.client.describe_jobs( - jobs=[self.jobId], - ) + response = self.client.describe_jobs(jobs=[self.jobId],) - self.log.info('AWS Batch stopped, check status: %s', response) - if len(response.get('jobs')) < 1: - raise AirflowException('No job found for {}'.format(response)) + self.log.info("AWS Batch stopped, check status: %s", response) + if len(response.get("jobs")) < 1: + raise AirflowException("No job found for {}".format(response)) - for job in response['jobs']: - if 'attempts' in job: - containers = job['attempts'] + for job in response["jobs"]: + if "attempts" in job: + containers = job["attempts"] for container in containers: - if (job['status'] == 'FAILED' or - container['container']['exitCode'] != 0): + if ( + job["status"] == "FAILED" + or container["container"]["exitCode"] != 0 + ): print("@@@@") - raise AirflowException('This containers encounter an error during execution {}'.format(job)) - elif job['status'] is not 'SUCCEEDED': - raise AirflowException('This task is still pending {}'.format(job['status'])) + raise AirflowException( + "This container encountered an error during execution: {}".format( + job + ) + ) + elif job["status"] != "SUCCEEDED": + raise AirflowException( + "This task is still pending {}".format(job["status"]) + ) def get_hook(self): - return AwsHook( - aws_conn_id=self.aws_conn_id - ) + return AwsHook(aws_conn_id=self.aws_conn_id) def on_kill(self): response = self.client.terminate_job( - jobId=self.jobId, - reason='Task killed by the user') + jobId=self.jobId, reason="Task killed by the user" + ) self.log.info(response) + def _get_score(self): - response = self.client.describe_jobs( - jobs=[self.jobId] - ) - runTime = response['jobs'][-1]['stoppedAt'] - response['jobs'][-1]['startedAt'] + response = self.client.describe_jobs(jobs=[self.jobId]) + runTime = response["jobs"][-1]["stoppedAt"] - response["jobs"][-1]["startedAt"] if self.score_format: - logStream = response['jobs'][-1]['container']['logStreamName'] - self.logClient =self.hook.get_client_type( - 'logs', - region_name=self.region_name + logStream = response["jobs"][-1]["container"]["logStreamName"] + self.logClient = self.hook.get_client_type( + "logs", region_name=self.region_name ) response = self.logClient.get_log_events( - logGroupName = '/aws/batch/job', - logStreamName = logStream, + logGroupName="/aws/batch/job", logStreamName=logStream, ) - logEvents = response['events'] + logEvents = response["events"] # Reads events from most recent to least recent (earliest), so the # first match is the most recent score. Perhaps change this?
for logEvent in logEvents: - parsed_event = parse.parse(self.score_format, logEvent['message']) - if parsed_event and 'score' in parsed_event.named: - return (runTime, float(parsed_event['score'])) - self.log.info('Score format present but no score found in logs...') + parsed_event = parse.parse(self.score_format, logEvent["message"]) + if parsed_event and "score" in parsed_event.named: + return (runTime, float(parsed_event["score"])) + self.log.info("Score format present but no score found in logs...") return (runTime, None) - diff --git a/conduit/utils/command_list.py b/conduit/utils/command_list.py index 56d979d..aa95a97 100644 --- a/conduit/utils/command_list.py +++ b/conduit/utils/command_list.py @@ -13,58 +13,113 @@ # limitations under the License. import parse -def generate_command_list(tool_yml, step): - ''' + + +def generate_command_list( + tool_yml, iteration_parameters, step, local=False, file_path=None +): + """ Generates an AWS Batch command list from a tool YML Parameters: ----------- tool_yml : dict Tool YML from file + iteration_parameters: dict + Job parameters for a particular step step : dict Step from CWL. Used to make sure that the input is enabled in the workflow - + file_path = path to store intermediate files (local or s3) + Returns: -------- list of str: Command list, where each string is a seperate string. Could be used as input to a docker RUN cmd - ''' + """ # Command list generation + try: + use_cache = step["hints"]["saber"]["use_cache"] + except KeyError: + use_cache = "False" # Prepend to copy data from S3 (if any of the tool inputs are Files) - command_list = ['python3', '/app/s3wrap', '--to', 'Ref::_saber_stepname', '--fr', 'Ref::_saber_home'] - # Only care about file inputs - input_files = [] - if len(tool_yml['inputs']) > 0: - input_files = [t for tn,t in tool_yml['inputs'].items() if t['type'] == 'File'] + if local: + command_list = [ + "python3", + "/app/localwrap", + "--wf", + "Ref::_saber_stepname", + "--use_cache", + str(use_cache), + ] + # Only care about file inputs + seperator = "," + input_files = iteration_parameters.get("_saber_input", []) if len(input_files) > 0: - command_list.append('--download') - command_list.append('Ref::_saber_input') + input_files = input_files.split(",") + command_list.append("--input") + command_list.append(seperator.join(input_files)) - # Append the data outputs to S3 - output_files = [] - if len(tool_yml['outputs']) > 0: - output_files = [t for tn,t in tool_yml['outputs'].items() if t['type'] == 'File'] + # Append the data outputs to S3 + output_files = iteration_parameters.get("_saber_output", []) if len(output_files) > 0: - command_list.append('--upload') - command_list.append('Ref::_saber_output') + output_files = output_files.split(",") + command_list.append("--output") + command_list.append(seperator.join(output_files)) + else: + if file_path is not None: + # bucket/directory/wf_id/ + source = "/".join(file_path.split("/")[:-1]) + command_list = [ + "python3", + "/app/s3wrap", + "--to", + file_path, + "--fr", + source, + "--use_cache", + str(use_cache), + ] + else: + command_list = [ + "python3", + "/app/s3wrap", + "--to", + "Ref::_saber_stepname", + "--fr", + "Ref::_saber_home", + "--use_cache", + str(use_cache), + ] + # Only care about file inputs + input_files = iteration_parameters.get("_saber_input", []) + if len(input_files) > 0: + command_list.append("--download") + command_list.append("Ref::_saber_input") + + # Append the data outputs to S3 + output_files = iteration_parameters.get("_saber_output", []) 
+ if len(output_files) > 0: + command_list.append("--upload") + command_list.append("Ref::_saber_output") # Not really necessary to split but I dont see a use case where one would want a space in their command... - command_list.extend(tool_yml['baseCommand'].split()) - command_list.extend([arg for arg in tool_yml['arguments']]) - + command_list.extend(tool_yml["baseCommand"].split()) + command_list.extend([arg for arg in tool_yml["arguments"]]) + # Create sorted input list to respect CWL input binding - sorted_inps = [(inpn, inp) for inpn, inp in tool_yml['inputs'].items()] - sorted_inps.sort(key=lambda x: x[1]['inputBinding']['position']) + sorted_inps = [(inpn, inp) for inpn, inp in tool_yml["inputs"].items()] + sorted_inps.sort(key=lambda x: x[1]["inputBinding"]["position"]) # Add to the command_list - for inpn,inp in sorted_inps: - if inpn in step['in']: - command_list.append(inp['inputBinding']['prefix']) - command_list.append('Ref::{}'.format(inpn)) + for inpn, inp in sorted_inps: + if inpn in step["in"]: + command_list.append(inp["inputBinding"]["prefix"]) + command_list.append("Ref::{}".format(inpn)) return command_list -def sub_params(command_list,params): - ''' + +def sub_params(command_list, params): + """ Substitutes parameters from their references for use in local execution Parameters: @@ -73,15 +128,17 @@ def sub_params(command_list,params): Generated from above function params: dict Dictionary of form {parameter_name : parameter value} - ''' + """ - for i,command in enumerate(command_list): - parsed_command = parse.parse('Ref::{p}',command ) + for i, command in enumerate(command_list): + parsed_command = parse.parse("Ref::{p}", command) if parsed_command is not None: - command_list[i] = params[parsed_command['p']] + command_list[i] = params[parsed_command["p"]] return command_list -def generate_io_strings(tool_yml, wf_hash, step_params,j): - ''' + + +def generate_io_strings(tool_yml, wf_hash, step_params, j): + """ Generates IO strings for the S3 wrapper Params: @@ -94,35 +151,40 @@ def generate_io_strings(tool_yml, wf_hash, step_params,j): The parameters of the step j: int The iteration (for parameterization) - ''' - - inp_string = [] - out_string = [] - if len(tool_yml['inputs']) > 0: - input_files = dict([(tn,t) for tn,t in tool_yml['inputs'].items() if t['type'] == 'File']) - for i in input_files: - s = step_params[i].split('/') + """ + inp_string = [] + out_string = [] + if len(tool_yml["inputs"]) > 0: + input_files = dict( + [ + (tn, t) + for tn, t in tool_yml["inputs"].items() + if t["type"] == "File" or t["type"] == "File?" 
+ ] + ) + for i in input_files: + if i in step_params.keys(): + s = step_params[i].split("/") if len(s) > 1: # Form input/file # TODO make naming more consistent - s[0] = s[0] #dumb fix - s[0] += '.{}'.format(j) - inp_string.append('/'.join(s)) - - - - if len(tool_yml['outputs']) > 0: - output_files = dict([(tn,t) for tn,t in tool_yml['outputs'].items() if t['type'] == 'File']) - out_string = [] - for t in output_files.values(): - glob = t['outputBinding']['glob'] - # Ad hoc glob resolution - glob_parse = parse.parse('$({}.{})',glob) - if not glob_parse: - # Just a filename, not a reference - out_string.append(glob) - else: - out_string.append(step_params[glob_parse[1]]) - - return (','.join(inp_string), ','.join(out_string)) + s[0] = s[0] # dumb fix + s[0] += ".{}".format(j) + inp_string.append("/".join(s)) + if len(tool_yml["outputs"]) > 0: + output_files = dict( + [(tn, t) for tn, t in tool_yml["outputs"].items() if t["type"] == "File"] + ) + out_string = [] + for t in output_files.values(): + glob = t["outputBinding"]["glob"] + # Ad hoc glob resolution + glob_parse = parse.parse("$({}.{})", glob) + if not glob_parse: + # Just a filename, not a reference + out_string.append(glob) + else: + out_string.append(step_params[glob_parse[1]]) + + return (",".join(inp_string), ",".join(out_string)) diff --git a/conduit/utils/cwlparser.py b/conduit/utils/cwlparser.py index 20d7f29..1fdebcc 100755 --- a/conduit/utils/cwlparser.py +++ b/conduit/utils/cwlparser.py @@ -14,7 +14,7 @@ # limitations under the License. import os -import yaml +import yaml from datetime import datetime, timedelta @@ -25,24 +25,33 @@ import hashlib import boto3 import re +import pathlib +import datajoint +import parse +import cwltool.main as cwltool from airflow import DAG from airflow.operators.subdag_operator import SubDagOperator -from utils.datajoint_hook import Workflow, schema, create_dj_schema, JobMetadata -from datajoint.errors import DuplicateError, DataJointError -import cwltool.main as cwltool -import parse -from utils.parameterization import parameterize -from utils.job_definitions import create_job_definition, generate_job_definition, make_tag, create_and_push_docker_image -from utils.command_list import * -from utils.awsbatch_operator import AWSBatchOperator -from utils.saber_docker_operator import SaberDockerOperator +from conduit.utils.datajoint_hook import Workflow, JobMetadata, DatajointHook +from datajoint.errors import DuplicateError, DataJointError +from conduit.utils.parameterization import parameterize +from conduit.utils.job_definitions import ( + create_job_definition, + generate_job_definition, + make_tag, + create_and_push_docker_image, +) +from conduit.utils.command_list import * +from conduit.utils.awsbatch_operator import AWSBatchOperator +from conduit.utils.saber_docker_operator import SaberDockerOperator log = logging.getLogger(__name__) + + class CwlParser: - def __init__(self, cwl, config, constant=False): - ''' + def __init__(self, cwl, config, constant=False, optimization_iteration=None): + """ Initializes the CWL parser parameters: @@ -53,50 +62,69 @@ def __init__(self, cwl, config, constant=False): AWSbatch queue name constant: bool [Deprecated] - ''' + """ self.default_args = { "depends_on_past": False, "start_date": datetime(2018, 2, 23), "max_retries": 300, - } - filename_prefix = cwl.split('.cwl')[0] + filename_prefix = cwl.split(".cwl")[0] self.workflow_name = os.path.split(filename_prefix)[-1] # For resolving paths relative to files self.cwl_fp = 
os.path.abspath(os.path.dirname(cwl)) # Load cwl with open(cwl) as fp: - self.cwl = yaml.load(fp) - + self.cwl = yaml.full_load(fp) + self.constant = constant self.steps = self.resolve_tools() self.job_def_arns = {} self.tags = {} self.config = config - self.queue = self.config['job-queue']['jobQueueName'] + self.queue = self.config["job-queue"]["jobQueueName"] # Create AWS job defs and push images - self.workflow_db = Workflow() - self.job_param_db = schema(create_dj_schema(self.cwl['inputs'], self.workflow_name))() + self.dj_hook = DatajointHook(config=config["datajoint"]) + + self.job_param_def = self.dj_hook.create_definition( + self.cwl["inputs"], self.workflow_name + ) self.parameterization = [{}] + self.optimization_iteration = optimization_iteration try: - if self.cwl['doc'] == 'local': + if self.cwl["doc"] == "local": self.local = True else: "Doc specified but not running locally." except KeyError: self.local = False + + def generate_volume_list(self, tool_yml, local_path): + """ + input_files = [] + if len([tn for tn,t in tool_yml['inputs'].items() if t['type'] == 'File']) > 0: + f = iteration_parameters['input'] + fs = f.split('/') + volumes.append(':'.join([fs,fs])) + """ + volumes = [] + if len(tool_yml["outputs"]) > 0 or len(tool_yml["inputs"]) > 0: + volumes.append(local_path + ":/volumes/data/local") + return volumes + def create_job_definitions(self): for stepname, tool in self.steps.items(): - log.info('Generating job definition for step {}'.format(stepname)) + log.info("Generating job definition for step {}".format(stepname)) tag = make_tag(stepname, tool, self.local) self.tags[stepname] = tag if self.local: - print("WORKING LOCALLY- no job definitions generated") + log.info("WORKING LOCALLY- no job definitions generated") else: job_def = generate_job_definition(stepname, tool, tag) - self.job_def_arns[stepname] = create_job_definition(job_def)['jobDefinitionArn'] + self.job_def_arns[stepname] = create_job_definition(job_def)[ + "jobDefinitionArn" + ] def build_docker_images(self): done_tags = [] @@ -107,67 +135,97 @@ def build_docker_images(self): done_tags.append(tag) def resolve_tools(self): - ''' + """ Resolves the tools into yaml files from the cwl file - ''' + """ tool_yamls = {} - toollist = [(step_name, os.path.normpath(os.path.join(self.cwl_fp,step['run']))) for step_name, step in self.cwl['steps'].items()] + toollist = [ + (step_name, os.path.normpath(os.path.join(self.cwl_fp, step["run"]))) + for step_name, step in self.cwl["steps"].items() + ] for tool_name, tool in toollist: tool_yamls[tool_name] = {} with open(tool) as fp: - tool_yaml = yaml.load(fp) + tool_yaml = yaml.full_load(fp) tool_yamls[tool_name] = tool_yaml return tool_yamls - def create_subdag(self, iteration, i, param_db_update_dict, job_params, job, wf_id, deps, dag=None): - subdag_id = '{}_{}.{}'.format(self.workflow_name, wf_id, i) - parent_dag_id = '{}_{}'.format(self.workflow_name, wf_id) + def create_subdag( + self, iteration, i, param_db_update_dict, job_params, job, wf_id, deps, dag=None + ): + subdag_id = "{}_{}.{}".format(self.workflow_name, wf_id, i) + parent_dag_id = "{}_{}".format(self.workflow_name, wf_id) if dag == None: subdag = DAG( - default_args = self.default_args, - dag_id = subdag_id, - schedule_interval = None + default_args=self.default_args, dag_id=subdag_id, schedule_interval=None ) - else: + else: subdag = dag - subdag_steps = {} + subdag_steps = {} for stepname, tool in self.steps.items(): - stepname_c = '{}.{}'.format(stepname,i) - + stepname_c = 
"{}.{}".format(stepname, i) + iteration_parameters = job_params[stepname].copy() - - if iteration and stepname in iteration: - for key,value in iteration[stepname].items(): - param_db_update_dict[self.cwl['steps'][stepname]['in'][key]] = value - iteration_parameters.update(iteration[stepname]) - (in_string, out_string) = generate_io_strings(tool, wf_id, iteration_parameters,i) - iteration_parameters['_saber_home'] = '{}/{}'.format(job['_saber_bucket'], wf_id) + if stepname in iteration: + for key, value in iteration[stepname].items(): + param_db_update_dict[self.cwl["steps"][stepname]["in"][key]] = value + iteration_parameters.update(iteration[stepname]) + (in_string, out_string) = generate_io_strings( + tool, wf_id, iteration_parameters, i + ) + if self.local: + iteration_parameters["_saber_home"] = wf_id + iteration_parameters["_saber_stepname"] = "{}/{}".format( + wf_id, stepname_c + ) + else: + iteration_parameters["_saber_home"] = "{}/{}".format( + job["_saber_bucket"], wf_id + ) + iteration_parameters["_saber_stepname"] = "{}:{}/{}".format( + job["_saber_bucket"], wf_id, stepname_c + ) if in_string: - iteration_parameters['_saber_input'] = in_string + iteration_parameters["_saber_input"] = in_string if out_string: - iteration_parameters['_saber_output'] = out_string - iteration_parameters['_saber_stepname'] = '{}:{}/{}'.format(job['_saber_bucket'], wf_id,stepname_c) + iteration_parameters["_saber_output"] = out_string try: - score_format = self.cwl['steps'][stepname]['hints']['saber']['score_format'] + score_format = self.cwl["steps"][stepname]["hints"]["saber"][ + "score_format" + ] except KeyError: - score_format = '' + score_format = "" try: - is_local = self.cwl['steps'][stepname]['hints']['saber']['local'] + is_local = self.cwl["steps"][stepname]["hints"]["saber"]["local"] except KeyError: is_local = False try: - step_job_queue = self.cwl['steps'][stepname]['hints']['saber']['queue'] + step_job_queue = self.cwl["steps"][stepname]["hints"]["saber"]["queue"] except KeyError: step_job_queue = self.queue - - log.debug('Score_format: {}'.format(score_format)) - command_list = generate_command_list(tool, self.cwl['steps'][stepname]) + try: + file_path = self.cwl["steps"][stepname]["hints"]["saber"]["file_path"] + if not self.local: + file_path = "{}:{}".format( + job["_saber_bucket"], os.path.join(file_path, stepname_c) + ) + except KeyError: + file_path = None + + log.debug("Score_format: {}".format(score_format)) + command_list = generate_command_list( + tool, + iteration_parameters, + self.cwl["steps"][stepname], + self.local, + file_path, + ) if is_local: if not self.local: creds = boto3.session.Session().get_credentials() env_dict = { - 'AWS_ACCESS_KEY_ID' : creds.access_key, - 'AWS_SECRET_ACCESS_KEY' : creds.secret_key + "AWS_ACCESS_KEY_ID": creds.access_key, + "AWS_SECRET_ACCESS_KEY": creds.secret_key, } t = SaberDockerOperator( task_id=stepname_c, @@ -175,66 +233,71 @@ def create_subdag(self, iteration, i, param_db_update_dict, job_params, job, wf_ score_format=score_format, image=self.tags[stepname], environment=env_dict, - command=' '.join(sub_params(command_list, iteration_parameters)), + command=" ".join( + sub_params(command_list, iteration_parameters) + ), dag=subdag, - pool='Local' - ) + pool="Local", + ) if self.local: + volumes = self.generate_volume_list(tool, file_path) t = SaberDockerOperator( task_id=stepname_c, workflow_id=parent_dag_id, score_format=score_format, image=self.tags[stepname], - command=' '.join(sub_params(command_list, iteration_parameters)), + 
command=" ".join( + sub_params(command_list, iteration_parameters) + ), dag=subdag, - pool='Local' - ) + pool="Local", + volumes=volumes, + ) else: t = AWSBatchOperator( task_id=stepname_c, - job_name=re.sub('[^A-Za-z0-9-_]+', "", "{}-{}".format(self.workflow_name,stepname_c)), + job_name=re.sub( + "[^A-Za-z0-9-_]+", + "", + "{}-{}".format(self.workflow_name, stepname_c), + ), job_definition=self.job_def_arns[stepname], - overrides={ - 'command':command_list - }, + overrides={"command": command_list}, job_parameters=iteration_parameters, dag=subdag, queue=step_job_queue, workflow_id=parent_dag_id, - score_format=score_format - + score_format=score_format, + pool="Batch", ) subdag_steps[stepname] = t unique_keys = { - 'workflow_id' : parent_dag_id, - 'iteration' : i, + "workflow_id": parent_dag_id, + "iteration": i, } param_db_update_dict.update(unique_keys) - try: - self.job_param_db.insert1(param_db_update_dict) - except DuplicateError: - log.warning('Duplicate entry found for param db, updating') - (self.job_param_db & unique_keys).delete() - self.job_param_db.insert1(param_db_update_dict) + self.dj_hook.update( + param_db_update_dict, classdef=self.job_param_def, primary_keys=unique_keys + ) for dep in deps: subdag_steps[dep[0]].set_upstream(subdag_steps[dep[1]]) return subdag def set_parameterization(self, parameterization=[{}]): - ''' + """ Sets the parameterization for the workflow Parameters ---------- parameterization : iterable of dict Parameterization iterable with each iteration containing the - parameters to be changed and their values.''' + parameters to be changed and their values.""" - self.parameterization = parameterization - - def generate_dag(self,job,**kwargs): + self.parameterization = parameterize(parameterization) + + def generate_dag(self, job, **kwargs): - ''' + """ Generates an AWS airflow dag from CWL Parameters @@ -249,80 +312,88 @@ def generate_dag(self,job,**kwargs): Returns ------- DAG - ''' - # Create the unique name of the workflow based on the dir containing - # the job file + """ + # Create the unique name of the workflow based on the dir containing the job file wf_id = os.path.basename(os.path.dirname(os.path.abspath(job))) with open(job) as fp: - job = yaml.load(fp) + job = yaml.full_load(fp) - - dag_id = '{}_{}'.format(self.workflow_name, wf_id) - self.dag_id = dag_id + dag_id = "{}_{}".format(self.workflow_name, wf_id) + self.dag_id = dag_id default_args = { "depends_on_past": False, "start_date": datetime(2018, 2, 23), "max_retries": 300, - } - try: - self.workflow_db.insert1({ - 'workflow_id' : dag_id, - 'workflow_name' : self.workflow_name - }) - except( DuplicateError,DataJointError): - log.warning('Workflow database entry for {} already exists, reinserting'.format(self.workflow_name)) - # This is the dumbest way to delete an entry that I've ever seen - # delstr = {'workflow_id' : dag_id} - # (self.workflow_db & delstr).delete() - # self.workflow_db.insert1({ - # 'workflow_id' : dag_id, - # 'workflow_name' : self.workflow_name - - # },skip_duplicates=True) + self.dj_hook.init_workflow(id=dag_id, name=self.workflow_name) + except (DuplicateError, DataJointError): + log.warning( + "Workflow database entry for {} already exists, reinserting".format( + self.workflow_name + ) + ) pass - if self.cwl['class'] != 'Workflow': - raise TypeError('CWL is not a workflow') - dag = DAG( - dag_id=dag_id, - default_args=self.default_args, - schedule_interval=None - ) - dag_steps = [] + if self.cwl["class"] != "Workflow": + raise TypeError("CWL is not a workflow") 
+ dag = DAG(dag_id=dag_id, default_args=self.default_args, schedule_interval=None) job_params, deps = self.resolve_args(job) if len(self.parameterization) > 1: - log.info('Parameterization produces {} workflows, totaling {} jobs...'.format(len(self.parameterization), len(self.steps)*len(parameterization))) + log.info( + "Parameterization produces {} workflows, totaling {} jobs...".format( + len(self.parameterization), + len(self.steps) * len(self.parameterization), + ) + ) # If the parameter is a file, use the path param_db_update_dict = {} - for param in self.cwl['inputs']: + for param in self.cwl["inputs"]: if type(job[param]) != dict: param_db_update_dict[param] = job[param] - elif 'path' in job[param]: - param_db_update_dict[param] = job[param]['path'] + elif "path" in job[param]: + param_db_update_dict[param] = job[param]["path"] else: - raise ValueError('Unable to insert parameter {} into job parameter database'.format(param)) - - + raise ValueError( + "Unable to insert parameter {} into job parameter database".format( + param + ) + ) try: - use_subdag = self.cwl['hints']['saber']['use_subdag'] + use_subdag = self.cwl["hints"]["saber"]["use_subdag"] except KeyError: use_subdag = True - for i,iteration in enumerate(self.parameterization): + for i, iteration in enumerate(self.parameterization): + if self.optimization_iteration is None: + task_id = str(i) + else: + task_id = "{}_{}".format(self.optimization_iteration, i) if use_subdag: - subdag = self.create_subdag(iteration, i, param_db_update_dict, job_params, job, wf_id, deps, dag=None) - iteration_subdag_step = SubDagOperator( - subdag = subdag, - task_id = str(i), - dag = dag + subdag = self.create_subdag( + iteration, + task_id, + param_db_update_dict, + job_params, + job, + wf_id, + deps, + dag=None, ) + SubDagOperator(subdag=subdag, task_id=task_id, dag=dag) else: - dag = self.create_subdag(iteration, i, param_db_update_dict, job_params, job, wf_id, deps, dag=dag) - - + dag = self.create_subdag( + iteration, + task_id, + param_db_update_dict, + job_params, + job, + wf_id, + deps, + dag=dag, + ) return dag - def resolve_args(self,job): - ''' + + def resolve_args(self, job): + """ Creates job parameters from job file params: @@ -337,8 +408,8 @@ def resolve_args(self,job): their actual values from the job file list of tuple: List of form (step, step_dependency) - ''' - + """ + # Resolve dependencies output_deps = self.resolve_dependencies() deps = [] @@ -347,72 +418,85 @@ def resolve_args(self,job): inputs = {} optional_params = [] # Resolve inputs to job inputs - for param,t in self.cwl['inputs'].items(): + for param, t in self.cwl["inputs"].items(): if param in job: - + if type(job[param]) != dict: # Raw value # Must convert to string for AWS for some reason inputs[param] = str(job[param]) - elif (type(job[param]) == dict) and (job[param]['class'] == 'File'): - inputs[param] = job[param]['path'] + elif (type(job[param]) == dict) and (job[param]["class"] == "File"): + inputs[param] = job[param]["path"] else: - raise NotImplementedError('Job parameter value type {} not supported yet'.format(job[param]['class'])) - elif type(param) == dict and 'default' in param: + raise NotImplementedError( + "Job parameter value type {} not supported yet".format( + job[param]["class"] + ) + ) + elif type(param) == dict and "default" in param: # Using a default - inputs[param] = str(param['default']) + inputs[param] = str(param["default"]) else: - #Parameter is not in job, check if optional - if '?' 
in self.cwl['inputs'][param]: + # Parameter is not in job, check if optional + if "?" in self.cwl["inputs"][param]: optional_params.append(param) else: - raise TypeError('Parameter {} is required by workflow, but not present in job description'.format(param)) - + raise TypeError( + "Parameter {} is required by workflow, but not present in job description".format( + param + ) + ) + # Resolve step parameters # Redundant? step_params = {} - for step_name,step in self.cwl['steps'].items(): + for step_name, step in self.cwl["steps"].items(): step_params[step_name] = {} - for input_name,inp in step['in'].items(): - if type(inp) == dict and 'default' in inp: - step_params[step_name][input_name] = inp['default'] + for input_name, inp in step["in"].items(): + if type(inp) == dict and "default" in inp: + step_params[step_name][input_name] = inp["default"] elif inp in output_deps: # Add dependency to dependency list of form (step, step_dependency) - dep = (step_name,inp.split('/')[0]) + dep = (step_name, inp.split("/")[0]) if dep not in deps: deps.append(dep) # Take from previous step's directory - if type(output_deps[inp]) == dict and 'default' in output_deps[inp]: - step_params[step_name][input_name] = '{}/{}'.format(inp.split('/')[0], output_deps[inp]['default']) + if type(output_deps[inp]) == dict and "default" in output_deps[inp]: + step_params[step_name][input_name] = "{}/{}".format( + inp.split("/")[0], output_deps[inp]["default"] + ) else: - step_params[step_name][input_name] = '{}/{}'.format(inp.split('/')[0], inputs[output_deps[inp]]) - + step_params[step_name][input_name] = "{}/{}".format( + inp.split("/")[0], inputs[output_deps[inp]] + ) + elif inp in inputs: step_params[step_name][input_name] = inputs[inp] else: if inp not in optional_params: - raise TypeError('Input {} not recognized'.format(inp)) + raise TypeError("Input {} not recognized".format(inp)) return step_params, deps def resolve_dependencies(self): - ''' + """ Resolves dependencies (i.e. 
'tool/output') to input name defined in job file - ''' + """ resolved_outputs = {} - - for tool_name,tool_info in self.steps.items(): - log.debug('Resolving outputs for {}'.format(tool_name)) - if not len(tool_info['outputs']) == 0: - for output_name, output in tool_info['outputs'].items(): - log.debug('...for output {}'.format(output_name)) - resolved_outputs['{}/{}'.format(tool_name, output_name)] = self.resolve_glob( - tool_name=tool_name, - glob=output['outputBinding']['glob'] - ) + + for tool_name, tool_info in self.steps.items(): + log.debug("Resolving outputs for {}".format(tool_name)) + if not len(tool_info["outputs"]) == 0: + for output_name, output in tool_info["outputs"].items(): + log.debug("...for output {}".format(output_name)) + resolved_outputs[ + "{}/{}".format(tool_name, output_name) + ] = self.resolve_glob( + tool_name=tool_name, glob=output["outputBinding"]["glob"] + ) return resolved_outputs def resolve_glob(self, tool_name, glob): - ''' + """ Resolves a glob to an input parameter defined in the tool CWL Parameters @@ -426,35 +510,39 @@ def resolve_glob(self, tool_name, glob): -------- str: An input parameter from the tool CWL - ''' - glob_parse = parse.parse('$({}.{})',glob) + """ + glob_parse = parse.parse("$({}.{})", glob) if not glob_parse: return glob else: - if glob_parse[0] != 'inputs': - raise NotImplementedError('References to non-inputs are not supported') + if glob_parse[0] != "inputs": + raise NotImplementedError("References to non-inputs are not supported") # Glob with the form $(_._) means it must have a reference - return self.cwl['steps'][tool_name]['in'][glob_parse[1]] + return self.cwl["steps"][tool_name]["in"][glob_parse[1]] - def dag_write(self,dag): - ''' + def dag_write(self, dag): + """ Very crude way of saving the DAG so airflow can access it. - ''' - dag_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), '../dags/')) - with open(os.path.join(dag_folder, 'template_dag'),'r') as fp: + """ + dag_folder = os.path.abspath( + os.path.join(os.path.dirname(__file__), "../dags/") + ) + with open(os.path.join(dag_folder, "template_dag"), "r") as fp: template_string = fp.read() - with open(os.path.join(dag_folder, '{}_dag.pickle'.format(self.workflow_name)),'wb') as fp: + with open( + os.path.join(dag_folder, "{}_dag.pickle".format(self.workflow_name)), "wb" + ) as fp: pickle.dump(dag, fp) - with open(os.path.join(dag_folder, '{}_dag.py'.format(self.workflow_name)),'w') as fp: + with open( + os.path.join(dag_folder, "{}_dag.py".format(self.workflow_name)), "w" + ) as fp: fp.write(template_string.format(self.workflow_name)) def collect_results(self): - ''' + """ Prints the results of the workflow - ''' + """ # Datajoint has not implemented unions yet, so it would be as simple as - - query = JobMetadata() * self.job_param_db - return query.fetch(as_dict=True) - # d1 = JobMetadata().fetch() - # s2 = + + return self.dj_hook.query(self.job_param_def) + diff --git a/conduit/utils/datajoint_hook.py b/conduit/utils/datajoint_hook.py index 892d9d0..e8047a7 100644 --- a/conduit/utils/datajoint_hook.py +++ b/conduit/utils/datajoint_hook.py @@ -13,23 +13,27 @@ # limitations under the License. 
import datajoint as dj +from datajoint import DuplicateError import datetime import uuid from pathlib import Path from airflow.hooks.base_hook import BaseHook import re +import os +import contextlib +from contextlib import closing +import sys +db_types = { + "int": "int", + "boolean": "char(4)", + "float": "float", + "double": "double", + "string": "varchar(64)", + "File": "varchar(64)", +} - - -dj.config['database.host'] = 'datajoint:3306' -dj.config['database.user'] = 'root' -dj.config['database.password'] = 'airflow' -dj.conn() -schema = dj.schema('airflow', locals()) - -@schema class Workflow(dj.Manual): definition = """ # Workflows @@ -37,7 +41,8 @@ class Workflow(dj.Manual): --- workflow_name : varchar(40) """ -@schema + + class JobMetadata(dj.Manual): definition = """ # Metadata table for optimization / statistics @@ -51,40 +56,119 @@ class JobMetadata(dj.Manual): """ -def create_dj_schema(d, wf_name, is_cwl=True): - definition = """ - # Parameter table for workflow {} - -> Workflow - iteration : varchar(6) - --- - - """.format(wf_name) - for k,t in d.items(): - # Ignore saber keys - tp = t.replace('?','') - if not re.match('_saber_.*',k): - try: - djt = db_types[tp] if is_cwl else tp - except KeyError: - djt = "varchar(64)" - definition += " {} = null : {}\n".format(k,djt) - return type("{}Params".format(wf_name.title()), (dj.Manual,), dict(definition=definition)) - - - - - - - -db_types = { - 'int' : 'int', - 'boolean' : 'char(4)', - 'float' : 'float', - 'double' : 'double', - 'string' : 'varchar(64)', - 'File' : 'varchar(64)' -} - - +def handle_key(key): + """ + Handles keys to fit into DataJoint tables + Returns false if saber key + """ + assert isinstance(key, str) + key = key.lower() + if re.match("_saber_.*", key): + return False + if re.match("^[a-z][a-z0-9_]*$", key): + return key + else: + raise ValueError( + "Key must start with a letter and only contain alphanumeric characters and underscores" + ) + + +class DatajointHook(BaseHook): + def __init__(self, classdef=None, safe=True, config=None): + self.safe = safe + if config is None: + self.config = {} + self.config["host"] = "datajoint:3306" + self.config["user"] = "root" + self.config["password"] = "airflow" + else: + self.config = config + self.classdef = classdef + self.context = {} + with closing(self.get_conn()) as conn: + self.create_table(conn, Workflow) + self.create_table(conn, JobMetadata) + + def create_definition(self, d, wf_name, is_cwl=True): + definition = """ + # Parameter table for workflow {} + -> Workflow + iteration : varchar(6) + --- + + """.format( + wf_name + ) + for k, t in d.items(): + # Ignore saber keys + tp = t.replace("?", "") + k = handle_key(k) + if k: + try: + djt = db_types[tp] if is_cwl else tp + except KeyError: + # Unsupport type, try string lmao + djt = "varchar(64)" + definition += " {} = null : {}\n".format(k, djt) + return type( + "{}Params".format(wf_name.title()), + (dj.Manual,), + dict(definition=definition), + ) + + def create_table(self, conn, classdef=None): + if self.classdef is None and classdef is None: + raise AttributeError( + "Schema needs to be set. 
Create in constructor or by using create_definition" + ) + if classdef is None: + classdef = self.classdef + + schema = dj.schema(schema_name="airflow", connection=conn, context=self.context) + + table = schema(classdef)() + self.context[classdef.__name__] = table + return table + + def insert1(self, row, classdef=None, skip_duplicates=True, **kwargs): + if classdef is None: + classdef = self.classdef + with closing(self.get_conn()) as conn: + table = self.create_table(conn, classdef=classdef) + + ret = table.insert1(row, skip_duplicates=skip_duplicates, **kwargs) + return ret + + def get_conn(self): + with open(os.devnull, "w") as devnull: + with contextlib.redirect_stdout(devnull): + conn = dj.conn(**self.config, reset=True) + return conn + + def update(self, row, primary_keys={}, classdef=None, **kwargs): + if classdef is None: + classdef = self.classdef + with closing(self.get_conn()) as conn: + table = self.create_table(conn, classdef=classdef) + try: + ret = table.insert1(row, skip_duplicates=False, **kwargs) + except DuplicateError: + with dj.config(safemode=False): + (table & primary_keys).delete() + ret = table.insert1(row, skip_duplicates=False, **kwargs) + return ret + + def query(self, classdef=None): + if classdef is None: + classdef = self.classdef + with closing(self.get_conn()) as conn: + table = self.create_table(conn, classdef=classdef) + + jmdb = self.create_table(conn, JobMetadata) + query = table * jmdb + return query.fetch(as_dict=True) + + def init_workflow(self, id, name): + self.insert1(dict(workflow_id=id, workflow_name=name), classdef=Workflow) diff --git a/conduit/utils/job_definitions.py b/conduit/utils/job_definitions.py index 2a94428..5b4aa51 100644 --- a/conduit/utils/job_definitions.py +++ b/conduit/utils/job_definitions.py @@ -19,11 +19,11 @@ import logging import yaml import docker -import base64 +import base64 from urllib.parse import urlparse import tarfile - import tempfile + try: from BytesIO import BytesIO from StringIO import StringIO @@ -31,64 +31,68 @@ from io import BytesIO from io import StringIO + def docker_auth(): - ''' + """ Autheticates the AWS ECR registry Returns: -------- dict: JSON response from the server - ''' - ecr_client = boto3.client('ecr') - auth_response = ecr_client.get_authorization_token()['authorizationData'] + """ + ecr_client = boto3.client("ecr") + auth_response = ecr_client.get_authorization_token()["authorizationData"] if len(auth_response) > 1: - log.warning('Multiple authorizations for AWS ECR detected, using first one') + log.warning("Multiple authorizations for AWS ECR detected, using first one") auth_response = auth_response[0] return auth_response + def docker_registry_login(): - ''' + """ Gets the docker registry name from the auth response Returns: -------- str : docker registry - ''' + """ auth_response = docker_auth() - docker_registry = urlparse(auth_response['proxyEndpoint']).netloc + docker_registry = urlparse(auth_response["proxyEndpoint"]).netloc return docker_registry + def docker_login(): - ''' + """ Logs into the docker registry to push images Returns: -------- docker.client - ''' + """ auth_response = docker_auth() - auth_token = base64.b64decode(auth_response['authorizationToken']) - auth_token = auth_token.decode('ascii').split(':') + auth_token = base64.b64decode(auth_response["authorizationToken"]) + auth_token = auth_token.decode("ascii").split(":") docker_client = docker.from_env() login_response = docker_client.login( username=auth_token[0], password=auth_token[1], - 
registry=auth_response['proxyEndpoint'], - reauth=True + registry=auth_response["proxyEndpoint"], + reauth=True, ) - if 'Status' in login_response: - log.info(login_response['Status']) + if "Status" in login_response: + log.info(login_response["Status"]) return docker_client else: log.error("ERROR") log.error(login_response) - raise RuntimeError('Docker login failed') + raise RuntimeError("Docker login failed") + -def extract(d, keys,exclude=False): - ''' +def extract(d, keys, exclude=False): + """ Helper function to extract keys and values from dictionary Parameters @@ -102,13 +106,15 @@ def extract(d, keys,exclude=False): Returns ------- dict : Dict with extracted keys and values - ''' + """ if exclude: return dict((k, d[k]) for k in d if k not in keys) else: return dict((k, d[k]) for k in keys if k in d) -def make_build_context(docker_image_name): - ''' + + +def make_build_context(docker_image_name, local=False): + """ Makes a build context for the wrapped docker image Parameters: @@ -120,24 +126,48 @@ def make_build_context(docker_image_name): -------- tempfile.NamedTemporaryFile : Temporary dockerfile for build - ''' - s3fd = os.open(os.path.join(os.path.dirname(__file__),'../scripts/s3wrap'), os.O_RDONLY) - s3fp_info = tarfile.TarInfo('s3wrap') - s3fp_info.size = os.fstat(s3fd).st_size - dockerfile = BytesIO() - log.debug('Docker image name: {}'.format(docker_image_name)) - dockerfile.write('FROM {}\n'.format(docker_image_name).encode()) - with open(os.path.join(os.path.dirname(__file__),'../config/dockerfile_template'), 'r') as template_file: - for line in template_file.readlines(): - dockerfile.write(line.encode()) - dockerfile.seek(0) + """ + if local: + s3fd = os.open( + os.path.join(os.path.dirname(__file__), "../scripts/localwrap"), os.O_RDONLY + ) + s3fp_info = tarfile.TarInfo("localwrap") + s3fp_info.size = os.fstat(s3fd).st_size + dockerfile = BytesIO() + log.debug("Docker image name: {}".format(docker_image_name)) + dockerfile.write("FROM {}\n".format(docker_image_name).encode()) + with open( + os.path.join( + os.path.dirname(__file__), "../config/dockerfile_local_template" + ), + "r", + ) as template_file: + for line in template_file.readlines(): + dockerfile.write(line.encode()) + dockerfile.seek(0) + else: + s3fd = os.open( + os.path.join(os.path.dirname(__file__), "../scripts/s3wrap"), os.O_RDONLY + ) + s3fp_info = tarfile.TarInfo("s3wrap") + s3fp_info.size = os.fstat(s3fd).st_size + dockerfile = BytesIO() + log.debug("Docker image name: {}".format(docker_image_name)) + dockerfile.write("FROM {}\n".format(docker_image_name).encode()) + with open( + os.path.join(os.path.dirname(__file__), "../config/dockerfile_s3_template"), + "r", + ) as template_file: + for line in template_file.readlines(): + dockerfile.write(line.encode()) + dockerfile.seek(0) # Make build context f = tempfile.NamedTemporaryFile() - t = tarfile.open(mode='w', fileobj=f) - s3fp = os.fdopen(s3fd, mode='rb') - dfinfo = tarfile.TarInfo('Dockerfile') + t = tarfile.open(mode="w", fileobj=f) + s3fp = os.fdopen(s3fd, mode="rb") + dfinfo = tarfile.TarInfo("Dockerfile") dfinfo.size = len(dockerfile.getvalue()) t.addfile(dfinfo, dockerfile) t.addfile(s3fp_info, s3fp) @@ -146,8 +176,9 @@ def make_build_context(docker_image_name): f.seek(0) return f + def get_original_docker_name(tool_yml): - ''' + """ Gets the original docker name from a tool CWL Parameters: @@ -159,15 +190,17 @@ def get_original_docker_name(tool_yml): -------- str: Original docker name - ''' - try: - orig_docker_image_name = 
tool_yml['hints']['DockerRequirement']['dockerPull'] + """ + try: + orig_docker_image_name = tool_yml["hints"]["DockerRequirement"]["dockerPull"] except KeyError: - raise NotImplementedError('Non-docker based tools are not supported') + raise NotImplementedError("Non-docker based tools are not supported") return orig_docker_image_name + + def make_tag(tool_name, tool_yml, local): - ''' + """ Makes a tag form the tool name and tool CWL Parameters: ----------- @@ -180,33 +213,33 @@ def make_tag(tool_name, tool_yml, local): -------- str: Docker image tag of the form "registry/original_name:s3" - ''' + """ orig_docker_image_name = get_original_docker_name(tool_yml) - docker_image_name_s = orig_docker_image_name.split('/') + docker_image_name_s = orig_docker_image_name.split("/") if not local: auth_response = docker_auth() # Seperate out docker repo name if len(docker_image_name_s) == 3: # Includes repository name docker_repo_name = docker_image_name_s[0] - docker_image_name = '/'.join(docker_image_name_s[1:]) + docker_image_name = "/".join(docker_image_name_s[1:]) else: docker_image_name = orig_docker_image_name - docker_repo_name = '' - docker_tag_s = docker_image_name.split(':') + docker_repo_name = "" + docker_tag_s = docker_image_name.split(":") short_docker_image_name = docker_tag_s[0] # if auth_response['proxyEndpoint'] != docker_repo_name: # log.warning('Docker repo does not match AWS docker repo') if not local: docker_registry = docker_registry_login() - tag = '{}/{}:s3'.format(docker_registry, short_docker_image_name) + tag = "{}/{}:s3".format(docker_registry, short_docker_image_name) if local: - tag = short_docker_image_name + ":saber" + tag = "{}:local".format(short_docker_image_name) return tag def create_and_push_docker_image(tool_yml, tag, local): - ''' + """ Creates and pushes a docker image from the created context Parameters: @@ -221,44 +254,40 @@ def create_and_push_docker_image(tool_yml, tag, local): str: Tag from input - ''' + """ orig_docker_image_name = get_original_docker_name(tool_yml) - dockerfile_tar = make_build_context(orig_docker_image_name) - if not local: - docker_client = docker_login() - if local: + dockerfile_tar = make_build_context(orig_docker_image_name, local=local) + if local: docker_client = docker.from_env() + else: + docker_client = docker_login() try: im, bgen = docker_client.images.build( - fileobj=dockerfile_tar, - rm=True, - pull=True, - tag=tag, - custom_context=True) + fileobj=dockerfile_tar, rm=True, pull=True, tag=tag, custom_context=True + ) except docker.errors.BuildError as e: log.warn('Error building image "{}", trying with local image...'.format(e)) - dockerfile_tar = make_build_context(orig_docker_image_name) + dockerfile_tar = make_build_context(orig_docker_image_name, local) im, bgen = docker_client.images.build( - fileobj=dockerfile_tar, - rm=True, - pull=False, - tag=tag, - custom_context=True) - prev_line = '' + fileobj=dockerfile_tar, rm=True, pull=False, tag=tag, custom_context=True + ) + prev_line = "" for line in bgen: - if 'stream' in line and line['stream'] != prev_line: - log.info(line['stream']) - prev_line = line['stream'] - prev_line = '' + if "stream" in line and line["stream"] != prev_line: + log.info(line["stream"]) + prev_line = line["stream"] + prev_line = "" if not local: for line in docker_client.images.push(tag, stream=True, decode=True): - if 'status' in line and line['status'] != prev_line: - log.info(line['status']) - prev_line = line['status'] + if "status" in line and line["status"] != prev_line: + 
log.info(line["status"]) + prev_line = line["status"] return tag + + def generate_job_definition(tool_name, tool_yml, tag): - ''' + """ Generates an AWS batch job definition containing the running requirements and image. Does NOT include a command, as this is added as an override. This was done because AWS will throw an error if the job submitted does not @@ -279,30 +308,45 @@ def generate_job_definition(tool_name, tool_yml, tag): Job definition in the format described by AWS See: https://docs.aws.amazon.com/batch/latest/userguide/job-definition-template.html - ''' + """ # Load template from configs - with open(os.path.join(os.path.dirname(__file__),'../config/aws_config.yml' )) as fp: - job_definition = yaml.load(fp)['job-definitions'] + with open( + os.path.join(os.path.dirname(__file__), "../config/aws_config.yml") + ) as fp: + job_definition = yaml.load(fp)["job-definitions"] - job_definition['containerProperties']['image'] = tag - - - job_definition['containerProperties']['command'] = [] - job_definition['jobDefinitionName'] = tool_name.replace('_','-') + job_definition["containerProperties"]["image"] = tag + + job_definition["containerProperties"]["command"] = [] + job_definition["jobDefinitionName"] = tool_name.replace("_", "-") try: - job_definition['containerProperties']['memory'] = tool_yml['requirements']['ResourceRequirement']['ramMin'] + job_definition["containerProperties"]["memory"] = tool_yml["requirements"][ + "ResourceRequirement" + ]["ramMin"] except KeyError: - log.warning('No memory resource requirements specified, using default of {}'.format(job_definition['containerProperties']['memory'])) - + log.warning( + "No memory resource requirements specified, using default of {}".format( + job_definition["containerProperties"]["memory"] + ) + ) + try: - job_definition['containerProperties']['vcpus'] = tool_yml['requirements']['ResourceRequirement']['coresMin'] + job_definition["containerProperties"]["vcpus"] = tool_yml["requirements"][ + "ResourceRequirement" + ]["coresMin"] except KeyError: - log.warning('No vCPU resource requirements specified, using default of {}'.format(job_definition['containerProperties']['vcpus'])) + log.warning( + "No vCPU resource requirements specified, using default of {}".format( + job_definition["containerProperties"]["vcpus"] + ) + ) return job_definition + + def create_job_definitions(): - ''' + """ Creates job definitions from json files in ./job-definitions/ Returns @@ -315,22 +359,22 @@ def create_job_definitions(): 'revision': 123 } - ''' + """ - ret = [] job_definition_files = glob.glob( - os.path.join(os.path.dirname(__file__),'../config/job-definitions/*.json') - ) + os.path.join(os.path.dirname(__file__), "../config/job-definitions/*.json") + ) for job_definition_file in job_definition_files: with open(job_definition_file) as fp: job_definition = json.load(fp) ret.append(create_job_definition(job_definition)) return ret + def create_job_definition(job_definition): - ''' + """ Registers a job definition with AWS, checking if it was already defined. 
Parameters @@ -348,42 +392,59 @@ def create_job_definition(job_definition): jobDefinitionArn: str, revision: int } - ''' - log.debug('Describing job definition from job {}'.format(job_definition['jobDefinitionName'])) - check_response = client.describe_job_definitions( - jobDefinitionName=job_definition['jobDefinitionName'], - status='ACTIVE' + """ + log.debug( + "Describing job definition from job {}".format( + job_definition["jobDefinitionName"] ) - - if check_response['jobDefinitions'] == []: - log.info('Creating job definition for job {}...'.format(job_definition['jobDefinitionName'])) - ret = client.register_job_definition( - **job_definition + ) + check_response = client.describe_job_definitions( + jobDefinitionName=job_definition["jobDefinitionName"], status="ACTIVE" + ) + + if check_response["jobDefinitions"] == []: + log.info( + "Creating job definition for job {}...".format( + job_definition["jobDefinitionName"] + ) ) + ret = client.register_job_definition(**job_definition) else: - for i,c_check_reponse in enumerate(check_response['jobDefinitions']): + for i, c_check_reponse in enumerate(check_response["jobDefinitions"]): c_check_response = extract( - d=c_check_reponse, - keys=['jobDefinitionArn','status','revision'], - exclude=True + d=c_check_reponse, + keys=["jobDefinitionArn", "status", "revision"], + exclude=True, ) if c_check_response == job_definition: - log.warning('Job definition exists for job {} and matches revision {}. Continuing...'.format(job_definition['jobDefinitionName'],check_response['jobDefinitions'][i]['revision'] )) + log.warning( + "Job definition exists for job {} and matches revision {}. Continuing...".format( + job_definition["jobDefinitionName"], + check_response["jobDefinitions"][i]["revision"], + ) + ) ret = extract( - d=check_response['jobDefinitions'][i], - keys=['jobDefinitionName','jobDefinitionArn','revision'] + d=check_response["jobDefinitions"][i], + keys=["jobDefinitionName", "jobDefinitionArn", "revision"], ) return ret - log.warning('Job definition exists for job {}, but does not match file. Creating new revision...'.format(job_definition['jobDefinitionName'])) + log.warning( + "Job definition exists for job {}, but does not match file. 
Creating new revision...".format( + job_definition["jobDefinitionName"] + ) + ) ret = extract( - d=client.register_job_definition( - **job_definition - ), - keys=['jobDefinitionName','jobDefinitionArn','revision'] - ) + d=client.register_job_definition(**job_definition), + keys=["jobDefinitionName", "jobDefinitionArn", "revision"], + ) return ret -client = boto3.client('batch') + + +try: + client = boto3.client("batch") +except: + print("Local Execution only (no S3)") if __name__ == "__main__": logging.basicConfig() log = logging.getLogger(__name__) @@ -391,5 +452,3 @@ def create_job_definition(job_definition): else: log = logging.getLogger(__name__) log.setLevel(logging.DEBUG) - - diff --git a/conduit/utils/parameterization.py b/conduit/utils/parameterization.py index ceaf4e8..ec1a971 100644 --- a/conduit/utils/parameterization.py +++ b/conduit/utils/parameterization.py @@ -15,12 +15,14 @@ import yaml import numpy as np import itertools -from abc import ABC, abstractmethod import random +from abc import ABC, abstractmethod + + def parameterize(p): param_ranges = [] for mp_name, metaparam in p.items(): - r = metaparam['range'] + r = metaparam["range"] if type(r) == dict: param_ranges.append(np.arange(**r)) elif type(r) == list: @@ -30,60 +32,108 @@ def parameterize(p): iterations = [] for i in itertools.product(*param_ranges): iteration = {} - for j,(mp_name, metaparam) in enumerate(p.items()): + for j, (mp_name, metaparam) in enumerate(p.items()): job = {} - if set(metaparam['parameters'].keys()) == set(['min', 'max']): - if set(metaparam['range'].keys()) == set(['start', 'stop', 'step']): - job[metaparam['parameters']['min']] = str(i[j]) - job[metaparam['parameters']['max']] = str(min(i[j] + metaparam['range']['step'], metaparam['range']['stop'])) + if set(metaparam["parameters"].keys()) == set(["min", "max"]): + if set(metaparam["range"].keys()) == set(["start", "stop", "step"]): + job[metaparam["parameters"]["min"]] = str(i[j]) + job[metaparam["parameters"]["max"]] = str( + min( + i[j] + metaparam["range"]["step"], + metaparam["range"]["stop"], + ) + ) else: - raise KeyError("In order to use min/max parameterization, you need to specify a range with start, stop and step") - elif set(metaparam['parameters'].keys()) == set(['abs']): - job[metaparam['parameters']['abs']] = str(i[j]) + raise KeyError( + "In order to use min/max parameterization, you need to specify a range with start, stop and step" + ) + elif set(metaparam["parameters"].keys()) == set(["abs"]): + job[metaparam["parameters"]["abs"]] = str(i[j]) else: - raise KeyError("Parameters type(s) {} not valid".format(metaparam['parameters'].keys())) - for stepname in metaparam['steps']: + raise KeyError( + "Parameters type(s) {} not valid".format( + metaparam["parameters"].keys() + ) + ) + for stepname in metaparam["steps"]: if stepname in iteration: iteration[stepname].update(job) else: iteration[stepname] = job - + iterations.append(iteration) return iterations + + class Sampler(ABC): - ''' + """ Abstract class for a sampler - ''' - def __init__(self, parameterization_dict, job): + """ + + def __init__(self, parameterization_dict, job, **kwargs): self.job = job self.parameters = parameterization_dict - + super().__init__() + @abstractmethod def update(self, results): pass - + @abstractmethod def sample(self): pass + class RandomSampler(Sampler): - def __init__(self, parameterization_dict, job, max_iterations): + def __init__(self, parameterization_dict, job, max_iterations, **kwargs): + super().__init__(parameterization_dict, job) 
self.param_grid = parameterize(parameterization_dict) self.max_iterations = max_iterations self.update(None) - super().__init__(parameterization_dict, job) def update(self, results): - self.next_job = random.choice(self.param_grid) + + def sample(self): + for _ in range(self.max_iterations): + yield self.next_job + + +class GridSampler(Sampler): + def __init__(self, parameterization_dict, job, **kwargs): + super().__init__(parameterization_dict, job) + self.param_grid = parameterize(parameterization_dict) + self.count = 0 + self.update(None) + + def update(self, results): + self.next_job = self.param_grid[self.count] + self.count += 1 + + def sample(self): + for _ in range(len(self.param_grid)): + yield self.next_job + + +class BatchGridSampler(Sampler): + def __init__(self, parameterization_dict, job, batch_size, **kwargs): + super().__init__(parameterization_dict, job) + self.batch_size = batch_size + self.param_grid = parameterize(parameterization_dict) + self.num_of_batches = int(np.ceil(len(self.param_grid) / self.batch_size)) + self.batch_index = 0 + self.update(None) + + def update(self, results): + start = self.batch_index + end = self.batch_index + self.batch_size + if end > len(self.param_grid): + self.next_job = self.param_grid[start:] + else: + self.next_job = self.param_grid[start:end] + self.batch_index = end + def sample(self): - for i in range(self.max_iterations): + for _ in range(self.num_of_batches): yield self.next_job -if __name__ == '__main__': - with open('parameterization.yml') as fp: - p = yaml.load(fp) - para = parameterize(p) - print(para[0]) - print(len(para)) - diff --git a/conduit/utils/saber_docker_operator.py b/conduit/utils/saber_docker_operator.py index 464d29b..13b7925 100644 --- a/conduit/utils/saber_docker_operator.py +++ b/conduit/utils/saber_docker_operator.py @@ -12,46 +12,54 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from airflow.operators.docker_operator import DockerOperator -from utils.datajoint_hook import JobMetadata -from datajoint import DuplicateError import time import parse +from airflow.operators.docker_operator import DockerOperator +from conduit.utils.datajoint_hook import DatajointHook, JobMetadata +from datajoint import DuplicateError + + class SaberDockerOperator(DockerOperator): - def __init__(self, *args, workflow_id, score_format='', **kwargs): + def __init__(self, *args, workflow_id, score_format="", **kwargs): super().__init__(*args, **kwargs) self.score_format = score_format self.workflow_id = workflow_id - self.task_id = kwargs['task_id'] - self.jobmetadata_db = JobMetadata() + self.task_id = kwargs["task_id"] + self.dj_hook = DatajointHook() + def execute(self, *args, **kwargs): begin_time = time.time() super().execute(*args, **kwargs) task_time = time.time() - begin_time score = self._get_score() - iteration = self.task_id.split('.')[1] - real_task_id = self.task_id.split('.')[0] - self.log.info('Inserting {} {} {} {} {} into job metadata database'.format(self.workflow_id, iteration, real_task_id, task_time, score)) - try: - self.jobmetadata_db.insert1({ - 'iteration' : iteration, - 'workflow_id' : self.workflow_id, - 'job_id' : real_task_id, - 'cost' : task_time, - 'score' : score - }) - except DuplicateError: - pass + iteration = self.task_id.split(".")[1] + real_task_id = self.task_id.split(".")[0] + self.log.info( + "Inserting {} {} {} {} {} into job metadata database".format( + self.workflow_id, iteration, real_task_id, task_time, score + ) + ) + self.dj_hook.insert1( + { + "iteration": iteration, + "workflow_id": self.workflow_id, + "job_id": real_task_id, + "cost": task_time, + "score": score, + }, + JobMetadata, + ) + def _get_score(self): - + if self.score_format: - logEvents = self.cli.logs(container=self.container['Id'], stream=True) + logEvents = self.cli.logs(container=self.container["Id"], stream=True) # Reads events from most recent to least recent (earliest), so the # first match is the most recent score. Perhaps change this? 
for logEvent in logEvents: parsed_event = parse.parse(self.score_format, logEvent.decode()) - if parsed_event and 'score' in parsed_event.named: - return float(parsed_event['score']) - self.log.info('Score format present but no score found in logs...') - return None \ No newline at end of file + if parsed_event and "score" in parsed_event.named: + return float(parsed_event["score"]) + self.log.info("Score format present but no score found in logs...") + return None diff --git a/demos/dvid_ffn/block_params.yml b/demos/dvid_ffn/block_params.yml new file mode 100644 index 0000000..289ffa8 --- /dev/null +++ b/demos/dvid_ffn/block_params.yml @@ -0,0 +1,33 @@ +x: + range: + start: 2000 + stop: 2512 + step: 128 + parameters: + min: xmin + max: xmax + steps: + - dvid_pull_raw + - dvid_push_seg +y: + range: + start: 2000 + stop: 2512 + step: 128 + parameters: + min: ymin + max: ymax + steps: + - dvid_pull_raw + - dvid_push_seg +z: + range: + start: 2000 + stop: 2512 + step: 128 + parameters: + min: zmin + max: zmax + steps: + - dvid_pull_raw + - dvid_push_seg \ No newline at end of file diff --git a/demos/dvid_ffn/job_dvid_ffn.yml b/demos/dvid_ffn/job_dvid_ffn.yml new file mode 100644 index 0000000..7b70828 --- /dev/null +++ b/demos/dvid_ffn/job_dvid_ffn.yml @@ -0,0 +1,33 @@ +# DVID: +host_name: 3.209.156.251:8001 +uuid: 56e2e4251774426abdde8cdee4be747e +resource_name: validation +dtype_name: uint8 +type: uint8blk +resolution: 0 +xmin: 0 +xmax: 64 +ymin: 0 +ymax: 64 +zmin: 0 +zmax: 64 +pull_output_name: medulla7_raw.npy +resource_name_out: ffn_segmentation +dtype_name_out: uint64 +type_out: labelblk + +# FFN segmentation +image_mean: 128 +image_stddev: 33 +depth: 12 +fov_size: 33,33,33 +deltas: 8,8,8 +init_activation: 0.95 +pad_value: 0.05 +move_threshold: 0.9 +min_boundary_dist: 1,1,1 +segment_threshold: 0.6 +min_segment_size: 1000 +bound_start: 0,0,0 +bound_stop: 64,64,64 +outfile: medulla7_ffn_out.npy \ No newline at end of file diff --git a/demos/dvid_ffn/job_public.yml b/demos/dvid_ffn/job_public.yml new file mode 100644 index 0000000..0064372 --- /dev/null +++ b/demos/dvid_ffn/job_public.yml @@ -0,0 +1,30 @@ +# DVID: +host_name: emdata.janelia.org +uuid: 822524777d3048b8bd520043f90c1d28 +resource_name: grayscale +dtype_name: uint8 +type: uint8blk +resolution: 0 +xmin: 2000 +xmax: 2256 +ymin: 2000 +ymax: 2256 +zmin: 2000 +zmax: 2256 +pull_output_name: medulla7_raw.npy + +# FFN segmentation +image_mean: 128 +image_stddev: 33 +depth: 12 +fov_size: 33,33,33 +deltas: 8,8,8 +init_activation: 0.95 +pad_value: 0.05 +move_threshold: 0.9 +min_boundary_dist: 1,1,1 +segment_threshold: 0.6 +min_segment_size: 1000 +bound_start: 0,0,0 +bound_stop: 64,64,64 +outfile: medulla7_ffn_out.npy \ No newline at end of file diff --git a/demos/dvid_ffn/workflow_dvid_ffn.cwl b/demos/dvid_ffn/workflow_dvid_ffn.cwl new file mode 100644 index 0000000..93cfa4f --- /dev/null +++ b/demos/dvid_ffn/workflow_dvid_ffn.cwl @@ -0,0 +1,133 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
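For context on the `score_format` mechanism that `SaberDockerOperator._get_score` (above) relies on, and that the threshold demo step later declares as `score_format: "F1: {score}"`, here is a minimal standalone sketch (not repository code; the log line is made up): the `parse` library pulls the named `{score}` field out of a matching log line, and the operator casts it to float.

```
import parse  # the same `parse` package imported by saber_docker_operator.py

score_format = "F1: {score}"   # format hint, as used by the threshold step in the demos
log_line = "F1: 0.8731"        # hypothetical line from the container's log stream

result = parse.parse(score_format, log_line)
if result and "score" in result.named:
    print(float(result["score"]))  # -> 0.8731
else:
    print("Score format present but no score found in logs...")
```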
+# See the License for the specific language governing permissions and +# limitations under the License. + +#!/usr/bin/env cwl-runner +## This workflow will make use of the general synapse and membrane detection cwl files, meaning the processes will happen on CPU rather than on GPU. Does not include Boss push steps. + +cwlVersion: v1.0 +class: Workflow +doc: local + +inputs: + # Inputs for DVID + host_name: string + uuid: string? + resource_name: string + dtype_name: string + type: string + resolution: int? + xmin: int? + xmax: int? + ymin: int? + ymax: int? + zmin: int? + zmax: int? + pull_output_name: string + resource_name_out: string + dtype_name_out: string + type_out: string + + #Inputs for FFN + image_mean: string + image_stddev: string + depth: string + fov_size: string + deltas: string + init_activation: string + pad_value: string + move_threshold: string + min_boundary_dist: string + segment_threshold: string + min_segment_size: string + bound_start: string + bound_stop: string + outfile: string + +outputs: + pull_output_raw: + type: File + outputSource: dvid_pull_raw/pull_output + ffn_segmentation: + type: File + outputSource: ffn_segmentation/ffn_out + +steps: + dvid_pull_raw: + run: ../saber/dvid_access/dvid_pull.cwl + in: + host_name: host_name + uuid: uuid + resource_name: resource_name + dtype_name: dtype_name + type: type + resolution: resolution + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + output_name: pull_output_name + hints: + saber: + local: True + file_path: /home/ubuntu/saber/volumes/data/local + out: + [pull_output] + + ffn_segmentation: + run: ../saber/i2g/ffns/ffn_segmentation.cwl + in: + input: dvid_pull_raw/pull_output + image_mean: image_mean + image_stddev: image_stddev + depth: depth + fov_size: fov_size + deltas: deltas + init_activation: init_activation + pad_value: pad_value + move_threshold: move_threshold + min_boundary_dist: min_boundary_dist + segment_threshold: segment_threshold + min_segment_size: min_segment_size + bound_start: bound_start + bound_stop: bound_stop + outfile: outfile + hints: + saber: + local: True + file_path: /home/ubuntu/saber/volumes/data/local + out: [ffn_out] + + dvid_push_seg: + run: ../saber/dvid_access/dvid_push.cwl + in: + input: ffn_segmentation/ffn_out + host_name: host_name + resource_name: resource_name_out + dtype_name: dtype_name_out + type: type_out + resolution: resolution + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + hints: + saber: + local: True + file_path: /home/ubuntu/saber/volumes/data/local + out: + [] \ No newline at end of file diff --git a/demos/dvid_ffn/workflow_public.cwl b/demos/dvid_ffn/workflow_public.cwl new file mode 100644 index 0000000..497a7ee --- /dev/null +++ b/demos/dvid_ffn/workflow_public.cwl @@ -0,0 +1,108 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#!/usr/bin/env cwl-runner +## This workflow will make use of the general synapse and membrane detection cwl files, meaning the processes will happen on CPU rather than on GPU. Does not include Boss push steps. + +cwlVersion: v1.0 +class: Workflow +doc: local + +inputs: + # Inputs for DVID + host_name: string + uuid: string? + resource_name: string + dtype_name: string + type: string + resolution: int? + xmin: int? + xmax: int? + ymin: int? + ymax: int? + zmin: int? + zmax: int? + pull_output_name: string + + #Inputs for FFN + image_mean: string + image_stddev: string + depth: string + fov_size: string + deltas: string + init_activation: string + pad_value: string + move_threshold: string + min_boundary_dist: string + segment_threshold: string + min_segment_size: string + bound_start: string + bound_stop: string + outfile: string + +outputs: + pull_output_raw: + type: File + outputSource: dvid_pull_raw/pull_output + ffn_segmentation: + type: File + outputSource: ffn_segmentation/ffn_out + +steps: + dvid_pull_raw: + run: ../saber/dvid_access/dvid_pull.cwl + in: + host_name: host_name + uuid: uuid + resource_name: resource_name + dtype_name: dtype_name + type: type + resolution: resolution + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + output_name: pull_output_name + hints: + saber: + local: True + file_path: /home/ubuntu/saber/volumes/data/local + out: + [pull_output] + + ffn_segmentation: + run: ../saber/i2g/ffns/ffn_segmentation.cwl + in: + input: dvid_pull_raw/pull_output + image_mean: image_mean + image_stddev: image_stddev + depth: depth + fov_size: fov_size + deltas: deltas + init_activation: init_activation + pad_value: pad_value + move_threshold: move_threshold + min_boundary_dist: min_boundary_dist + segment_threshold: segment_threshold + min_segment_size: min_segment_size + bound_start: bound_start + bound_stop: bound_stop + outfile: outfile + hints: + saber: + local: True + file_path: /home/ubuntu/saber/volumes/data/local + out: [ffn_out] diff --git a/demos/em_pipelines/job_i2g.yaml b/demos/em_pipelines/job_i2g.yaml new file mode 100644 index 0000000..419649d --- /dev/null +++ b/demos/em_pipelines/job_i2g.yaml @@ -0,0 +1,61 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# BOSS: +#------------------------------------------------ +# general: +host_bossdb: api.bossdb.io +token_bossdb: public +coll_name: Kasthuri +exp_name: ac4 +coord_name: ac4-cf +resolution: 0 +xmin: 0 +xmax: 512 +ymin: 0 +ymax: 512 +zmin: 0 +zmax: 50 +padding: 0 +onesided: 0 +#inputs: +in_chan_name_raw: em +itype_name_in: image +dtype_name_in: uint8 +#outputs +pull_output_name_raw: pull_output_raw.npy + + +#PROCESSING: +#------------------------------------------------ +#General: +width: 512 +height: 512 +mode: synapse +#Synapse_detection +synapse_output: synapse_output.npy +#Membrane_detection +membrane_output: membrane_output.npy +#neuron_segmentation: +train_file: ./trained_classifier.pkl +neuron_output: neuron_output.npy +mode: synapse +neuron_mode: 1 +agg_threshold: "0.5" +seeds_cc_threshold: "5" +#syn assoc +assoc_output_name: edge_list.pkl +assoc_output_name_noneu: edge_list_noneu.pkl +_saber_bucket: saber-batch + diff --git a/demos/em_pipelines/job_synapse_threshold.yaml b/demos/em_pipelines/job_synapse_threshold.yaml new file mode 100644 index 0000000..cd4f1c1 --- /dev/null +++ b/demos/em_pipelines/job_synapse_threshold.yaml @@ -0,0 +1,33 @@ +#Boss files +host_name: api.bossdb.io +coord: ac4-cf +token: public +coll: Kasthuri +exp: ac4 +chan_labels: synapse +chan_img: em +dtype_img: uint8 +dtype_lbl: uint64 +itype_name: image +res: 0 +xmin: 0 +xmax: 256 +ymin: 0 +ymax: 256 +zmin: 0 +zmax: 50 +padding: 0 + +#threshold input +threshold: 0.5 + +# synapse detection +width: 256 +height: 256 +mode: synapse + +#Output File Names +raw_pull_output_name: pull_output.npy +anno_pull_output_name: anno_output.npy +synapse_output_name: synapse_output.npy +threshold_output_name: threshold_output.npy \ No newline at end of file diff --git a/demos/em_pipelines/synapse_threshold.cwl b/demos/em_pipelines/synapse_threshold.cwl new file mode 100644 index 0000000..aafc77f --- /dev/null +++ b/demos/em_pipelines/synapse_threshold.cwl @@ -0,0 +1,147 @@ +# Copyright 2020 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
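One thing worth noting about `job_i2g.yaml` above: it assigns `mode: synapse` twice (once under the general settings and again under the neuron-segmentation block). PyYAML's default loader does not reject duplicate keys; the last occurrence silently wins, as this small illustrative snippet (values are made up, not from the repo) shows.

```
import yaml

doc = """
mode: synapse      # first assignment
neuron_mode: 1
mode: membrane     # hypothetical second assignment with a different value
"""
params = yaml.safe_load(doc)
print(params["mode"])  # -> "membrane": the later duplicate silently overrides the earlier one
```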
+ +cwlVersion: v1.0 +class: Workflow +doc: local + +cwlVersion: v1.0 +class: Workflow +inputs: + coord: string + token: string + host_name: string + coll: string + exp: string + chan_labels: string + chan_img: string + dtype_img: string + dtype_lbl: string + itype_name: string + padding: int + res: int + xmin: int + xmax: int + ymin: int + ymax: int + zmin: int + zmax: int + raw_pull_output_name: string + anno_pull_output_name: string + + width: int + height: int + mode: string + synapse_output_name: string + + threshold: string + threshold_output_name: string + +outputs: + pull_output: + type: File + outputSource: raw_boss_pull/pull_output + anno_output: + type: File + outputSource: anno_boss_pull/pull_output + synapse_detection: + type: File + outputSource: synapse_detection/synapse_detection_out + threshold_output: + type: File + outputSource: threshold/threshold_out + +steps: + raw_boss_pull: + run: ../../saber/boss_access/boss_pull_nos3.cwl + in: + host_name: host_name + token: token + coll_name: coll + exp_name: exp + chan_name: chan_img + dtype_name: dtype_img + resolution: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + itype_name: itype_name + padding: padding + output_name: raw_pull_output_name + coord_name: coord + out: + [pull_output] + hints: + saber: + local: True + file_path: /home/xenesd1-a/saber/output + + anno_boss_pull: + run: ../../saber/boss_access/boss_pull_nos3.cwl + in: + host_name: host_name + token: token + coll_name: coll + exp_name: exp + chan_name: chan_labels + dtype_name: dtype_lbl + resolution: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + itype_name: itype_name + padding: padding + output_name: anno_pull_output_name + coord_name: coord + out: + [pull_output] + hints: + saber: + local: True + file_path: /home/xenesd1-a/saber/output + + synapse_detection: + run: ../../saber/i2g/detection/synapse_detection.cwl + in: + input: raw_boss_pull/pull_output + width: width + height: height + mode: mode + output: synapse_output_name + hints: + saber: + local: True + file_path: /home/xenesd1-a/saber/output + out: [synapse_detection_out] + + threshold: + run: ../../saber/postprocessing/threshold/threshold.cwl + in: + input: synapse_detection/synapse_detection_out + groundtruth: anno_boss_pull/pull_output + threshold: threshold + outfile: threshold_output_name + out: + [threshold_out] + hints: + saber: + local: True + file_path: /home/xenesd1-a/saber/output + score_format: "F1: {score}" \ No newline at end of file diff --git a/demos/em_pipelines/workflow_i2g.cwl b/demos/em_pipelines/workflow_i2g.cwl new file mode 100644 index 0000000..3e94a49 --- /dev/null +++ b/demos/em_pipelines/workflow_i2g.cwl @@ -0,0 +1,163 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#!/usr/bin/env cwl-runner +## This workflow will make use of the general synapse and membrane detection cwl files, meaning the processes will happen on CPU rather than on GPU. Does not include Boss push steps. + +cwlVersion: v1.0 +class: Workflow +doc: local + +inputs: + + # Inputs for BOSS + host_bossdb: string + token_bossdb: string? + coll_name: string + exp_name: string + in_chan_name_raw: string + dtype_name_in: string + itype_name_in: string + coord_name: string + resolution: int? + xmin: int? + xmax: int? + ymin: int? + ymax: int? + zmin: int? + zmax: int? + padding: int? + + #Inputs for processing + width: int? + height: int? + mode: string + + #Inputs for neuron_segmentation + train_file: File? + neuron_mode: string + seeds_cc_threshold: string + agg_threshold: string + + #Inputs for output names: + pull_output_name_raw: string + synapse_output: string + membrane_output: string + neuron_output: string + assoc_output_name: string + assoc_output_name_noneu: string + +outputs: + pull_output_raw: + type: File + outputSource: boss_pull_raw/pull_output + synapse_detection: + type: File + outputSource: synapse_detection/synapse_detection_out + membrane_detection: + type: File + outputSource: membrane_detection/membrane_detection_out + neuron_segmentation: + type: File + outputSource: neuron_segmentation/neuron_segmentation_out + assoc_output: + type: File + outputSource: assoc/assoc_output + assoc_output_noneu: + type: File + outputSource: assoc/assoc_output_noneu + +steps: + boss_pull_raw: + run: ../../saber/boss_access/boss_pull_nos3.cwl + in: + token: token_bossdb + host_name: host_bossdb + coll_name: coll_name + exp_name: exp_name + chan_name: in_chan_name_raw + dtype_name: dtype_name_in + itype_name: itype_name_in + resolution: resolution + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + padding: padding + output_name: pull_output_name_raw + coord_name: coord_name + hints: + saber: + local: True + file_path: /home/ubuntu/saber/outputs + out: + [pull_output] + + synapse_detection: + run: ../../saber/i2g/detection/synapse_detection_gpu.cwl + in: + input: boss_pull_raw/pull_output + width: width + height: height + mode: mode + output: synapse_output + hints: + saber: + local: True + file_path: /home/ubuntu/saber/outputs + out: [synapse_detection_out] + + membrane_detection: + run: ../../saber/i2g/detection/membrane_detection_gpu.cwl + in: + input: boss_pull_raw/pull_output + width: width + height: height + output: membrane_output + hints: + saber: + local: True + file_path: /home/ubuntu/saber/outputs + out: [membrane_detection_out] + + neuron_segmentation: + run: ../../saber/i2g/neuron_segmentation/neuron_segmentation.cwl + in: + prob_file: membrane_detection/membrane_detection_out + mode: neuron_mode + train_file: train_file + agg_threshold: agg_threshold + seeds_cc_threshold: seeds_cc_threshold + outfile: neuron_output + hints: + saber: + local: True + file_path: /home/ubuntu/saber/outputs + out: [neuron_segmentation_out] + + assoc: + run: ../../saber/i2g/seg_syn_association/assoc_local.cwl + in: + seg_file: neuron_segmentation/neuron_segmentation_out + syn_file: synapse_detection/synapse_detection_out + output_name: assoc_output_name + output_name_noneu: assoc_output_name_noneu + out: + [assoc_output,assoc_output_noneu] + hints: + saber: + local: True + file_path: /home/ubuntu/saber/outputs diff --git a/demos/local_input_example/job.yaml b/demos/local_input_example/job.yaml new file mode 100644 index 0000000..bdbd9ce --- /dev/null +++ 
b/demos/local_input_example/job.yaml @@ -0,0 +1,25 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +#General: +width: 160 +height: 160 +mode: synapse +#Synapse_detection +input: + class: File + path: test_dir/pull_output_raw.npy +synapse_output: synapse_output.npy + diff --git a/demos/local_input_example/local_input.cwl b/demos/local_input_example/local_input.cwl new file mode 100644 index 0000000..5ab7ba6 --- /dev/null +++ b/demos/local_input_example/local_input.cwl @@ -0,0 +1,51 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/usr/bin/env cwl-runner +## This workflow will make use of the general synapse and membrane detection cwl files, meaning the processes will happen on CPU rather than on GPU. Does not include Boss push steps. + +cwlVersion: v1.0 +class: Workflow +doc: local + +inputs: + #Inputs for processing + width: int? + height: int? + mode: string + input: File + + #Inputs for output names: + synapse_output: string + +outputs: + synapse_detection: + type: File + outputSource: synapse_detection/synapse_detection_out + +steps: + + synapse_detection: + run: ../../../i2g/detection/synapse_detection.cwl + in: + input: input + width: width + height: height + mode: mode + output: synapse_output + hints: + saber: + local: True + file_path: "" + out: [synapse_detection_out] diff --git a/docker-compose-tools.yml b/docker-compose-tools.yml new file mode 100644 index 0000000..177eff8 --- /dev/null +++ b/docker-compose-tools.yml @@ -0,0 +1,70 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +version: '2.1' +services: + xbrain: + privileged: true + build: + context: ./saber/xbrain + dockerfile: Dockerfile + image: aplbrain/xbrain + xbrain_unets: + privileged: true + build: + context: ./saber/xbrain/unets + dockerfile: Dockerfile + image: aplbrain/unets + boss: + privileged: true + build: + context: ./saber/boss_access + dockerfile: Dockerfile + image: aplbrain/boss-access +# i2gdetect: +# privileged: true +# build: +# context: ./saber/i2g/detection +# dockerfile: Dockerfile +# image: aplbrain/i2gdetect +# i2gdetect_gpu: +# privileged: true +# build: +# context: ./saber/i2g/detection/gpu +# dockerfile: Dockerfile +# image: aplbrain/i2gdetect_gpu +# i2gmetrics: +# privileged: true +# build: +# context: ./saber/i2g/metric_computation +# dockerfile: Dockerfile +# image: aplbrain/i2gmetrics +# i2gseg: +# privileged: true +# build: +# context: ./saber/i2g/neuron_segmentation +# dockerfile: Dockerfile +# image: aplbrain/i2gseg +# i2gseg: +# privileged: true +# build: +# context: ./saber/i2g/neuron_segmentation +# dockerfile: Dockerfile +# image: aplbrain/i2gseg +# i2gassoc: +# privileged: true +# build: +# context: ./saber/i2g/seg_syn_association +# dockerfile: Dockerfile +# image: aplbrain/i2gassoc diff --git a/docker-compose.yml b/docker-compose.yml index e7b65ba..44d3a84 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -24,7 +24,7 @@ services: webserver: privileged: true build: - context: ./conduit + context: . dockerfile: WebDockerfile restart: always depends_on: @@ -45,21 +45,25 @@ services: - "8080:8080" command: webserver healthcheck: - test: ["CMD-SHELL", "[ -f /home/root/airflow-webserver.pid ]"] + test: ["CMD-SHELL", "[ -f /root/airflow-webserver.pid ]"] interval: 30s timeout: 30s retries: 3 cwl_parser: privileged: true build: - context: ./conduit + context: . 
dockerfile: CWLDockerfile depends_on: - webserver + environment: + - DOCKER_CLIENT_TIMEOUT=120 + - COMPOSE_HTTP_TIMEOUT=120 volumes: - ./conduit:/conduit - ~/.aws:/root/.aws - ./saber/:/saber + - ./demos:/demos - /var/run/docker.sock:/var/run/docker.sock command: tail -F root diff --git a/saber/boss_access/boss_access.py b/saber/boss_access/boss_access.py index 0bc7dc9..cb09928 100755 --- a/saber/boss_access/boss_access.py +++ b/saber/boss_access/boss_access.py @@ -19,47 +19,49 @@ import os import sys import tempfile -import math +import math import boto3 from intern.remote.boss import BossRemote from intern.resource.boss.resource import * import numpy as np from requests import HTTPError + def _generate_config(token, args): - boss_host = os.getenv('BOSSDB_HOST', args.host) + boss_host = os.getenv("BOSSDB_HOST", args.host) print(boss_host) cfg = configparser.ConfigParser() - cfg['Project Service'] = {} - cfg['Metadata Service'] = {} - cfg['Volume Service'] = {} + cfg["Project Service"] = {} + cfg["Metadata Service"] = {} + cfg["Volume Service"] = {} - project = cfg['Project Service'] - project['protocol'] = 'https' - project['host'] = boss_host - project['token'] = token + project = cfg["Project Service"] + project["protocol"] = "https" + project["host"] = boss_host + project["token"] = token - metadata = cfg['Metadata Service'] - metadata['protocol'] = 'https' - metadata['host'] = boss_host - metadata['token'] = token + metadata = cfg["Metadata Service"] + metadata["protocol"] = "https" + metadata["host"] = boss_host + metadata["token"] = token - volume = cfg['Volume Service'] - volume['protocol'] = 'https' - volume['host'] = boss_host - volume['token'] = token + volume = cfg["Volume Service"] + volume["protocol"] = "https" + volume["host"] = boss_host + volume["token"] = token return cfg + def boss_pull_cutout(args): if args.config: rmt = BossRemote(args.config) else: cfg = _generate_config(args.token, args) - with open('intern.cfg', 'w') as f: + with open("intern.cfg", "w") as f: cfg.write(f) - rmt = BossRemote('intern.cfg') + rmt = BossRemote("intern.cfg") COLL_NAME = args.coll EXP_NAME = args.exp @@ -67,12 +69,13 @@ def boss_pull_cutout(args): # Create or get a channel to write to chan_setup = ChannelResource( - CHAN_NAME, COLL_NAME, EXP_NAME, type=args.itype, datatype=args.dtype) + CHAN_NAME, COLL_NAME, EXP_NAME, type=args.itype, datatype=args.dtype + ) try: chan_actual = rmt.get_project(chan_setup) except HTTPError: chan_actual = rmt.create_project(chan_setup) - #get coordinate frame to determine padding bounds + # get coordinate frame to determine padding bounds cfr = CoordinateFrameResource(args.coord) cfr_actual = rmt.get_project(cfr) x_min_bound = cfr_actual.x_start @@ -81,65 +84,65 @@ def boss_pull_cutout(args): y_max_bound = cfr_actual.y_stop z_min_bound = cfr_actual.z_start z_max_bound = cfr_actual.z_stop - - print('Data model setup.') - - xmin = np.max([x_min_bound,args.xmin-args.padding]) - xmax = np.min([x_max_bound,args.xmax+args.padding]) - x_rng = [xmin,xmax] - ymin = np.max([y_min_bound,args.ymin-args.padding]) - ymax = np.min([y_max_bound,args.ymax+args.padding]) - y_rng = [ymin,ymax] - zmin = np.max([z_min_bound,args.zmin-args.padding]) - zmax = np.min([z_max_bound,args.zmax+args.padding]) - z_rng = [zmin,zmax] + + print("Data model setup.") + + xmin = np.max([x_min_bound, args.xmin - args.padding]) + xmax = np.min([x_max_bound, args.xmax + args.padding]) + x_rng = [xmin, xmax] + ymin = np.max([y_min_bound, args.ymin - args.padding]) + ymax = np.min([y_max_bound, 
args.ymax + args.padding]) + y_rng = [ymin, ymax] + zmin = np.max([z_min_bound, args.zmin - args.padding]) + zmax = np.min([z_max_bound, args.zmax + args.padding]) + z_rng = [zmin, zmax] # Verify that the cutout uploaded correctly. attempts = 0 while attempts < 3: try: cutout_data = rmt.get_cutout(chan_actual, args.res, x_rng, y_rng, z_rng) break - except HTTPError as e: + except HTTPError as e: if attempts < 3: attempts += 1 print("Obtained HTTP error from server. Trial {}".format(attempts)) else: print("Failed 3 times: {}".format(e)) - #Data will be in Z,Y,X format - #Change to X,Y,Z for pipeline - cutout_data = np.transpose(cutout_data,(2,1,0)) + # Data will be in Z,Y,X format + # Change to X,Y,Z for pipeline + cutout_data = np.transpose(cutout_data, (2, 1, 0)) def _upload(f): - print('Uploading to s3:/{}/{}'.format(args.bucket, args.output)) - s3 = boto3.resource('s3') + print("Uploading to s3:/{}/{}".format(args.bucket, args.output)) + s3 = boto3.resource("s3") f.seek(0, 0) s3.Object(args.bucket, args.output).put(Body=f) - + # Clean up. if args.bucket and args.s3_only: with tempfile.TemporaryFile() as f: np.save(f, cutout_data) _upload(f) else: - with open(args.output, 'w+b') as f: + with open(args.output, "w+b") as f: np.save(f, cutout_data) if args.bucket: _upload(f) - -#here we push a subset of padded data back to BOSS + +# here we push a subset of padded data back to BOSS def boss_push_cutout(args): if args.config: rmt = BossRemote(args.config) else: cfg = _generate_config(args.token, args) - with open('intern.cfg', 'w') as f: + with open("intern.cfg", "w") as f: cfg.write(f) - rmt = BossRemote('intern.cfg') + rmt = BossRemote("intern.cfg") - #data is desired range + # data is desired range if args.bucket: - s3 = boto3.resource('s3') + s3 = boto3.resource("s3") with tempfile.TemporaryFile() as f: s3.Bucket(args.bucket).download_fileobj(args.input, f) f.seek(0, 0) @@ -148,13 +151,16 @@ def boss_push_cutout(args): data = np.load(args.input) numpyType = np.uint8 - if args.dtype=="uint32": + if args.dtype == "uint32": numpyType = np.uint32 - elif args.dtype=="uint64": + elif args.dtype == "uint64": numpyType = np.uint64 - if data.dtype != args.dtype: + if data.dtype != args.dtype: data = data.astype(numpyType) + sources = [] + if args.source: + sources.append(args.source) COLL_NAME = args.coll EXP_NAME = args.exp @@ -162,13 +168,19 @@ def boss_push_cutout(args): # Create or get a channel to write to chan_setup = ChannelResource( - CHAN_NAME, COLL_NAME, EXP_NAME, type=args.itype, datatype=args.dtype) + CHAN_NAME, + COLL_NAME, + EXP_NAME, + type=args.itype, + datatype=args.dtype, + sources=sources, + ) try: chan_actual = rmt.get_project(chan_setup) except HTTPError: chan_actual = rmt.create_project(chan_setup) - #get coordinate frame to determine padding bounds + # get coordinate frame to determine padding bounds cfr = CoordinateFrameResource(args.coord) cfr_actual = rmt.get_project(cfr) x_min_bound = cfr_actual.x_start @@ -178,12 +190,12 @@ def boss_push_cutout(args): z_min_bound = cfr_actual.z_start z_max_bound = cfr_actual.z_stop - print('Data model setup.') + print("Data model setup.") # Ranges use the Python convention where the number after the : is the stop # value. Thus, x_rng specifies x values where: 0 <= x < 8. 
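To make the padded-cutout arithmetic in `boss_pull_cutout` above concrete, a standalone sketch with made-up extents (not repository code): the requested block is grown by `padding` on each side and then clamped to the coordinate-frame bounds before the cutout request is issued.

```
import numpy as np

# Hypothetical coordinate-frame extents and requested block
x_min_bound, x_max_bound = 0, 1024
xmin_req, xmax_req, padding = 0, 512, 32

xmin = np.max([x_min_bound, xmin_req - padding])  # 0: the pad below the frame start is clipped away
xmax = np.min([x_max_bound, xmax_req + padding])  # 544: the pad above fits inside the frame
x_rng = [int(xmin), int(xmax)]
print(x_rng)  # [0, 544]
```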
- data_shape = data.shape #with padding, will be bigger than needed - #find data cutoffs to get rid of padding + data_shape = data.shape # with padding, will be bigger than needed + # find data cutoffs to get rid of padding # if nmin = 0, this means that the data wasn't padded on there to begin with xstart = args.padding if args.xmin != 0 else 0 ystart = args.padding if args.ymin != 0 else 0 @@ -198,22 +210,22 @@ def boss_push_cutout(args): # yend = np.max([data.shape[1]-args.padding,data.shape[1]-(y_max_bound-args.ymax)]) # zstart = np.min([args.padding,args.zmin-z_min_bound]) # zend = np.max([data.shape[2]-args.padding,data.shape[2]-(z_max_bound-args.zmax)]) - #get range which will be uploaded + # get range which will be uploaded x_rng = [args.xmin, args.xmax] y_rng = [args.ymin, args.ymax] z_rng = [args.zmin, args.zmax] - #Pipeline Data will be in X,Y,Z format - #Change to Z,Y,X for upload - data = np.transpose(data,(2,1,0)) - data = data[zstart:zend,ystart:yend,xstart:xend] - data = data.copy(order='C') + # Pipeline Data will be in X,Y,Z format + # Change to Z,Y,X for upload + data = np.transpose(data, (2, 1, 0)) + data = data[zstart:zend, ystart:yend, xstart:xend] + data = data.copy(order="C") # Verify that the cutout uploaded correctly. attempts = 0 while attempts < 3: try: rmt.create_cutout(chan_actual, args.res, x_rng, y_rng, z_rng, data) break - except HTTPError as e: + except HTTPError as e: if attempts < 3: attempts += 1 print("These are the dimensions: ") @@ -230,7 +242,8 @@ def boss_push_cutout(args): raise Exception("Failed 3 times: {}".format(e)) # Clean up. -'''boss_merge_xbrain + +"""boss_merge_xbrain Here we push a subset of padded data back to BOSS, merging with existing data in BOSS in padded region Merging here is for XBrain only, will need to work more on EM Here we are pushing a block of data into a channel which has other blocks of data, possibly already in it @@ -256,39 +269,40 @@ def boss_push_cutout(args): Side effect: Data is uploaded to the BOSS channel, covering [xmin:xmax,ymin:ymax,zmin:zmax]. Any padded regions around this block are also merged -''' +""" + + def boss_merge_xbrain(args): # Verify that the cutout uploaded correctly. - def pull_margin_cutout(chan_actual,x_rng,y_rng,z_rng): + def pull_margin_cutout(chan_actual, x_rng, y_rng, z_rng): attempts = 0 while attempts < 3: try: cutout_data = rmt.get_cutout(chan_actual, 0, x_rng, y_rng, z_rng) break - except HTTPError as e: + except HTTPError as e: if attempts < 3: attempts += 1 print("Obtained HTTP error from server. 
Trial {}".format(attempts)) else: print("Failed 3 times: {}".format(e)) - #Data will be in Z,Y,X format - #Change to X,Y,Z for pipeline - cutout_data = np.transpose(cutout_data,(2,1,0)) + # Data will be in Z,Y,X format + # Change to X,Y,Z for pipeline + cutout_data = np.transpose(cutout_data, (2, 1, 0)) return cutout_data - templatesize = args.templatesize if args.config: rmt = BossRemote(args.config) else: cfg = _generate_config(args.token, args) - with open('intern.cfg', 'w') as f: + with open("intern.cfg", "w") as f: cfg.write(f) - rmt = BossRemote('intern.cfg') + rmt = BossRemote("intern.cfg") - #data is desired range + # data is desired range if args.bucket: - s3 = boto3.resource('s3') + s3 = boto3.resource("s3") with tempfile.TemporaryFile() as f: s3.Bucket(args.bucket).download_fileobj(args.input, f) f.seek(0, 0) @@ -307,14 +321,15 @@ def pull_margin_cutout(chan_actual,x_rng,y_rng,z_rng): # Create or get a channel to write to chan_setup = ChannelResource( - CHAN_NAME, COLL_NAME, EXP_NAME, type=args.itype, datatype=args.dtype) + CHAN_NAME, COLL_NAME, EXP_NAME, type=args.itype, datatype=args.dtype + ) try: chan_actual = rmt.get_project(chan_setup) except HTTPError: chan_actual = rmt.create_project(chan_setup) - #get coordinate frame to determine padding bounds - cfr = CoordinateFrameResource(args.coord) + # get coordinate frame to determine padding bounds + cfr = CoordinateFrameResource(args.coord) cfr_actual = rmt.get_project(cfr) x_min_bound = cfr_actual.x_start x_max_bound = cfr_actual.x_stop @@ -323,231 +338,416 @@ def pull_margin_cutout(chan_actual,x_rng,y_rng,z_rng): z_min_bound = cfr_actual.z_start z_max_bound = cfr_actual.z_stop - #coordinates of data block in original coordinate frame, before padding + # coordinates of data block in original coordinate frame, before padding x_block = [args.xmin, args.xmax] y_block = [args.ymin, args.ymax] z_block = [args.zmin, args.zmax] - #Coordinates of data block with padding in original coordinate frame - x_block_pad = [np.amax([args.xmin-args.padding,x_min_bound]), np.amin([args.xmax+args.padding,x_max_bound])] - y_block_pad = [np.amax([args.ymin-args.padding,y_min_bound]), np.amin([args.ymax+args.padding,y_max_bound])] - z_block_pad = [np.amax([args.zmin-args.padding,z_min_bound]), np.amin([args.zmax+args.padding,z_max_bound])] - - #Coordinates of core data block in local coordinate frame - xstart = np.amin([args.padding,args.xmin-x_min_bound]) - xend = np.amax([data.shape[0]-args.padding,data.shape[0]-(x_max_bound-args.xmax)]) - ystart = np.amin([args.padding,args.ymin-y_min_bound]) - yend = np.amax([data.shape[1]-args.padding,data.shape[1]-(y_max_bound-args.ymax)]) - zstart = np.amin([args.padding,args.zmin-z_min_bound]) - zend = np.amax([data.shape[2]-args.padding,data.shape[2]-(z_max_bound-args.zmax)]) + # Coordinates of data block with padding in original coordinate frame + x_block_pad = [ + np.amax([args.xmin - args.padding, x_min_bound]), + np.amin([args.xmax + args.padding, x_max_bound]), + ] + y_block_pad = [ + np.amax([args.ymin - args.padding, y_min_bound]), + np.amin([args.ymax + args.padding, y_max_bound]), + ] + z_block_pad = [ + np.amax([args.zmin - args.padding, z_min_bound]), + np.amin([args.zmax + args.padding, z_max_bound]), + ] + + # Coordinates of core data block in local coordinate frame + xstart = np.amin([args.padding, args.xmin - x_min_bound]) + xend = np.amax( + [data.shape[0] - args.padding, data.shape[0] - (x_max_bound - args.xmax)] + ) + ystart = np.amin([args.padding, args.ymin - y_min_bound]) + yend = 
np.amax( + [data.shape[1] - args.padding, data.shape[1] - (y_max_bound - args.ymax)] + ) + zstart = np.amin([args.padding, args.zmin - z_min_bound]) + zend = np.amax( + [data.shape[2] - args.padding, data.shape[2] - (z_max_bound - args.zmax)] + ) - print('Data model setup.') - #Template size to decide which centroids to eliminate + print("Data model setup.") + # Template size to decide which centroids to eliminate # Ranges use the Python convention where the number after the : is the stop # value. Thus, x_rng specifies x values where: 0 <= x < 8. - if args.onesided: - #Only merge on the max side, to prevent duplication of detection - #Binarize Map - data[np.where(data>0)]=1 - - #Search through centroids - #On side of max values, keep anything where centroid is in padded region - #On side of min values, remove anything that is partially in padded region (covered by another block) - n_centers,_ = centroids.shape #n by 4 + if args.onesided: + # Only merge on the max side, to prevent duplication of detection + # Binarize Map + data[np.where(data > 0)] = 1 + + # Search through centroids + # On side of max values, keep anything where centroid is in padded region + # On side of min values, remove anything that is partially in padded region (covered by another block) + n_centers, _ = centroids.shape # n by 4 bad_inds = [] - for i in range(0,n_centers): - if centroids[i,0] < xstart or centroids[i,0] - templatesize/2 > xend: + for i in range(0, n_centers): + if centroids[i, 0] < xstart or centroids[i, 0] - templatesize / 2 > xend: bad_inds.append(i) - elif centroids[i,1] < ystart or centroids[i,1] - templatesize/2 > yend: + elif centroids[i, 1] < ystart or centroids[i, 1] - templatesize / 2 > yend: bad_inds.append(i) - elif centroids[i,2] < zstart or centroids[i,2] - templatesize/2 > zend: + elif centroids[i, 2] < zstart or centroids[i, 2] - templatesize / 2 > zend: bad_inds.append(i) - centroids_out = np.delete(centroids,bad_inds, axis=0) - #translate into global coordinates from local data block - centroids_out[:,0] = centroids_out[:,0] - xstart + args.xmin - centroids_out[:,1] = centroids_out[:,1] - ystart + args.ymin - centroids_out[:,2] = centroids_out[:,2] - zstart + args.zmin - - #Eliminate any cells form data which overlap with the padding edge + centroids_out = np.delete(centroids, bad_inds, axis=0) + # translate into global coordinates from local data block + centroids_out[:, 0] = centroids_out[:, 0] - xstart + args.xmin + centroids_out[:, 1] = centroids_out[:, 1] - ystart + args.ymin + centroids_out[:, 2] = centroids_out[:, 2] - zstart + args.zmin + + # Eliminate any cells form data which overlap with the padding edge for ind in bad_inds: - xi = np.array([centroids[ind,0]-np.ceil(templatesize/2),centroids[ind,0]+np.ceil(templatesize/2)]).astype(int) - yi = np.array([centroids[ind,1]-np.ceil(templatesize/2),centroids[ind,1]+np.ceil(templatesize/2)]).astype(int) - zi = np.array([centroids[ind,2]-np.ceil(templatesize/2),centroids[ind,2]+np.ceil(templatesize/2)]).astype(int) - data[xi,yi,zi] = 0 - #Keep any interior cells, any which overlap original boundary and not padding - #Pull down existing boundary area, if area is valid - #Test side 4 - if xend0)]=1 + # Binarize Map + data[np.where(data > 0)] = 1 - #Search through centroids - n_centers,_ = centroids.shape #n by 4 + # Search through centroids + n_centers, _ = centroids.shape # n by 4 bad_inds = [] - for i in range(0,n_centers): - if centroids[i,0] + templatesize/2 < xstart or centroids[i,0] - templatesize/2 > xend: + for i in range(0, 
n_centers): + if ( + centroids[i, 0] + templatesize / 2 < xstart + or centroids[i, 0] - templatesize / 2 > xend + ): bad_inds.append(i) - elif centroids[i,1] + templatesize/2 < ystart or centroids[i,1] - templatesize/2 > yend: + elif ( + centroids[i, 1] + templatesize / 2 < ystart + or centroids[i, 1] - templatesize / 2 > yend + ): bad_inds.append(i) - elif centroids[i,2] + templatesize/2 < zstart or centroids[i,2] - templatesize/2 > zend: + elif ( + centroids[i, 2] + templatesize / 2 < zstart + or centroids[i, 2] - templatesize / 2 > zend + ): bad_inds.append(i) - centroids_out = np.delete(centroids,bad_inds, axis=0) - #translate into global coordinates from local data block - centroids_out[:,0] = centroids_out[:,0] - xstart + args.xmin - centroids_out[:,1] = centroids_out[:,1] - ystart + args.ymin - centroids_out[:,2] = centroids_out[:,2] - zstart + args.zmin - - #Eliminate any cells form data which overlap with the padding edge + centroids_out = np.delete(centroids, bad_inds, axis=0) + # translate into global coordinates from local data block + centroids_out[:, 0] = centroids_out[:, 0] - xstart + args.xmin + centroids_out[:, 1] = centroids_out[:, 1] - ystart + args.ymin + centroids_out[:, 2] = centroids_out[:, 2] - zstart + args.zmin + + # Eliminate any cells form data which overlap with the padding edge for ind in bad_inds: - xi = np.array([centroids[ind,0]-np.ceil(templatesize/2),centroids[ind,0]+np.ceil(templatesize/2)]).astype(int) - yi = np.array([centroids[ind,1]-np.ceil(templatesize/2),centroids[ind,1]+np.ceil(templatesize/2)]).astype(int) - zi = np.array([centroids[ind,2]-np.ceil(templatesize/2),centroids[ind,2]+np.ceil(templatesize/2)]).astype(int) - data[xi,yi,zi] = 0 - #Keep any interior cells, any which overlap original boundary and not padding - #Pull down existing boundary area, if area is valid - #Test side 1 - if xstart>0: #There is padding on side 1 of cube [0:pad,0:ymax+2*pad,0:zmax+2*pad] - margin = pull_margin_cutout(chan_actual,[x_block_pad[0],x_block[0]],y_block_pad,z_block_pad) - data[0:xstart,:,:] = np.maximum(data[0:xstart,:,:],margin) - #Test side 2 - if ystart>0: #There is padding on side 2 of cube [pad:xmax+2*pad,0:pad,pad:zmax+2*pad] - margin = pull_margin_cutout(chan_actual,[x_block[0],x_block_pad[1]],[y_block_pad[0],y_block[0]],[z_block[0],z_block_pad[1]]) - data[xstart:data.shape[0],0:ystart,zstart:data.shape[2]] = np.maximum(data[xstart:data.shape[0],0:ystart,zstart:data.shape[1]],margin) - #Test side 3 - if zstart>0: #There is padding on side 3 of cube [pad:xmax+2*pad,pad:ymax+2*pad,0:pad] - margin = pull_margin_cutout(chan_actual,[x_block[0],x_block_pad[1]],[y_block[0],y_block_pad[1]],[z_block_pad[0],z_block[0]]) - data[xstart:data.shape[0],ystart:data.shape[1],0:zstart] = np.maximum(data[xstart:data.shape[0],ystart:data.shape[1],0:zstart],margin) - #Test side 4 - if xend 0 + ): # There is padding on side 1 of cube [0:pad,0:ymax+2*pad,0:zmax+2*pad] + margin = pull_margin_cutout( + chan_actual, [x_block_pad[0], x_block[0]], y_block_pad, z_block_pad + ) + data[0:xstart, :, :] = np.maximum(data[0:xstart, :, :], margin) + # Test side 2 + if ( + ystart > 0 + ): # There is padding on side 2 of cube [pad:xmax+2*pad,0:pad,pad:zmax+2*pad] + margin = pull_margin_cutout( + chan_actual, + [x_block[0], x_block_pad[1]], + [y_block_pad[0], y_block[0]], + [z_block[0], z_block_pad[1]], + ) + data[xstart : data.shape[0], 0:ystart, zstart : data.shape[2]] = np.maximum( + data[xstart : data.shape[0], 0:ystart, zstart : data.shape[1]], margin + ) + # Test side 3 + if ( + 
zstart > 0 + ): # There is padding on side 3 of cube [pad:xmax+2*pad,pad:ymax+2*pad,0:pad] + margin = pull_margin_cutout( + chan_actual, + [x_block[0], x_block_pad[1]], + [y_block[0], y_block_pad[1]], + [z_block_pad[0], z_block[0]], + ) + data[xstart : data.shape[0], ystart : data.shape[1], 0:zstart] = np.maximum( + data[xstart : data.shape[0], ystart : data.shape[1], 0:zstart], margin + ) + # Test side 4 + if ( + xend < data.shape[0] + ): # There is padding on side 4 of cube [xmax+pad:xmax+2*pad,pad:ymax+2*pad,pad:zmax+2*pad] + margin = pull_margin_cutout( + chan_actual, + [x_block[1], x_block_pad[1]], + [y_block[0], y_block_pad[1]], + [z_block[0], z_block_pad[1]], + ) + data[ + xend : data.shape[0], ystart : data.shape[1], zstart : data.shape[2] + ] = np.maximum( + data[ + xend : data.shape[0], ystart : data.shape[1], zstart : data.shape[2] + ], + margin, + ) + # Test side 5 + if ( + yend < data.shape[1] + ): # There is padding on side 5 of cube [pad:xmax+2*pad,ymax+pad:ymax+2*pad,pad:zmax+2*pad] + margin = pull_margin_cutout( + chan_actual, + [x_block[0], x_block_pad[1]], + [y_block[1], y_block_pad[1]], + [z_block[0], z_block_pad[1]], + ) + data[ + xstart : data.shape[0], yend : data.shape[1], zstart : data.shape[2] + ] = np.maximum( + data[ + xstart : data.shape[0], yend : data.shape[1], zstart : data.shape[2] + ], + margin, + ) + # Test side 6 + if ( + zend < data.shape[2] + ): # There is padding on side 4 of cube [pad:xmax+2*pad,pad:ymax+2*pad,zmax+pad:zmax+2*pad] + margin = pull_margin_cutout( + chan_actual, + [x_block[0], x_block_pad[1]], + [y_block[0], y_block_pad[1]], + [z_block[1], z_block_pad[1]], + ) + data[ + xstart : data.shape[0], ystart : data.shape[1], zend : data.shape[2] + ] = np.maximum( + data[ + xstart : data.shape[0], ystart : data.shape[1], zend : data.shape[2] + ], + margin, + ) + + # push results over entire padded area + # Pipeline Data will be in X,Y,Z format + # Change to Z,Y,X for upload + data = np.transpose(data, (2, 1, 0)) + data = data.copy(order="C").astype(eval("np.{}".format(args.dtype))) # Verify that the cutout uploaded correctly. rmt.create_cutout(chan_actual, 0, x_block_pad, y_block_pad, z_block_pad, data) # Clean up. def _upload(f): - print('Uploading to s3:/{}/{}'.format(args.bucket, args.output)) - s3 = boto3.resource('s3') + print("Uploading to s3:/{}/{}".format(args.bucket, args.output)) + s3 = boto3.resource("s3") f.seek(0, 0) s3.Object(args.bucket, args.output).put(Body=f) - + # Clean up. 
if args.bucket and args.s3_only: with tempfile.TemporaryFile() as f: np.save(f, centroids_out) _upload(f) else: - print('Saving output') - with open(args.output, 'w+b') as f: + print("Saving output") + with open(args.output, "w+b") as f: np.save(f, centroids_out) if args.bucket: _upload(f) return - - def main(): - parser = argparse.ArgumentParser(description='boss processing script') + parser = argparse.ArgumentParser(description="boss processing script") parent_parser = argparse.ArgumentParser(add_help=False) - subparsers = parser.add_subparsers(title='commands') + subparsers = parser.add_subparsers(title="commands") parser.set_defaults(func=lambda _: parser.print_help()) group = parent_parser.add_mutually_exclusive_group(required=True) - group.add_argument('-c', '--config', default=None, help='Boss config file') - group.add_argument('-t', '--token', default=None, help='Boss API Token') - parent_parser.add_argument('-b', '--bucket', default=None, help='S3 bucket to save to or load from') - - parent_parser.add_argument('--coll', required=True, help='Coll name') - parent_parser.add_argument('--exp', required=True, help='EXP_NAME') - parent_parser.add_argument('--chan', required=True, help='CHAN_NAME') - parent_parser.add_argument('--coord', required=True, help='Coordinate_Frame') - parent_parser.add_argument('--dtype', required=True, help='data type') - parent_parser.add_argument('--itype', required=True, help='I type') - parent_parser.add_argument('--host', required=False, default= 'api.bossdb.org', help='Name of boss host: api.bossdb.org') - - parent_parser.add_argument('--res', type=int, help='Resolution') - parent_parser.add_argument('--xmin', type=int, default=0, help='Xmin') - parent_parser.add_argument('--xmax', type=int, default=1, help='Xmax') - parent_parser.add_argument('--ymin', type=int, default=0, help='Ymin') - parent_parser.add_argument('--ymax', type=int, default=1, help='Ymax') - parent_parser.add_argument('--zmin', type=int, default=0, help='Zmin') - parent_parser.add_argument('--zmax', type=int, default=1, help='Zmax') - parent_parser.add_argument('--padding', type=int, default=0, help='padding') - parent_parser.add_argument('--onesided', type=int, default=0, help='flag for one-sided padding') #indicates whether padding is one-sided or two-sided + group.add_argument("-c", "--config", default=None, help="Boss config file") + group.add_argument("-t", "--token", default=None, help="Boss API Token") + parent_parser.add_argument( + "-b", "--bucket", default=None, help="S3 bucket to save to or load from" + ) + + parent_parser.add_argument("--coll", required=True, help="Coll name") + parent_parser.add_argument("--exp", required=True, help="EXP_NAME") + parent_parser.add_argument("--chan", required=True, help="CHAN_NAME") + parent_parser.add_argument("--coord", required=True, help="Coordinate_Frame") + parent_parser.add_argument("--dtype", required=True, help="data type") + parent_parser.add_argument("--itype", required=True, help="I type") + parent_parser.add_argument( + "--host", + required=False, + default="api.bossdb.org", + help="Name of boss host: api.bossdb.org", + ) + + parent_parser.add_argument("--res", type=int, help="Resolution") + parent_parser.add_argument("--xmin", type=int, default=0, help="Xmin") + parent_parser.add_argument("--xmax", type=int, default=1, help="Xmax") + parent_parser.add_argument("--ymin", type=int, default=0, help="Ymin") + parent_parser.add_argument("--ymax", type=int, default=1, help="Ymax") + parent_parser.add_argument("--zmin", type=int, 
default=0, help="Zmin") + parent_parser.add_argument("--zmax", type=int, default=1, help="Zmax") + parent_parser.add_argument("--padding", type=int, default=0, help="padding") + parent_parser.add_argument( + "--onesided", type=int, default=0, help="flag for one-sided padding" + ) # indicates whether padding is one-sided or two-sided push_parser = subparsers.add_parser( - 'push', help='Push images to boss', parents=[parent_parser] + "push", help="Push images to boss", parents=[parent_parser] ) pull_parser = subparsers.add_parser( - 'pull', help='Pull images from boss and optionally save to AWS S3', - parents=[parent_parser] + "pull", + help="Pull images from boss and optionally save to AWS S3", + parents=[parent_parser], ) merge_parser = subparsers.add_parser( - 'merge', help='Merge xbrain images to boss', - parents=[parent_parser] + "merge", help="Merge xbrain images to boss", parents=[parent_parser] ) - push_parser.add_argument('-i', '--input', required=True, help='Input file') + push_parser.add_argument("-i", "--input", required=True, help="Input file") + push_parser.add_argument( + "--source", required=False, help="Source channel for upload" + ) push_parser.set_defaults(func=boss_push_cutout) - merge_parser.add_argument('--templatesize', required=True, help='Template size (diameter of spherical template)',type=int) - merge_parser.add_argument('-i', '--input', required=True, help='Input file') - merge_parser.add_argument('--centroids', required=True, help='Centroid numpy file') - merge_parser.add_argument('-o', '--output', required=True, help='Output centroid file') + merge_parser.add_argument( + "--templatesize", + required=True, + help="Template size (diameter of spherical template)", + type=int, + ) + merge_parser.add_argument("-i", "--input", required=True, help="Input file") + merge_parser.add_argument("--centroids", required=True, help="Centroid numpy file") + merge_parser.add_argument( + "-o", "--output", required=True, help="Output centroid file" + ) merge_parser.set_defaults(func=boss_merge_xbrain) - pull_parser.add_argument('-o', '--output', required=True, help='Output file') + pull_parser.add_argument("-o", "--output", required=True, help="Output file") pull_parser.add_argument( - '--s3-only', dest='s3_only', action='store_true', default=False, help='Only save output to AWS S3' + "--s3-only", + dest="s3_only", + action="store_true", + default=False, + help="Only save output to AWS S3", ) pull_parser.set_defaults(func=boss_pull_cutout) args = parser.parse_args() args.func(args) -if __name__ == '__main__': - main() + +if __name__ == "__main__": + main() diff --git a/saber/boss_access/boss_push_nos3.cwl b/saber/boss_access/boss_push_nos3.cwl index bfbfcdd..78ac091 100644 --- a/saber/boss_access/boss_push_nos3.cwl +++ b/saber/boss_access/boss_push_nos3.cwl @@ -112,6 +112,10 @@ inputs: inputBinding: prefix: --padding position: 17 - + source: + type: string? + inputBinding: + prefix: --source + position: 18 outputs: [] diff --git a/saber/boss_access/boss_test/dummy_workflow.cwl b/saber/boss_access/boss_test/dummy_workflow.cwl new file mode 100644 index 0000000..19af353 --- /dev/null +++ b/saber/boss_access/boss_test/dummy_workflow.cwl @@ -0,0 +1,70 @@ + +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cwlVersion: v1.0 +class: Workflow +doc: local +inputs: + # Inputs for BOSS + host_name: string + token: string + coll_name: string + exp_name: string + coord_name: string + xmin: int? + xmax: int? + ymin: int? + ymax: int? + zmin: int? + zmax: int? + padding: int? + resolution: int? + output_name: string + dtype_name: string + itype_name: string + ## Boss pull + in_chan_name: string + +outputs: + pull_output: + type: File + outputSource: boss_pull/pull_output +steps: + boss_pull: + run: ../../../../saber/boss_access/boss_pull_nos3.cwl + in: + host_name: host_name + token: token + coll_name: coll_name + exp_name: exp_name + chan_name: in_chan_name + dtype_name: dtype_name + itype_name: itype_name + resolution: resolution + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + padding: padding + output_name: output_name + coord_name: coord_name + hints: + saber: + local: True + file_path: /Users/xenesd1/Projects/aplbrain/saber/volumes/data/local + out: + [pull_output] diff --git a/saber/boss_access/boss_test/job_params.yml b/saber/boss_access/boss_test/job_params.yml new file mode 100644 index 0000000..10bd6d4 --- /dev/null +++ b/saber/boss_access/boss_test/job_params.yml @@ -0,0 +1,32 @@ + +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Inputs for BOSS +host_name: api.bossdb.io +token: public +coll_name: Kasthuri +exp_name: em +dtype_name: uint8 +itype_name: image +coord_name: KasthuriFrame +xmin: 5000 +xmax: 6000 +ymin: 5000 +ymax: 6000 +zmin: 600 +zmax: 610 +padding: 0 +resolution: 0 +in_chan_name: images +output_name: kasthuri_raw.npy diff --git a/saber/boss_access/boss_test/sweep.yml b/saber/boss_access/boss_test/sweep.yml new file mode 100644 index 0000000..195d2d5 --- /dev/null +++ b/saber/boss_access/boss_test/sweep.yml @@ -0,0 +1,51 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +sampler: + method: batch-grid + batch_size: 2 +x: + range: + start: 5000 + stop: 6000 + step: 500 + parameters: + min: xmin + max: xmax + steps: + - boss_pull +y: + range: + start: 5000 + stop: 6000 + step: 500 + parameters: + min: ymin + max: ymax + steps: + - boss_pull +# z: +# range: +# start: 631 +# stop: 998 +# step: 92 +# parameters: +# min: zmin +# max: zmax +# steps: +# - boss_pull_raw +# - boss_push_synapses_boss +# - boss_push_synapses_bossdb +# - boss_push_membranes_boss +# - boss_push_membranes_bossdb diff --git a/saber/dvid_access/Dockerfile b/saber/dvid_access/Dockerfile new file mode 100644 index 0000000..c3aac50 --- /dev/null +++ b/saber/dvid_access/Dockerfile @@ -0,0 +1,25 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#Use an official Python runtime as a parent image +FROM python:3.6 + +RUN pip install boto3==1.11.8 +RUN git clone https://github.com/jhuapl-boss/intern.git && cd intern && git checkout RemoteExtension && git pull && python3 setup.py install --user +RUN mkdir /app +COPY ./dvid_access.py /app/ +RUN chown -R 1000:100 /app/ +ENV PATH /app:$PATH +WORKDIR /app + diff --git a/saber/dvid_access/README.md b/saber/dvid_access/README.md new file mode 100644 index 0000000..a6ae2d5 --- /dev/null +++ b/saber/dvid_access/README.md @@ -0,0 +1,28 @@ +# DVID Access Docker Container + +## Overview + +This Docker container provides the tools necessary to push and pull data from the DVID Service. + +## Building + +1. Navigate to this folder + + ``` + cd saber/saber/dvid_access/ + ``` +1. Build the docker container + + ``` + docker build -t aplbrain/dvid-access . + ``` + +## Running + +You can either run this Docker container as a standalone tool or launch an interactive terminal and access the tools via the command line. The latter is recommended, as you only have to attach volumes once. + +``` +docker run -it -v $(pwd)/data:/data/ aplbrain/dvid-access /bin/bash +``` + +This will launch the container as an interactive terminal and bind `./data` on your local system to `/data/` in the container. \ No newline at end of file diff --git a/saber/dvid_access/dvid_access.py b/saber/dvid_access/dvid_access.py new file mode 100755 index 0000000..a9a4ccf --- /dev/null +++ b/saber/dvid_access/dvid_access.py @@ -0,0 +1,187 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
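The `sweep.yml` above drives a batch-grid sweep: each axis range is expanded into consecutive `(min, max)` windows that are bound to the `boss_pull` step's `xmin`/`xmax` and `ymin`/`ymax` inputs, and the resulting cutouts are launched two at a time (`batch_size: 2`). The expansion itself is done by SABER's sampler; the sketch below only illustrates the intended grid and assumes each window spans exactly one step:

```python
# Illustration of the batch-grid expansion described by sweep.yml above; the
# real expansion is done by SABER's sampler and may differ in detail.
from itertools import product

def windows(start, stop, step):
    return [(lo, min(lo + step, stop)) for lo in range(start, stop, step)]

xs = windows(5000, 6000, 500)   # [(5000, 5500), (5500, 6000)]
ys = windows(5000, 6000, 500)
jobs = [
    {"xmin": x0, "xmax": x1, "ymin": y0, "ymax": y1}
    for (x0, x1), (y0, y1) in product(xs, ys)
]
print(len(jobs))  # 4 boss_pull cutouts, run in batches of 2
```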
+ +#!/usr/bin/env python + +import argparse +import configparser +import tempfile +import boto3 +from intern.remote.dvid import DVIDRemote +from intern.resource.dvid import DataInstanceResource +import numpy as np +from requests import HTTPError + + +def DVID_pull_cutout(args): + rmt = DVIDRemote({"protocol": "http", "host": args.host}) + + # Create or get a channel to write to + instance_setup = DataInstanceResource( + UUID=args.uuid, + name=args.data_instance, + type=args.type, + alias=args.alias, + datatype=args.datatype, + ) + print("Data Instance setup.") + + x_rng = [args.xmin, args.xmax] + y_rng = [args.ymin, args.ymax] + z_rng = [args.zmin, args.zmax] + # Verify that the cutout uploaded correctly. + attempts = 0 + while attempts < 3: + try: + cutout_data = rmt.get_cutout(instance_setup, args.res, x_rng, y_rng, z_rng) + break + except HTTPError as e: + if attempts < 3: + attempts += 1 + print("Obtained HTTP error from server. Trial {}".format(attempts)) + else: + print("Failed 3 times: {}".format(e)) + # Data will be in Z,Y,X format + # Change to X,Y,Z for pipeline + cutout_data = np.transpose(cutout_data, (2, 1, 0)) + + # Clean up. + with open(args.output, "w+b") as f: + np.save(f, cutout_data) + + +# here we push a subset of padded data back to DVID +def DVID_push_cutout(args): + rmt = DVIDRemote({"protocol": "http", "host": args.host}) + + # data is desired range + + data = np.load(args.input) + + numpyType = np.uint8 + if args.datatype == "uint32": + numpyType = np.uint32 + elif args.datatype == "uint64": + numpyType = np.uint64 + + if data.dtype != args.datatype: + data = data.astype(numpyType) + sources = [] + if args.source: + sources.append(args.source) + + # Create or get a channel to write to + instance_setup = DataInstanceResource( + UUID=args.uuid, + name=args.data_instance, + type=args.type, + alias=args.alias, + datatype=args.datatype, + ) + print("Data Instance setup.") + chan_actual_up = rmt.create_project(instance_setup) + x_rng = [args.xmin, args.xmax] + y_rng = [args.ymin, args.ymax] + z_rng = [args.zmin, args.zmax] + + print("Data model setup. UUID: {}".format(chan_actual_up)) + + # Pipeline Data will be in X,Y,Z format + # Change to Z,Y,X for upload + data = np.transpose(data, (2, 1, 0)) + data = data.copy(order="C") + # Verify that the cutout uploaded correctly. + attempts = 0 + while attempts < 3: + try: + rmt.create_cutout(instance_setup, args.res, x_rng, y_rng, z_rng, data) + break + except HTTPError as e: + if attempts < 3: + attempts += 1 + print("These are the dimensions: ") + print(data.shape) + print("This is the data type:") + print(data.dtype) + print("Specified data type was:") + print(args.dtype) + print("Specified image type") + print(args.itype) + print("Obtained HTTP error from server. 
Trial {}".format(attempts)) + print("The error: {}".format(e)) + else: + raise Exception("Failed 3 times: {}".format(e)) + # Clean up + + +def main(): + parser = argparse.ArgumentParser(description="dvid processing script") + parent_parser = argparse.ArgumentParser(add_help=False) + subparsers = parser.add_subparsers(title="commands") + + parser.set_defaults(func=lambda _: parser.print_help()) + + parent_parser.add_argument( + "--uuid", required=False, default=None, help="Root UUID of the repository" + ) + parent_parser.add_argument( + "--alias", required=False, default="", help="Readable UUID Tag" + ) + parent_parser.add_argument( + "--data_instance", + required=True, + help="Name of data instance within repository ", + ) + parent_parser.add_argument( + "--datatype", + required=False, + help="data type of the instance (uint8, uint16, uint64) defaults to uint8", + ) + parent_parser.add_argument( + "--type", + required=False, + help="type of the resource (uint8blk, labelblk) defaults to uint8blk", + ) + parent_parser.add_argument("--host", required=True, help="Name of DVID host") + + parent_parser.add_argument("--res", type=int, help="Resolution") + parent_parser.add_argument("--xmin", type=int, default=0, help="Xmin") + parent_parser.add_argument("--xmax", type=int, default=1, help="Xmax") + parent_parser.add_argument("--ymin", type=int, default=0, help="Ymin") + parent_parser.add_argument("--ymax", type=int, default=1, help="Ymax") + parent_parser.add_argument("--zmin", type=int, default=0, help="Zmin") + parent_parser.add_argument("--zmax", type=int, default=1, help="Zmax") + + push_parser = subparsers.add_parser( + "push", help="Push images to dvid from input file", parents=[parent_parser] + ) + pull_parser = subparsers.add_parser( + "pull", help="Pull images from dvid", parents=[parent_parser] + ) + + push_parser.add_argument("-i", "--input", required=True, help="Input file") + push_parser.add_argument( + "--source", required=False, help="Source channel for upload" + ) + push_parser.set_defaults(func=DVID_push_cutout) + + pull_parser.add_argument("-o", "--output", required=True, help="Output file") + pull_parser.set_defaults(func=DVID_pull_cutout) + + args = parser.parse_args() + args.func(args) + + +if __name__ == "__main__": + main() diff --git a/saber/dvid_access/dvid_pull.cwl b/saber/dvid_access/dvid_pull.cwl new file mode 100644 index 0000000..bb90fad --- /dev/null +++ b/saber/dvid_access/dvid_pull.cwl @@ -0,0 +1,99 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
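One caveat in the `dvid_access.py` script above: inside both retry loops the `if attempts < 3` test is always true (it repeats the loop condition), so the `else` branches are unreachable. If the server fails three times, the pull path leaves `cutout_data` unassigned and the subsequent `np.transpose` raises a `NameError`, while the push path falls through silently. A small retry helper along these lines would make the failure explicit; this is a sketch, not part of the patch:

```python
# Sketch of an explicit retry wrapper for the intern cutout calls above.
from requests import HTTPError

def with_retries(fn, attempts=3):
    last_error = None
    for attempt in range(1, attempts + 1):
        try:
            return fn()
        except HTTPError as e:
            last_error = e
            print("Obtained HTTP error from server. Trial {}".format(attempt))
    raise RuntimeError("Failed {} times".format(attempts)) from last_error

# Example use inside DVID_pull_cutout:
# cutout_data = with_retries(
#     lambda: rmt.get_cutout(instance_setup, args.res, x_rng, y_rng, z_rng)
# )
```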
+ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: aplbrain/dvid-access +baseCommand: python +arguments: ['/app/dvid_access.py', 'pull'] +inputs: + host_name: + type: string + inputBinding: + position: 1 + prefix: --host + uuid: + type: string + inputBinding: + position: 2 + prefix: --uuid + dtype_name: + type: string + inputBinding: + position: 3 + prefix: --datatype + resource_name: + type: string + inputBinding: + position: 4 + prefix: --data_instance + resolution: + type: int? + inputBinding: + prefix: --res + position: 5 + xmin: + type: int? + inputBinding: + prefix: --xmin + position: 6 + xmax: + type: int? + inputBinding: + prefix: --xmax + position: 7 + ymin: + type: int? + inputBinding: + prefix: --ymin + position: 8 + ymax: + type: int? + inputBinding: + prefix: --ymax + position: 9 + zmin: + type: int? + inputBinding: + prefix: --zmin + position: 10 + zmax: + type: int? + inputBinding: + prefix: --zmax + position: 11 + output_name: + type: string + inputBinding: + position: 12 + prefix: --output + type: + type: string? + inputBinding: + prefix: --type + position: 13 + alias: + type: string? + inputBinding: + prefix: --alias + position: 14 +outputs: + pull_output: + type: File + outputBinding: + glob: $(inputs.output_name) diff --git a/saber/dvid_access/dvid_push.cwl b/saber/dvid_access/dvid_push.cwl new file mode 100644 index 0000000..94cbc8e --- /dev/null +++ b/saber/dvid_access/dvid_push.cwl @@ -0,0 +1,101 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: aplbrain/dvid-access +baseCommand: python +arguments: ['/app/dvid_access.py', 'push'] +inputs: + input: + type: File + inputBinding: + position: 1 + prefix: -i + host_name: + type: string + inputBinding: + position: 2 + prefix: --host + uuid: + type: string? + inputBinding: + position: 3 + prefix: --uuid + dtype_name: + type: string + inputBinding: + position: 4 + prefix: --datatype + resource_name: + type: string + inputBinding: + position: 5 + prefix: --data_instance + resolution: + type: int? + inputBinding: + prefix: --res + position: 6 + xmin: + type: int? + inputBinding: + prefix: --xmin + position: 7 + xmax: + type: int? + inputBinding: + prefix: --xmax + position: 8 + ymin: + type: int? + inputBinding: + prefix: --ymin + position: 9 + ymax: + type: int? + inputBinding: + prefix: --ymax + position: 10 + zmin: + type: int? + inputBinding: + prefix: --zmin + position: 11 + zmax: + type: int? + inputBinding: + prefix: --zmax + position: 12 + source: + type: string? + inputBinding: + prefix: --source + position: 13 + type: + type: string? + inputBinding: + prefix: --type + position: 14 + alias: + type: string? 
+ inputBinding: + prefix: --alias + position: 14 +outputs: [] + diff --git a/saber/i2g/detection/Dockerfile b/saber/i2g/detection/Dockerfile index ba2a370..be97e32 100644 --- a/saber/i2g/detection/Dockerfile +++ b/saber/i2g/detection/Dockerfile @@ -54,4 +54,4 @@ ENV KERAS_BACKEND=theano ENV PATH=/src:$PATH RUN mkdir ~/.aws -ENTRYPOINT ["python3", "deploy_pipeline.py"] +ENTRYPOINT ["python", "deploy_pipeline.py"] diff --git a/saber/i2g/detection/gpu/Dockerfile b/saber/i2g/detection/Dockerfile-gpu similarity index 95% rename from saber/i2g/detection/gpu/Dockerfile rename to saber/i2g/detection/Dockerfile-gpu index 2cb8169..e6246eb 100644 --- a/saber/i2g/detection/gpu/Dockerfile +++ b/saber/i2g/detection/Dockerfile-gpu @@ -42,11 +42,12 @@ RUN apt-get -y install \ # Setup python packages RUN pip install --ignore-installed Theano -RUN pip install numpy -RUN pip3 install numpy +RUN pip install --ignore-installed numpy +RUN pip3 install --ignore-installed numpy RUN pip3 install --ignore-installed awscli RUN pip3 install --ignore-installed boto3 RUN pip install --ignore-installed SimpleITK +RUN pip install --ignore-installed enum # Upgrade six RUN pip3 install --upgrade six diff --git a/saber/i2g/detection/deploy_pipeline.py b/saber/i2g/detection/deploy_pipeline.py index 6485437..e33d806 100644 --- a/saber/i2g/detection/deploy_pipeline.py +++ b/saber/i2g/detection/deploy_pipeline.py @@ -27,7 +27,7 @@ np.random.seed(9999) -K.set_image_dim_ordering('th') +K.set_image_data_format('channels_first') #replaces K.set_image_dim_ordering('th') def get_parser(): diff --git a/saber/i2g/examples/I2G_Demo/jobPublic.yaml b/saber/i2g/examples/I2G_Demo/jobPublic.yaml index 9559799..c755da4 100644 --- a/saber/i2g/examples/I2G_Demo/jobPublic.yaml +++ b/saber/i2g/examples/I2G_Demo/jobPublic.yaml @@ -16,7 +16,7 @@ #------------------------------------------------ # general: host_bossdb: api.bossdb.org -token_bossdb: 9154f9b731940e65844ea3568a0d7daa94f06f8b +token_bossdb: 6edf6eacfe4b5a36b7184ad477db1fd7876e7982 coll_name: Kasthuri exp_name: em exp_name_out: I2G_Saber_pipeline_results @@ -64,4 +64,5 @@ mode: synapse neuron_mode: 1 agg_threshold: "0.5" seeds_cc_threshold: "5" +_saber_bucket: saber-batch diff --git a/saber/i2g/examples/I2G_Demo/job_simple.yaml b/saber/i2g/examples/I2G_Demo/job_simple.yaml new file mode 100644 index 0000000..647ee44 --- /dev/null +++ b/saber/i2g/examples/I2G_Demo/job_simple.yaml @@ -0,0 +1,57 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
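The `deploy_pipeline.py` change above swaps the removed Keras 1.x call `K.set_image_dim_ordering('th')` for the Keras 2.x data-format API. For reference, the new call declares Theano-style channels-first tensors and can be checked at runtime; this is a minimal illustration, not part of the patch:

```python
# Minimal check of the Keras 2.x data-format API now used by deploy_pipeline.py.
from keras import backend as K

K.set_image_data_format("channels_first")   # replacement for the old 'th' ordering
assert K.image_data_format() == "channels_first"
```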
+ +# BOSS: +#------------------------------------------------ +# general: +host_bossdb: api.bossdb.io +token_bossdb: <> +coll_name: Kasthuri +exp_name: ac4 +coord_name: ac4-cf +resolution: 0 +xmin: 0 +xmax: 512 +ymin: 0 +ymax: 512 +zmin: 0 +zmax: 50 +padding: 0 +onesided: 0 +#inputs: +in_chan_name_raw: em +itype_name_in: image +dtype_name_in: uint8 +#outputs +pull_output_name_raw: pull_output_raw.npy + + +#PROCESSING: +#------------------------------------------------ +#General: +width: 512 +height: 512 +mode: synapse +#Synapse_detection +synapse_output: synapse_output.npy +#Membrane_detection +membrane_output: membrane_output.npy +#neuron_segmentation: +train_file: ./trained_classifier.pkl +neuron_output: neuron_output.npy +mode: synapse +neuron_mode: 1 +agg_threshold: "0.5" +seeds_cc_threshold: "5" +_saber_bucket: saber-batch diff --git a/saber/i2g/examples/I2G_Demo/workflow_i2g.cwl b/saber/i2g/examples/I2G_Demo/workflow_i2g.cwl index ca05cf0..641c84a 100644 --- a/saber/i2g/examples/I2G_Demo/workflow_i2g.cwl +++ b/saber/i2g/examples/I2G_Demo/workflow_i2g.cwl @@ -192,4 +192,4 @@ steps: padding: padding input: membrane_detection/membrane_detection_out coord_name: coord_name_out - out: [] \ No newline at end of file + out: [] diff --git a/saber/i2g/examples/I2G_Demo/workflow_simple.cwl b/saber/i2g/examples/I2G_Demo/workflow_simple.cwl new file mode 100644 index 0000000..3872276 --- /dev/null +++ b/saber/i2g/examples/I2G_Demo/workflow_simple.cwl @@ -0,0 +1,142 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/usr/bin/env cwl-runner +## This workflow will make use of the general synapse and membrane detection cwl files, meaning the processes will happen on CPU rather than on GPU. Does not include Boss push steps. + +cwlVersion: v1.0 +class: Workflow +doc: local + +inputs: + + # Inputs for BOSS + host_bossdb: string + token_bossdb: string? + coll_name: string + exp_name: string + in_chan_name_raw: string + dtype_name_in: string + itype_name_in: string + coord_name: string + resolution: int? + xmin: int? + xmax: int? + ymin: int? + ymax: int? + zmin: int? + zmax: int? + padding: int? + pull_output_name_raw: string + + #Inputs for processing + width: int? + height: int? + mode: string + + #Inputs for neuron_segmentation + train_file: File? 
+ neuron_mode: string + seeds_cc_threshold: string + agg_threshold: string + + #Inputs for output names: + synapse_output: string + membrane_output: string + neuron_output: string + +outputs: + pull_output_raw: + type: File + outputSource: boss_pull_raw/pull_output + synapse_detection: + type: File + outputSource: synapse_detection/synapse_detection_out + membrane_detection: + type: File + outputSource: membrane_detection/membrane_detection_out + neuron_segmentation: + type: File + outputSource: neuron_segmentation/neuron_segmentation_out + +steps: + boss_pull_raw: + run: ../../../../saber/boss_access/boss_pull_nos3.cwl + in: + token: token_bossdb + host_name: host_bossdb + coll_name: coll_name + exp_name: exp_name + chan_name: in_chan_name_raw + dtype_name: dtype_name_in + itype_name: itype_name_in + resolution: resolution + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + padding: padding + output_name: pull_output_name_raw + coord_name: coord_name + hints: + saber: + local: True + file_path: /home/ubuntu/saber/volumes/data/local + out: + [pull_output] + + + synapse_detection: + run: ../../../../saber/i2g/detection/synapse_detection.cwl + in: + input: boss_pull_raw/pull_output + width: width + height: height + mode: mode + output: synapse_output + hints: + saber: + local: True + file_path: /home/ubuntu/saber/volumes/data/local + out: [synapse_detection_out] + + membrane_detection: + run: ../../../../saber/i2g/detection/membrane_detection.cwl + in: + input: boss_pull_raw/pull_output + width: width + height: height + output: membrane_output + hints: + saber: + local: True + file_path: /home/ubuntu/saber/volumes/data/local + out: [membrane_detection_out] + + neuron_segmentation: + run: ../../../../saber/i2g/neuron_segmentation/neuron_segmentation.cwl + in: + prob_file: membrane_detection/membrane_detection_out + mode: neuron_mode + train_file: train_file + agg_threshold: agg_threshold + seeds_cc_threshold: seeds_cc_threshold + outfile: neuron_output + hints: + saber: + local: True + file_path: /home/ubuntu/saber/volumes/data/local + out: [neuron_segmentation_out] diff --git a/saber/i2g/examples/I2G_FFN/job_ffn.yml b/saber/i2g/examples/I2G_FFN/job_ffn.yml new file mode 100644 index 0000000..56e6304 --- /dev/null +++ b/saber/i2g/examples/I2G_FFN/job_ffn.yml @@ -0,0 +1,39 @@ +# BOSS: +#------------------------------------------------ +# general: +host_bossdb: api.bossdb.io +token_bossdb: public +coll_name: neuroproof_examples +exp_name: training_sample +coord_name: neuroproof_training_sample_2 +resolution: 0 +xmin: 0 +xmax: 250 +ymin: 0 +ymax: 250 +zmin: 0 +zmax: 250 +padding: 0 +onesided: 0 +#inputs: +in_chan_name_raw: images +itype_name_in: image +dtype_name_in: uint8 +#outputs +pull_output_name_raw: pull_output_raw.npy + +# FFN segmentation +image_mean: 128 +image_stddev: 33 +depth: 12 +fov_size: 33,33,33 +deltas: 8,8,8 +init_activation: 0.95 +pad_value: 0.05 +move_threshold: 0.9 +min_boundary_dist: 1,1,1 +segment_threshold: 0.6 +min_segment_size: 100 +bound_start: 0,0,0 +bound_stop: 250,250,250 +outfile: ffn_seg_out.npy \ No newline at end of file diff --git a/saber/i2g/examples/I2G_FFN/workflow_ffn.cwl b/saber/i2g/examples/I2G_FFN/workflow_ffn.cwl new file mode 100644 index 0000000..87d72ff --- /dev/null +++ b/saber/i2g/examples/I2G_FFN/workflow_ffn.cwl @@ -0,0 +1,116 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in 
compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/usr/bin/env cwl-runner +## This workflow will make use of the general synapse and membrane detection cwl files, meaning the processes will happen on CPU rather than on GPU. Does not include Boss push steps. + +cwlVersion: v1.0 +class: Workflow +doc: local + +inputs: + # Inputs for BOSS + host_bossdb: string + token_bossdb: string? + coll_name: string + exp_name: string + in_chan_name_raw: string + dtype_name_in: string + itype_name_in: string + coord_name: string + resolution: int? + xmin: int? + xmax: int? + ymin: int? + ymax: int? + zmin: int? + zmax: int? + padding: int? + pull_output_name_raw: string + + #Inputs for FFN + image_mean: string + image_stddev: string + depth: string + fov_size: string + deltas: string + init_activation: string + pad_value: string + move_threshold: string + min_boundary_dist: string + segment_threshold: string + min_segment_size: string + bound_start: string + bound_stop: string + outfile: string + +outputs: + pull_output_raw: + type: File + outputSource: boss_pull_raw/pull_output + ffn_segmentation: + type: File + outputSource: ffn_segmentation/ffn_out + +steps: + boss_pull_raw: + run: ../../../../saber/boss_access/boss_pull_nos3.cwl + in: + token: token_bossdb + host_name: host_bossdb + coll_name: coll_name + exp_name: exp_name + chan_name: in_chan_name_raw + dtype_name: dtype_name_in + itype_name: itype_name_in + resolution: resolution + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + padding: padding + output_name: pull_output_name_raw + coord_name: coord_name + hints: + saber: + local: True + file_path: /home/ubuntu/saber/volumes/data/local + out: + [pull_output] + + ffn_segmentation: + run: ../../../../saber/i2g/ffns/ffn_segmentation.cwl + in: + input: boss_pull_raw/pull_output + image_mean: image_mean + image_stddev: image_stddev + depth: depth + fov_size: fov_size + deltas: deltas + init_activation: init_activation + pad_value: pad_value + move_threshold: move_threshold + min_boundary_dist: min_boundary_dist + segment_threshold: segment_threshold + min_segment_size: min_segment_size + bound_start: bound_start + bound_stop: bound_stop + outfile: outfile + hints: + saber: + local: True + file_path: /home/ubuntu/saber/volumes/data/local + out: [ffn_out] \ No newline at end of file diff --git a/saber/i2g/examples/I2G_Neuroproof/job_neuroproof_deploy.yaml b/saber/i2g/examples/I2G_Neuroproof/job_neuroproof_deploy.yaml new file mode 100644 index 0000000..ff411a0 --- /dev/null +++ b/saber/i2g/examples/I2G_Neuroproof/job_neuroproof_deploy.yaml @@ -0,0 +1,57 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
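The FFN parameters in `job_ffn.yml` above (`depth`, `fov_size`, `deltas`, thresholds, bounds) are passed to `ffn_segmentation.cwl` as plain strings because the driver assembles them into the FFN inference config text itself; `config_template.pbtxt` and `driver.py` later in this diff show the full templating. The core of that assembly is roughly the following, shown here for illustration only:

```python
# How the comma-separated strings from job_ffn.yml become FFN model_args;
# cf. config_file_parser in saber/i2g/ffns/inference/driver.py below.
depth, fov_size, deltas = "12", "33,33,33", "8,8,8"
model_args = '{{"depth": {}, "fov_size": [{}], "deltas": [{}]}}'.format(
    depth, fov_size, deltas
)
print(model_args)  # {"depth": 12, "fov_size": [33, 33, 33], "deltas": [8, 8, 8]}
```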
+# See the License for the specific language governing permissions and +# limitations under the License. + +# BOSS: +#------------------------------------------------ +host_bossdb: api.bossdb.io +token_bossdb: public +coll_name: kasthuri2015 +exp_name: ac4 +coord_name: ac4-cf +resolution: 0 +xmin: 256 +xmax: 320 +ymin: 256 +ymax: 320 +zmin: 0 +zmax: 64 +padding: 0 +onesided: 0 + +#inputs: +in_chan_name_raw: em +itype_name_in: image +dtype_name_in: uint8 +#outputs +pull_output_name_raw: pull_output_raw.npy + +#PROCESSING: +#------------------------------------------------ +#General: +width: 64 +height: 64 +mode: synapse +#Membrane_detection +membrane_output: membrane_output.npy +#neuron_segmentation: +train_file: ./trained_classifier.pkl +neuron_output: neuron_output.npy +mode: synapse +neuron_mode: 1 +agg_threshold: "0.5" +seeds_cc_threshold: "5" +#neuroproof +class_file: ./kasthuri_classifier.xml +neuroproof_output: neuroproof_output.npy + diff --git a/saber/i2g/examples/I2G_Neuroproof/job_neuroproof_train.yaml b/saber/i2g/examples/I2G_Neuroproof/job_neuroproof_train.yaml new file mode 100644 index 0000000..3e6297d --- /dev/null +++ b/saber/i2g/examples/I2G_Neuroproof/job_neuroproof_train.yaml @@ -0,0 +1,20 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#neuroproof: +mode: 0 +ws_file: oversegmented_stack_labels.h5 +pred_file: boundary_prediction.h5 +gt_file: groundtruth.h5 +neuroproof_output: myclassifier.xml diff --git a/saber/i2g/examples/I2G_Neuroproof/workflow_neuroproof_deploy.cwl b/saber/i2g/examples/I2G_Neuroproof/workflow_neuroproof_deploy.cwl new file mode 100644 index 0000000..7da0af2 --- /dev/null +++ b/saber/i2g/examples/I2G_Neuroproof/workflow_neuroproof_deploy.cwl @@ -0,0 +1,144 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/usr/bin/env cwl-runner +## This workflow will make use of the general synapse and membrane detection cwl files, meaning the processes will happen on CPU rather than on GPU. Does not include Boss push steps. + +cwlVersion: v1.0 +class: Workflow +doc: local + +inputs: + + # Inputs for BOSS + host_bossdb: string + token_bossdb: string? + coll_name: string + exp_name: string + in_chan_name_raw: string + dtype_name_in: string + itype_name_in: string + coord_name: string + resolution: int? + xmin: int? + xmax: int? + ymin: int? + ymax: int? + zmin: int? + zmax: int? + padding: int? 
+ pull_output_name_raw: string + + #Inputs for processing + width: int? + height: int? + mode: string + + #Inputs for neuron_segmentation + train_file: File? + neuron_mode: string + seeds_cc_threshold: string + agg_threshold: string + + #Inputs for output names: + membrane_output: string + neuron_output: string + + #Inputs for Neuroproof + class_file: File + neuroproof_output: string + +outputs: + pull_output_raw: + type: File + outputSource: boss_pull_raw/pull_output + membrane_detection: + type: File + outputSource: membrane_detection/membrane_detection_out + neuron_segmentation: + type: File + outputSource: neuron_segmentation/neuron_segmentation_out + neuroproof: + type: File + outputSource: neuroproof/neuroproof_out + +steps: + boss_pull_raw: + run: ../../../../saber/boss_access/boss_pull_nos3.cwl + in: + token: token_bossdb + host_name: host_bossdb + coll_name: coll_name + exp_name: exp_name + chan_name: in_chan_name_raw + dtype_name: dtype_name_in + itype_name: itype_name_in + resolution: resolution + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + padding: padding + output_name: pull_output_name_raw + coord_name: coord_name + hints: + saber: + local: True + file_path: /home/ubuntu/saber/volumes/data/local + out: + [pull_output] + + membrane_detection: + run: ../../../../saber/i2g/detection/membrane_detection.cwl + in: + input: boss_pull_raw/pull_output + width: width + height: height + output: membrane_output + hints: + saber: + local: True + file_path: /home/ubuntu/saber/volumes/data/local + out: [membrane_detection_out] + + neuron_segmentation: + run: ../../../../saber/i2g/neuron_segmentation/neuron_segmentation.cwl + in: + prob_file: membrane_detection/membrane_detection_out + mode: neuron_mode + train_file: train_file + agg_threshold: agg_threshold + seeds_cc_threshold: seeds_cc_threshold + outfile: neuron_output + hints: + saber: + local: True + file_path: /home/ubuntu/saber/volumes/data/local + out: [neuron_segmentation_out] + + neuroproof: + run: ../../../../saber/i2g/neuroproof/neuroproof.cwl + in: + mode: neuron_mode + ws_file: neuron_segmentation/neuron_segmentation_out + pred_file: membrane_detection/membrane_detection_out + class_file: class_file + outfile: neuroproof_output + hints: + saber: + local: True + file_path: /home/ubuntu/saber/volumes/data/local + out: [neuroproof_out] diff --git a/saber/i2g/examples/I2G_Neuroproof/workflow_neuroproof_train.cwl b/saber/i2g/examples/I2G_Neuroproof/workflow_neuroproof_train.cwl new file mode 100644 index 0000000..33d3766 --- /dev/null +++ b/saber/i2g/examples/I2G_Neuroproof/workflow_neuroproof_train.cwl @@ -0,0 +1,51 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/usr/bin/env cwl-runner +## This workflow will make use of the general synapse and membrane detection cwl files, meaning the processes will happen on CPU rather than on GPU. Does not include Boss push steps. 
+ +cwlVersion: v1.0 +class: Workflow +doc: local + +inputs: + #Inputs for Neuroproof + mode: string + ws_file: File + pred_file: File + gt_file: File + + + #Inputs for output names: + neuroproof_output: string + +outputs: + neuroproof: + type: File + outputSource: neuroproof/neuroproof_out + +steps: + neuroproof: + run: ../../../../saber/i2g/neuroproof/neuroproof.cwl + in: + mode: mode + ws_file: ws_file + pred_file: pred_file + gt_file: gt_file + outfile: neuroproof_output + hints: + saber: + local: True + file_path: /Users/xenesd1/Projects/aplbrain/saber/volumes/data/local + out: [neuroproof_out] diff --git a/saber/i2g/ffns/Dockerfile.base b/saber/i2g/ffns/Dockerfile.base new file mode 100644 index 0000000..1c286cf --- /dev/null +++ b/saber/i2g/ffns/Dockerfile.base @@ -0,0 +1,11 @@ +FROM tensorflow/tensorflow:1.14.0-gpu + +LABEL maintainer="Jordan Matelsky " + +RUN apt update +RUN apt install -y git +RUN pip install scikit-image scipy numpy tensorflow-gpu h5py pillow absl-py +RUN git clone https://github.com/google/ffn/ \ + && cd ffn \ + && git checkout 30decd27d9d4f3ef5768f2608c8c4d3350f8232b + diff --git a/saber/i2g/ffns/Dockerfile.inference b/saber/i2g/ffns/Dockerfile.inference new file mode 100644 index 0000000..bfd0cb1 --- /dev/null +++ b/saber/i2g/ffns/Dockerfile.inference @@ -0,0 +1,19 @@ +FROM ffn-base + +LABEL maintainer="Jordan Matelsky " + +RUN apt-get install -yqq \ + python3-dev \ + python3-pip + +WORKDIR "ffn" +COPY ./inference/config_template.pbtxt . +COPY ./inference/get-latest-checkpoint . +COPY ./inference/npy2h5.py . +COPY ./inference/driver.py . +COPY ./model /model +RUN mkdir /data +RUN mkdir /latest-model +RUN export LATEST=`./get-latest-checkpoint` +RUN cp /model/model.ckpt-$LATEST* /latest-model +ENTRYPOINT ["python", "driver.py"] diff --git a/saber/i2g/ffns/Dockerfile.train b/saber/i2g/ffns/Dockerfile.train new file mode 100644 index 0000000..bc696eb --- /dev/null +++ b/saber/i2g/ffns/Dockerfile.train @@ -0,0 +1,8 @@ +FROM ffn-base + +LABEL maintainer="Jordan Matelsky " + +WORKDIR "ffn" +COPY ./train/main.sh . + +ENTRYPOINT ["bash", "-c", "./main.sh"] diff --git a/saber/i2g/ffns/README.md b/saber/i2g/ffns/README.md new file mode 100644 index 0000000..21f42a5 --- /dev/null +++ b/saber/i2g/ffns/README.md @@ -0,0 +1,11 @@ +# SABER Floodfill Networks + +## Building the docker containers + +```shell +docker build --rm -t ffn-base -f Dockerfile.base . +docker build --rm -t aplbrain/ffn-inference -f Dockerfile.inference . 
+``` + +## python driver test +docker run --rm -v $(pwd)/results:/ffn/output/ aplbrain/ffn-inference -c config.pbtxt -bb ['start { x:0 y:0 z:0 } size { x:64 y:64 z:63 }'] -o output/my_seg.npy diff --git a/saber/i2g/ffns/example_config.pbtxt b/saber/i2g/ffns/example_config.pbtxt new file mode 100644 index 0000000..2e65bb8 --- /dev/null +++ b/saber/i2g/ffns/example_config.pbtxt @@ -0,0 +1,19 @@ +image { + hdf5: "/data/raw.h5:raw" +} +image_mean: 128 +image_stddev: 33 +checkpoint_interval: 1800 +seed_policy: "PolicyPeaks" +model_checkpoint_path: "/latest-model/model.ckpt-27465036" +model_name: "convstack_3d.ConvStack3DFFNModel" +model_args: "{\"depth\": 12, \"fov_size\": [33, 33, 33], \"deltas\": [8, 8, 8]}" +segmentation_output_dir: "/results" +inference_options { + init_activation: 0.95 + pad_value: 0.05 + move_threshold: 0.9 + min_boundary_dist { x: 1 y: 1 z: 1} + segment_threshold: 0.6 + min_segment_size: 100 +} diff --git a/saber/i2g/ffns/ffn_segmentation.cwl b/saber/i2g/ffns/ffn_segmentation.cwl new file mode 100644 index 0000000..f8b5e78 --- /dev/null +++ b/saber/i2g/ffns/ffn_segmentation.cwl @@ -0,0 +1,103 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cwlVersion: v1.0 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: aplbrain/ffn-inference +baseCommand: python +arguments: ["driver.py"] +inputs: + input: + type: File + inputBinding: + position: 1 + prefix: --input_file + image_mean: + type: string + inputBinding: + position: 2 + prefix: --image_mean + image_stddev: + type: string + inputBinding: + position: 3 + prefix: --image_stddev + depth: + type: string + inputBinding: + position: 4 + prefix: --depth + fov_size: + type: string + inputBinding: + position: 5 + prefix: --fov_size + deltas: + type: string + inputBinding: + position: 6 + prefix: --deltas + init_activation: + type: string + inputBinding: + position: 7 + prefix: --init_activation + pad_value: + type: string + inputBinding: + position: 8 + prefix: --pad_value + move_threshold: + type: string + inputBinding: + position: 9 + prefix: --move_threshold + min_boundary_dist: + type: string + inputBinding: + position: 10 + prefix: --min_boundary_dist + segment_threshold: + type: string + inputBinding: + position: 11 + prefix: --segment_threshold + min_segment_size: + type: string + inputBinding: + position: 12 + prefix: --min_segment_size + bound_start: + type: string + inputBinding: + position: 13 + prefix: --bound_start + bound_stop: + type: string + inputBinding: + position: 14 + prefix: --bound_stop + outfile: + type: string + inputBinding: + position: 15 + prefix: --outfile + +outputs: + ffn_out: + type: File + outputBinding: + glob: $(inputs.outfile) diff --git a/saber/i2g/ffns/ffn_train.cwl b/saber/i2g/ffns/ffn_train.cwl new file mode 100644 index 0000000..97c97f1 --- /dev/null +++ b/saber/i2g/ffns/ffn_train.cwl @@ -0,0 +1,107 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the 
Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cwlVersion: v1.0 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: aplbrain/ffn_train +baseCommand: /bin/bash +arguments: ["main.sh"] +inputs: + input: + type: File + inputBinding: + position: 1 + prefix: --input + seg_input: + type: File + inputBinding: + position: 2 + prefix: --seg_input + min_thres: + type: float? + inputBinding: + position: 3 + prefix: --min_thres + max_thres: + type: float? + inputBinding: + position: 4 + prefix: --max_thres + thres_step: + type: float? + inputBinding: + position: 5 + prefix: --thres_step + lom_radius: + type: int? + inputBinding: + position: 6 + prefix: --lom_radius + min_size: + type: int? + inputBinding: + position: 7 + prefix: --min_size + margin: + type: int? + inputBinding: + position: 8 + prefix: --margin + model_name: + type: string + inputBinding: + position: 9 + prefix: --name + depth: + type: int? + inputBinding: + position: 10 + prefix: --depth + fov: + type: int? + inputBinding: + position: 11 + prefix: --fov + deltas: + type: int? + inputBinding: + position: 12 + prefix: --deltas + image_mean: + type: int? + inputBinding: + position: 13 + prefix: --image_mean + image_std: + type: int? + inputBinding: + position: 14 + prefix: --image_std + max_steps: + type: int? 
+ inputBinding: + position: 15 + prefix: --max_steps + output: + type: string + inputBinding: + position: 16 + prefix: --output +outputs: + training_out: + type: File + outputBinding: + glob: $(inputs.output) diff --git a/saber/i2g/ffns/ffn_workflow.cwl b/saber/i2g/ffns/ffn_workflow.cwl new file mode 100644 index 0000000..4c24265 --- /dev/null +++ b/saber/i2g/ffns/ffn_workflow.cwl @@ -0,0 +1,160 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +inputs: + host_name: host_name + token: token + coll_name: coll_name + exp_name: exp_name + chan_name: chan_img + dtype_name: dtype_img + itype_name: itype + resolution: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + padding: padding + output_name: raw_pull_output_name + coord_name: coord_name + bucket: bucket + +outputs: + train_output: + type: File + outputSource: optimize/classifier_weights +steps: + raw_boss_pull: + run: ../../boss_access/boss_pull.cwl + in: + host_name: host_name + token: token + coll_name: coll_name + exp_name: exp_name + chan_name: chan_img + dtype_name: dtype_img + itype_name: itype + resolution: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + padding: padding + output_name: raw_pull_output_name + coord_name: coord_name + bucket: bucket + out: + [raw_pull_output] + anno_boss_pull: + run: ../../boss_access/boss_pull.cwl + in: + host_name: host_name + token: token + coll_name: coll_name + exp_name: exp_name + chan_name: chan_lbl + dtype_name: dtype_lbl + itype_name: itype + resolution: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + padding: padding + output_name: anno_pull_output_name + coord_name: coord_name + bucket: bucket + out: + [anno_pull_output] + optimize: + run: ../ffns_seg.cwl + in: + input: raw_boss_pull/raw_pull_output + seg_input: anno_boss_pull/anno_pull_output + height: + type: int? + inputBinding: + position: 2 + prefix: --height + min_thres: + type: float? + inputBinding: + position: 3 + prefix: --min_thres + max_thres: + type: float? + inputBinding: + position: 4 + prefix: --max_thres + thres_step: + type: float? + inputBinding: + position: 5 + prefix: --thres_step + lom_radius: + type: int? + inputBinding: + position: 6 + prefix: --lom_radius + min_size: + type: int? + inputBinding: + position: 7 + prefix: --min_size + margin: + type: int? + inputBinding: + position: 8 + prefix: --margin + model_name: + type: string + inputBinding: + position: 9 + prefix: --name + depth: + type: int? + inputBinding: + position: 10 + prefix: --depth + fov: + type: int? + inputBinding: + position: 11 + prefix: --fov + deltas: + type: int? + inputBinding: + position: 12 + prefix: --deltas + image_mean: + type: int? + inputBinding: + position: 13 + prefix: --image_mean + image_std: + type: int? + inputBinding: + position: 14 + prefix: --image_std + max_steps: + type: int? 
+ inputBinding: + position: 15 + prefix: --max_steps + output: + type: string + inputBinding: + position: 16 + prefix: --output + out: [classifier_weights] + hints: + saber: + score_format: "F1: {score}" + local: True diff --git a/saber/i2g/ffns/inference/config_template.pbtxt b/saber/i2g/ffns/inference/config_template.pbtxt new file mode 100644 index 0000000..132addf --- /dev/null +++ b/saber/i2g/ffns/inference/config_template.pbtxt @@ -0,0 +1,19 @@ +image { + hdf5: "/data/raw.h5:raw" +} +image_mean: {} +image_stddev: {} +checkpoint_interval: 1800 +seed_policy: "PolicyPeaks" +model_checkpoint_path: "/latest-model/model.ckpt-27465036" +model_name: "convstack_3d.ConvStack3DFFNModel" +model_args: {} +segmentation_output_dir: "/results" +inference_options { + init_activation: {} + pad_value: {} + move_threshold: {} + min_boundary_dist {} + segment_threshold: {} + min_segment_size: {} +} diff --git a/saber/i2g/ffns/inference/driver.py b/saber/i2g/ffns/inference/driver.py new file mode 100644 index 0000000..f86aab3 --- /dev/null +++ b/saber/i2g/ffns/inference/driver.py @@ -0,0 +1,160 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import numpy as np +import npy2h5 +from subprocess import call + +def get_parser(): + parser = argparse.ArgumentParser(description='Flood-filling Networks Script') + parser.set_defaults(func=lambda _: parser.print_help()) + parser.add_argument( + '-i', + '--input_file', + required=True, + help='Raw EM Volume') + parser.add_argument( + '--image_mean', + required=True, + help='image mean') + parser.add_argument( + '--image_stddev', + required=True, + help='image std dev') + parser.add_argument( + '--depth', + required=True, + help='depth, fov size, deltas') + parser.add_argument( + '--fov_size', + required=True, + help='depth, fov size, deltas') + parser.add_argument( + '--deltas', + required=True, + help='depth, fov size, deltas') + parser.add_argument( + '--init_activation', + required=True, + help='init activation') + parser.add_argument( + '--pad_value', + required=True, + help='pad value') + parser.add_argument( + '--move_threshold', + required=True, + help='move threshold') + parser.add_argument( + '--min_boundary_dist', + required=True, + help='min boundary dist') + parser.add_argument( + '--segment_threshold', + required=True, + help='segment thresh') + parser.add_argument( + '--min_segment_size', + required=True, + help='segment size') + parser.add_argument( + '--bound_start', + required=True, + help='X,Y,Z start bound') + parser.add_argument( + '-p', + '--bound_stop', + required=True, + help='X,Y,Z stop bound') + parser.add_argument( + '-o', + '--outfile', + required=True, + help='Output file') + return parser + +def config_file_parser(args): + model_args = "\"{{\\\"depth\\\": {}, \\\"fov_size\\\": [{}], \\\"deltas\\\": [{}]}}\"".format( + args.depth, + args.fov_size, + args.deltas + ) + min_boundary_dist = "{{ x: {} y: {} z: {}}}".format( + 
args.min_boundary_dist.split(',')[0], + args.min_boundary_dist.split(',')[1], + args.min_boundary_dist.split(',')[2] + ) + params = { + 'image_mean': args.image_mean, + 'image_stddev': args.image_stddev, + 'model_args': model_args, + 'init_activation': args.init_activation, + 'pad_value': args.pad_value, + 'move_threshold': args.move_threshold, + 'min_boundary_dist': min_boundary_dist, + 'segment_threshold': args.segment_threshold, + 'min_segment_size': args.min_segment_size + } + + config_file = open('config.pbtxt','w') + with open('config_template.pbtxt', 'r') as template_file: + for line in template_file.readlines(): + if "{}" in line: + param = line.split(':')[0].strip() + if param in params.keys(): + line = line.format(params[param]) + elif 'min_boundary_dist' in line: + line = line.format(params['min_boundary_dist']) + config_file.write(line) + config_file.close() + +def bounding_box_parser(start,stop): + start_list = start.split(',') + stop_list = stop.split(',') + if len(start_list) != 3 and len(stop_list) != 3: + raise Exception('Unable to parse bounding box. {}:{}'.format(start,stop)) + flag = "start {{ x:{} y:{} z:{} }} size {{ x:{} y:{} z:{} }}".format( + start_list[0], start_list[1], start_list[2], + stop_list[0], stop_list[1], stop_list[2] + ) + return flag + +def deploy(args): + config_file_parser(args) + npy2h5.convert(args.input_file, '/data/raw.h5', 'raw') + bounding_box = bounding_box_parser(args.bound_start, args.bound_stop) + with open('config.pbtxt') as params: + params = params.read() + ec = call(['python', 'run_inference.py', + '--inference_request={}'.format(params), + "--bounding_box", bounding_box]) + if ec != 0: + raise SystemError('Child process failed with exit code {}... exiting...'.format(ec)) + for file in os.listdir('/results/0/0/'): + if file[-4:] == '.npz': + data = np.load('/results/0/0/'+file) + seg_arr = data['segmentation'].astype(np.uint64) + with open(args.outfile, 'wb') as f: + np.save(f,seg_arr) + + +if __name__ == '__main__': + parser = get_parser() + args = parser.parse_args() + deploy(args) + print('Done') + + diff --git a/saber/i2g/ffns/inference/get-latest-checkpoint b/saber/i2g/ffns/inference/get-latest-checkpoint new file mode 100755 index 0000000..81e0c75 --- /dev/null +++ b/saber/i2g/ffns/inference/get-latest-checkpoint @@ -0,0 +1,9 @@ +#!/usr/bin/env python + +import glob + +def get_max_checkpoint(path="/model/model.ckpt-*"): + return max([int(f.split(".")[-2].split("-")[1]) for f in glob.glob(path)]) + +if __name__ == "__main__": + print(get_max_checkpoint()) diff --git a/saber/i2g/ffns/inference/npy2h5.py b/saber/i2g/ffns/inference/npy2h5.py new file mode 100644 index 0000000..3ba1c22 --- /dev/null +++ b/saber/i2g/ffns/inference/npy2h5.py @@ -0,0 +1,29 @@ +""" +Copyright 2018 The Johns Hopkins University Applied Physics Laboratory. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + + +import numpy as np +import h5py + +def convert(file, path, name): + data = np.load(file) + dtype = data.dtype + if dtype != np.uint8: + print("Converting to uint8 from " + str(dtype)) + dtype = np.uint8 + with h5py.File(path, 'w') as fh: + fh.create_dataset(name, data=data, dtype=dtype) + diff --git a/saber/i2g/ffns/train/main.sh b/saber/i2g/ffns/train/main.sh new file mode 100755 index 0000000..f057a24 --- /dev/null +++ b/saber/i2g/ffns/train/main.sh @@ -0,0 +1,10 @@ +echo "Computing partitions... (Go get a coffee.)" + +python ./compute_partitions.py --input_volume /data/seg.h5:seg --output_volume /data/af.h5:af --thresholds 0.025,0.05,0.075,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9 --lom_radius 24,24,24 --min_size 10000 + +echo "Building coordinates file... (Go get lunch.)" +# Takes a very long time: +python build_coordinates.py --partition_volumes val:/data/af.h5:af --coordinate_output /data/tf_record_file --margin 24,24,24 + +echo "Training. (Go get a life!)" +python train.py --train_coords /data/tf_record_file --data_volumes val:/data/raw.h5:raw --label_volumes val:/data/seg.h5:seg --model_name convstack_3d.ConvStack3DFFNModel --model_args "{\"depth\": 12, \"fov_size\": [33, 33, 33], \"deltas\": [8, 8, 8]}" --image_mean 128 --image_stddev 33 --train_dir '/model' --max_steps 4000000 diff --git a/saber/i2g/neuron_segmentation/requirements.txt b/saber/i2g/neuron_segmentation/requirements.txt index ead96b9..5c1885e 100644 --- a/saber/i2g/neuron_segmentation/requirements.txt +++ b/saber/i2g/neuron_segmentation/requirements.txt @@ -8,7 +8,7 @@ numpydoc>=0.5 pillow>=2.7.0 networkx>=1.6,<2.0 h5py>=2.6 -scipy>=0.16 +scipy==0.19.1 cython>=0.17 viridis>=0.4 scikit-image>=0.12 diff --git a/saber/i2g/neuroproof/Dockerfile b/saber/i2g/neuroproof/Dockerfile new file mode 100644 index 0000000..af4fafe --- /dev/null +++ b/saber/i2g/neuroproof/Dockerfile @@ -0,0 +1,24 @@ +FROM conda/miniconda3 + +#Shared Library Dependencies +RUN apt-get -qq update && apt-get install -qq -y \ + git \ + libgl1-mesa-dev \ + libsm6 \ + libglib2.0-0 \ + libxrender1 \ + libxss1 \ + libxft2 \ + libxt6 + +#Neuroproof Installation +RUN conda create -n saber_neuroproof -c flyem neuroproof +ENV PATH=$PATH:"/usr/local/envs/saber_neuroproof/bin" +RUN pip install numpy h5py + +WORKDIR /app +COPY driver.py /app/driver.py +# COPY kasthuri_classifier.xml /app/kasthuri_classifier.xml +#RUN git clone https://github.com/janelia-flyem/neuroproof_examples.git +RUN wget --directory-prefix /app/kasthuri_classifier.xml https://saber-batch-dev.s3.amazonaws.com/kasthuri_classifier.xml +ENTRYPOINT ["python3", "driver.py" ] diff --git a/saber/i2g/neuroproof/README.md b/saber/i2g/neuroproof/README.md new file mode 100644 index 0000000..e45d4dd --- /dev/null +++ b/saber/i2g/neuroproof/README.md @@ -0,0 +1,5 @@ +## Example Commands + +Try: + +`docker run -v $(pwd):/app aplbrain/neuroproof python3 ./driver.py -m 0 -o my_classifier.xml --pred_file ./test_data/boundary_prediction.h5 --gt_file ./test_data/groundtruth.h5 --ws_file ./test_data/oversegmented_stack_labels.h5` \ No newline at end of file diff --git a/saber/i2g/neuroproof/driver.py b/saber/i2g/neuroproof/driver.py new file mode 100644 index 0000000..fcb9391 --- /dev/null +++ b/saber/i2g/neuroproof/driver.py @@ -0,0 +1,114 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
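The training entrypoint `main.sh` above assumes the mounted `/data` volume already holds `raw.h5` (dataset `raw`) and `seg.h5` (dataset `seg`); `npy2h5.py` only covers the uint8 raw case. A minimal sketch of preparing both files from `.npy` volumes follows; the input file names are placeholders, not part of the patch:

```python
# Illustrative data prep for the FFN training entrypoint (main.sh) above.
# main.sh expects /data/raw.h5:raw and /data/seg.h5:seg; input names are placeholders.
import h5py
import numpy as np

raw = np.load("raw.npy").astype(np.uint8)     # EM image volume
seg = np.load("seg.npy").astype(np.uint64)    # dense segmentation labels

with h5py.File("/data/raw.h5", "w") as f:
    f.create_dataset("raw", data=raw)
with h5py.File("/data/seg.h5", "w") as f:
    f.create_dataset("seg", data=seg)
```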
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from subprocess import call +import argparse +import h5py +import numpy as np + +def get_parser(): + parser = argparse.ArgumentParser(description='Neuroproof Aggolmeration script') + parser.set_defaults(func=lambda _: parser.print_help()) + parser.add_argument( + '-m', + '--mode', + required=True, + help='Train(0) or Deploy(1)') + parser.add_argument( + '--ws_file', + required=True, + help='Watershed file (oversegmented)') + parser.add_argument( + '--pred_file', + required=True, + help='Prediction file (channel 2 must be mitochondria if use_mito = 1)') + parser.add_argument( + '--gt_file', + required=False, + help='Ground truth file') + parser.add_argument( + '--train_file', + required=False, + help='Pretrained classifier file') + parser.add_argument( + '-o', + '--outfile', + required=True, + help='Output file') + parser.add_argument('--num_iterations', default='1', + help='Number of training iterations') + parser.add_argument('--use_mito', default='0', + help='Toggles context-aware training with mitochrondria prediciton (0 or 1)') + return parser + +def npy_to_h5(file): + raw_arr = np.load(file) #X, Y, Z + raw_arr = np.transpose(np.squeeze(raw_arr),(2,0,1)) #Z, X, Y + fn = file.split('.')[0] + '.h5' + hf = h5py.File(fn, 'w') + hf.create_dataset('stack', data = raw_arr.astype('int32')) + hf.close() + return fn + +def boundary_preprocessing(membrane): + """ + Takes an npy membrane boundary file and turns it into a probability h5 file + """ + if membrane[-3:] != 'npy': + raise Exception('Prediction file must be an npy or h5 file') + + mem_arr = np.load(membrane) # X, Y, Z + mem_arr = np.transpose(np.squeeze(mem_arr),(2,0,1)) #Z, X, Y + mem_arr = mem_arr/np.max(mem_arr) + cyto_arr = 1 - mem_arr + + pred_arr = np.array([mem_arr, cyto_arr]) # chan, Z, X, Y + pred_arr = np.moveaxis(pred_arr, 0,3) # Z,X,Y,chan + + pred_file = h5py.File(membrane[:-3]+'h5', 'w') + pred_file.create_group('volume').create_dataset('predictions', data = pred_arr.astype('float32')) + pred_file.close() + return membrane[:-3]+'h5' + +def train(args): + if args.gt_file[-2:] != 'h5': + args.gt_file = npy_to_h5(args.gt_file) + + proc = call(['neuroproof_graph_learn', args.ws_file, args.pred_file, args.gt_file, + "--num-iterations", args.num_iterations, + "--use_mito", args.use_mito, + "--classifier-name", args.outfile]) + if proc != 0: + raise SystemError('Child process failed with exit code {}... exiting...'.format(proc)) + +def deploy(args): + proc = call(['neuroproof_graph_predict', args.ws_file, args.pred_file, args.train_file]) + if proc != 0: + raise SystemError('Child process failed with exit code {}... 
exiting...'.format(proc)) + f = h5py.File('segmentation.h5', 'r') + seg_array = f['stack'] + np.save(args.outfile, seg_array) + return + +if __name__ == '__main__': + parser = get_parser() + args = parser.parse_args() + if args.pred_file[-2:] != 'h5': + args.pred_file = boundary_preprocessing(args.pred_file) + if args.ws_file[-2:] != 'h5': + args.ws_file = npy_to_h5(args.ws_file) + if(int(args.mode)==0): + train(args) + else: + deploy(args) diff --git a/saber/i2g/neuroproof/neuroproof.cwl b/saber/i2g/neuroproof/neuroproof.cwl new file mode 100644 index 0000000..0ee9edb --- /dev/null +++ b/saber/i2g/neuroproof/neuroproof.cwl @@ -0,0 +1,67 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cwlVersion: v1.0 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: aplbrain/neuroproof +baseCommand: python3 +arguments: ["driver.py"] +inputs: + mode: + type: string + inputBinding: + position: 1 + prefix: --mode + ws_file: + type: File + inputBinding: + position: 2 + prefix: --ws_file + pred_file: + type: File + inputBinding: + position: 2 + prefix: --pred_file + gt_file: + type: File? + inputBinding: + position: 3 + prefix: --gt_file + class_file: + type: File? + inputBinding: + position: 4 + prefix: --train_file + iterations: + type: string? + inputBinding: + position: 5 + prefix: --num_iterations + use_mito: + type: string? + inputBinding: + position: 6 + prefix: --use_mito + outfile: + type: string + inputBinding: + position: 7 + prefix: --outfile +outputs: + neuroproof_out: + type: File + outputBinding: + glob: $(inputs.outfile) diff --git a/saber/i2g/seg_syn_association/assoc.cwl b/saber/i2g/seg_syn_association/assoc_boss.cwl similarity index 97% rename from saber/i2g/seg_syn_association/assoc.cwl rename to saber/i2g/seg_syn_association/assoc_boss.cwl index b77b2c2..1e1eaf6 100644 --- a/saber/i2g/seg_syn_association/assoc.cwl +++ b/saber/i2g/seg_syn_association/assoc_boss.cwl @@ -18,9 +18,9 @@ cwlVersion: v1.0 class: CommandLineTool hints: DockerRequirement: - dockerPull: aplbrain/i2g:assoc + dockerPull: aplbrain/i2gassoc baseCommand: python -arguments: ['/app/seg_syn_assoc.py'] +arguments: ['/app/seg_syn_assoc_boss.py'] inputs: # config: # type: File? diff --git a/saber/i2g/seg_syn_association/assoc_local.cwl b/saber/i2g/seg_syn_association/assoc_local.cwl new file mode 100644 index 0000000..79bb142 --- /dev/null +++ b/saber/i2g/seg_syn_association/assoc_local.cwl @@ -0,0 +1,68 @@ +# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: aplbrain/i2gassoc +baseCommand: python +arguments: ['/app/seg_syn_assoc.py'] +inputs: + seg_file: + type: File + inputBinding: + position: 1 + prefix: --seg_file + syn_file: + type: File + inputBinding: + position: 2 + prefix: --syn_file + output_name: + type: string + inputBinding: + position: 3 + prefix: --output + output_name_noneu: + type: string + inputBinding: + position: 4 + prefix: --output_noneu + dilation: + type: string? + inputBinding: + position: 5 + prefix: --dilation + threshold: + type: string? + inputBinding: + position: 6 + prefix: --threshold + blob: + type: string? + inputBinding: + position: 7 + prefix: --blob +outputs: + assoc_output: + type: File + outputBinding: + glob: $(inputs.output_name) + assoc_output_noneu: + type: File + outputBinding: + glob: $(inputs.output_name_noneu) diff --git a/saber/i2g/seg_syn_association/seg_syn_assoc.py b/saber/i2g/seg_syn_association/seg_syn_assoc.py index 61bbc91..7bbdd1f 100644 --- a/saber/i2g/seg_syn_association/seg_syn_assoc.py +++ b/saber/i2g/seg_syn_association/seg_syn_assoc.py @@ -151,7 +151,7 @@ def edge_list_cv(neurons, synapses, dilation=5, syn_thres=0.8, blob_thres=4000): required=False, help='Blob size threshold') parser.add_argument( - '--lbl_file', + '--syn_file', required=False, help='Local synapse file' ) @@ -178,7 +178,7 @@ def edge_list_cv(neurons, synapses, dilation=5, syn_thres=0.8, blob_thres=4000): if args.dilation: dilation = args.dilation - neu_syn_list,syn_list = edge_list_cv(seg, syn, dilation=dilation, syn_thres=threshold, blob_thres=blob) + neu_syn_list,syn_list = edge_list_cv(seg, syn, dilation=dilation, syn_thres=threshold, blob_thres=blob_thres) neu_syn_list = pd.DataFrame.from_dict(neu_syn_list) syn_list = pd.DataFrame.from_dict(syn_list) diff --git a/saber/postprocessing/blob_detect/Dockerfile b/saber/postprocessing/blob_detect/Dockerfile new file mode 100644 index 0000000..3281246 --- /dev/null +++ b/saber/postprocessing/blob_detect/Dockerfile @@ -0,0 +1,21 @@ +# Copyright 2020 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM python:3.7-slim + +RUN pip install numpy \ + && pip install scikit-image +WORKDIR /src/ +COPY *.py /src/ +ENTRYPOINT ["python", "blob_detect.py"] \ No newline at end of file diff --git a/saber/postprocessing/blob_detect/README.md b/saber/postprocessing/blob_detect/README.md new file mode 100644 index 0000000..681c650 --- /dev/null +++ b/saber/postprocessing/blob_detect/README.md @@ -0,0 +1,21 @@ +# Blob Detect Tool +Author: Daniel Xenes +Takes a volume_thresholded volume (binary volume) and finds blobs in it (hopefully cell bodies). 
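+
+For example, a minimal sketch of preparing an input and reading back the centroids from Python (file names, array sizes, and blob areas here are illustrative placeholders, not part of the tool):
+
+```python
+import numpy as np
+
+# Hypothetical binary mask with two square "blobs" (areas 100 and 225)
+mask = np.zeros((128, 128), dtype=np.uint8)
+mask[10:20, 10:20] = 1
+mask[60:75, 60:75] = 1
+np.save("binary_map.npy", mask)
+
+# Run the container as shown under "How to use" below, with --min/--max
+# chosen to bracket the expected blob areas, then load the result:
+centroids = np.load("centroids.npy")  # shape (M, mask.ndim), one row per blob
+print(centroids)
+```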
+ +Inputs: + input - (str) binary map input file + max - (float) maximum area to be counted + min - (float) minimum area to be counted + outfil - (str) output file name + +outputs: + + MxN Array containing centroids where + M is number of blobs and + N is ndim of input array + +## How to use + +`docker run aplbrain/blob_detect -i INPUT_FILE --min MINIMUM --max MAXIMUM --outfile OUTPUT_FILE` + +Input files must be numpy .npy files. \ No newline at end of file diff --git a/saber/postprocessing/blob_detect/blob_detect.cwl b/saber/postprocessing/blob_detect/blob_detect.cwl new file mode 100644 index 0000000..5dda635 --- /dev/null +++ b/saber/postprocessing/blob_detect/blob_detect.cwl @@ -0,0 +1,47 @@ +# Copyright 2020 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cwlVersion: v1.0 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: aplbrain/blob_detect +baseCommand: python +arguments: ["blob_detect.py"] +inputs: + input: + type: File + inputBinding: + position: 1 + prefix: --input + min: + type: string + inputBinding: + position: 2 + prefix: --min + max: + type: string + inputBinding: + position: 3 + prefix: --max + outfile: + type: string + inputBinding: + position: 4 + prefix: --outfile +outputs: + blob_detect_out: + type: File + outputBinding: + glob: $(inputs.outfile) diff --git a/saber/postprocessing/blob_detect/blob_detect.py b/saber/postprocessing/blob_detect/blob_detect.py new file mode 100644 index 0000000..f1d2400 --- /dev/null +++ b/saber/postprocessing/blob_detect/blob_detect.py @@ -0,0 +1,53 @@ +# Copyright 2020 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +from skimage.measure import label, regionprops +import argparse + + +def get_parser(): + parser = argparse.ArgumentParser(description="Blob Detect Tool") + parser.set_defaults(func=lambda _: parser.print_help()) + parser.add_argument("-i", "--input", required=True, help="Input numpy array file") + parser.add_argument( + "--min", required=True, help="minimum area for region to be counted" + ) + parser.add_argument( + "--max", required=True, help="maximum area for region to be counted" + ) + parser.add_argument("-o", "--outfile", required=True, help="Output file") + return parser + + +def blob_detect(dense_map, min, max): + labels = label(dense_map) + regions = regionprops(labels) + output = np.empty((0, dense_map.ndim)) + for props in regions: + if props.area >= float(min) and props.area <= float(max): + output = np.concatenate((output, [props.centroid]), axis=0) + return output + + +def main(): + parser = get_parser() + args = parser.parse_args() + input_array = np.load(args.input) + output_array = blob_detect(input_array, min=args.min, max=args.max) + np.save(args.outfile, output_array) + + +if __name__ == "__main__": + main() diff --git a/saber/postprocessing/blob_detect/test_workflow/example_blob_detect.cwl b/saber/postprocessing/blob_detect/test_workflow/example_blob_detect.cwl new file mode 100644 index 0000000..551ab03 --- /dev/null +++ b/saber/postprocessing/blob_detect/test_workflow/example_blob_detect.cwl @@ -0,0 +1,44 @@ +# Copyright 2020 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cwlVersion: v1.0 +class: Workflow +doc: local + +cwlVersion: v1.0 +class: Workflow +inputs: + input: File + min: string + max: string + outfile: string + +outputs: + blob_detect_output: + type: File + outputSource: blob_detect/blob_detect_out +steps: + blob_detect: + run: ../blob_detect.cwl + in: + input: input + min: min + max: max + outfile: outfile + out: + [blob_detect_out] + hints: + saber: + local: True + file_path: /Users/xenesd1/Projects/aplbrain/saber/output \ No newline at end of file diff --git a/saber/postprocessing/threshold/Dockerfile b/saber/postprocessing/threshold/Dockerfile new file mode 100644 index 0000000..5431a5f --- /dev/null +++ b/saber/postprocessing/threshold/Dockerfile @@ -0,0 +1,18 @@ +# Copyright 2020 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+FROM python:3.7-slim +RUN pip install numpy +WORKDIR /src/ +COPY *.py /src/ +ENTRYPOINT ["python", "threshold.py"] diff --git a/saber/postprocessing/threshold/README.md b/saber/postprocessing/threshold/README.md new file mode 100644 index 0000000..83e5a59 --- /dev/null +++ b/saber/postprocessing/threshold/README.md @@ -0,0 +1,9 @@ +# Volume Threshold Tool +Author: Daniel Xenes +Applies a threshold to a volume and outputs the binarized array. Prints an F1 score if groundtruth is provided. + +## How to use + +`docker run aplbrain/threshold -i INPUT_FILE -t THRESHOLD -o OUTPUT_FILE -gt GROUNDTRUTH_FILE` + +Input files must be numpy .npy files. diff --git a/saber/postprocessing/threshold/test_workflow/example_job.yaml b/saber/postprocessing/threshold/test_workflow/example_job.yaml new file mode 100644 index 0000000..6a5771d --- /dev/null +++ b/saber/postprocessing/threshold/test_workflow/example_job.yaml @@ -0,0 +1,27 @@ +#Boss files +host_name: api.bossdb.io +coord: GT-VS0172 +token: +coll: GTXrayData +exp: VS0172 +chan_labels: ilastik_dense_c_pixel_ahb +chan_img: full_vol +dtype_img: uint8 +dtype_lbl: uint64 +itype_name: image +res: 0 +xmin: 4400 +xmax: 4656 +ymin: 343 +ymax: 599 +zmin: 211 +zmax: 300 +padding: 0 + +#threshold input +threshold: 0.5 + +#Output File Names +raw_pull_output_name: pull_output.npy +anno_pull_output_name: anno_output.npy +threshold_output_name: threshold_output.npy \ No newline at end of file diff --git a/saber/postprocessing/threshold/test_workflow/example_workflow.cwl b/saber/postprocessing/threshold/test_workflow/example_workflow.cwl new file mode 100644 index 0000000..fbaac59 --- /dev/null +++ b/saber/postprocessing/threshold/test_workflow/example_workflow.cwl @@ -0,0 +1,123 @@ +# Copyright 2020 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +cwlVersion: v1.0 +class: Workflow +doc: local + +cwlVersion: v1.0 +class: Workflow +inputs: + coord: string + token: string + host_name: string + coll: string + exp: string + chan_labels: string + chan_img: string + dtype_img: string + dtype_lbl: string + itype_name: string + padding: int + res: int + xmin: int + xmax: int + ymin: int + ymax: int + zmin: int + zmax: int + raw_pull_output_name: string + anno_pull_output_name: string + + threshold: string + threshold_output_name: string + +outputs: + pull_output: + type: File + outputSource: raw_boss_pull/pull_output + anno_output: + type: File + outputSource: anno_boss_pull/pull_output + threshold_output: + type: File + outputSource: threshold/threshold_out + +steps: + raw_boss_pull: + run: ../../../boss_access/boss_pull_nos3.cwl + in: + host_name: host_name + token: token + coll_name: coll + exp_name: exp + chan_name: chan_img + dtype_name: dtype_img + resolution: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + itype_name: itype_name + padding: padding + output_name: raw_pull_output_name + coord_name: coord + out: + [pull_output] + hints: + saber: + local: True + file_path: /Users/xenesd1/Projects/aplbrain/saber/output + anno_boss_pull: + run: ../../../boss_access/boss_pull_nos3.cwl + in: + host_name: host_name + token: token + coll_name: coll + exp_name: exp + chan_name: chan_labels + dtype_name: dtype_lbl + resolution: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + itype_name: itype_name + padding: padding + output_name: anno_pull_output_name + coord_name: coord + out: + [pull_output] + hints: + saber: + local: True + file_path: /Users/xenesd1/Projects/aplbrain/saber/output + threshold: + run: ../threshold.cwl + in: + input: raw_boss_pull/pull_output +# groundtruth: anno_boss_pull/pull_output + threshold: threshold + outfile: threshold_output_name + out: + [threshold_out] + hints: + saber: + local: True + file_path: /Users/xenesd1/Projects/aplbrain/saber/output + score_format: "F1: {score}" \ No newline at end of file diff --git a/saber/postprocessing/threshold/threshold.cwl b/saber/postprocessing/threshold/threshold.cwl new file mode 100644 index 0000000..20d0e04 --- /dev/null +++ b/saber/postprocessing/threshold/threshold.cwl @@ -0,0 +1,47 @@ +# Copyright 2020 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cwlVersion: v1.0 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: aplbrain/threshold +baseCommand: python +arguments: ["threshold.py"] +inputs: + input: + type: File + inputBinding: + position: 1 + prefix: --input + groundtruth: + type: File? 
+ inputBinding: + position: 2 + prefix: --groundtruth + threshold: + type: string + inputBinding: + position: 3 + prefix: --threshold + outfile: + type: string + inputBinding: + position: 4 + prefix: --outfile +outputs: + threshold_out: + type: File + outputBinding: + glob: $(inputs.outfile) diff --git a/saber/postprocessing/threshold/threshold.py b/saber/postprocessing/threshold/threshold.py new file mode 100644 index 0000000..50eda8f --- /dev/null +++ b/saber/postprocessing/threshold/threshold.py @@ -0,0 +1,98 @@ +# Copyright 2020 The Johns Hopkins University Applied Physics Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import math +import argparse + + +def get_parser(): + parser = argparse.ArgumentParser(description="Thresholding Tool") + parser.set_defaults(func=lambda _: parser.print_help()) + parser.add_argument("-i", "--input", required=True, help="Input numpy array file") + parser.add_argument( + "-gt", "--groundtruth", required=False, help="Groundtruth numpy array file" + ) + parser.add_argument("-t", "--threshold", required=True, help="Threshold, [0,1]") + parser.add_argument("-o", "--outfile", required=True, help="Output file") + return parser + + +def apply_threshold(probability_map, threshold): + """ + Aapplies a threshold to a real-valued probabilty map + + Args: + probability_map: (numPy Array) + threshold: (float) Threshold to apply + Returns: + numPy Array + """ + threshold = float(threshold) + if threshold < 0 or threshold > 1: + raise ValueError("Invalid threshold. Threshold must be between 0 and 1.") + if probability_map.ndim == 4: + # Input data is in Z, Chan, Y, X (Xbrain defacto) + probability_map = np.squeeze(probability_map).T + normal = (probability_map - np.min(probability_map)) / ( + np.max(probability_map) - np.min(probability_map) + ) + normal[normal < threshold] = 0 + normal[normal >= threshold] = 1 + return normal + + +def f1_score(binary_map, binary_gt=None): + """ + Calculates f1 score on thresholded array. 
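+
+    Note: beta is fixed at 2 below, so the value returned is the
+    F-beta (F2) score, (1 + beta**2) * precision * recall /
+    (beta**2 * precision + recall), which weights recall more heavily
+    than precision.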
+ """ + beta = 2 + true_detect = np.sum(np.logical_and(binary_map, binary_gt).astype(int).ravel()) + detections = np.sum(binary_map.ravel()) + true_positives = np.sum(binary_gt.ravel()) + if detections > 0: + precision = true_detect / detections + else: + precision = 0 + if true_positives > 0: + recall = true_detect / true_positives + else: + recall = 0 + + if precision + recall > 0: + f1 = ( + (1 + math.pow(beta, 2)) + * (precision * recall) + / (math.pow(beta, 2) * precision + recall) + ) + else: + f1 = 0 + return f1 + + +def main(): + parser = get_parser() + args = parser.parse_args() + input_array = np.load(args.input) + output_array = apply_threshold(input_array, args.threshold) + np.save(args.outfile, output_array) + + if args.groundtruth: + groundtruth_array = np.load(args.groundtruth) + f1 = f1_score(output_array, groundtruth_array) + print("F1: {}".format(f1)) + + +if __name__ == "__main__": + main() diff --git a/saber/xbrain/Dockerfile b/saber/xbrain/Dockerfile index 836d2bb..28a2d20 100644 --- a/saber/xbrain/Dockerfile +++ b/saber/xbrain/Dockerfile @@ -25,9 +25,11 @@ RUN conda install --yes --force libgfortran scipy \ RUN pip install --no-cache-dir mahotas RUN pip install --no-cache-dir ndparse RUN pip install --no-cache-dir nibabel +RUN pip install --no-cache-dir blosc==1.4.4 RUN mkdir app RUN git clone https://github.com/jhuapl-boss/intern.git && cd intern && git checkout RemoteExtension && git pull && python3 setup.py install --user ADD ./unsupervised_celldetect.py /app/unsupervised_celldetect.py #Necessary to use for galaxy ADD --chown=1000:100 ./xbrain.py ./process-xbrain.py ./split_cells.py /app/ ENV PATH /app:$PATH +USER root diff --git a/saber/xbrain/example2d/job.yml b/saber/xbrain/example2d/job.yml index bd54036..6480325 100644 --- a/saber/xbrain/example2d/job.yml +++ b/saber/xbrain/example2d/job.yml @@ -14,10 +14,10 @@ data: class: File - path: data.nii + path: /your/path/to/data.nii cell_gt: class: File - path: tj_anno.nii + path: /your/path/to/tj_anno.nii detect_threshold: 0.67 stop: 0.47 initial_template_size: 18 @@ -26,4 +26,4 @@ erode: 1 max_cells: 400 num_samp: 500000 num_comp: 2 -_saber_bucket: saber-batch \ No newline at end of file +_saber_bucket: saber-batch diff --git a/saber/xbrain/jobs/cell_detect/batch.cwl b/saber/xbrain/jobs/cell_detect/batch.cwl new file mode 100644 index 0000000..cecdd44 --- /dev/null +++ b/saber/xbrain/jobs/cell_detect/batch.cwl @@ -0,0 +1,192 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +inputs: + use_boss: int + coord: string + token: string + host_name: string + coll: string + exp: string + chan_labels: string + chan_img: string + dtype_img: string + dtype_lbl: string + itype_name: string + padding: int + res: int + xmin: int + xmax: int + ymin: int + ymax: int + zmin: int + zmax: int + + # Unet training + train_pct: float + n_epochs: int + mb_size: int + n_mb_per_epoch: int + use_adam: boolean + learning_rate: float + decay: float + momentum: float + beta1: float + beta2: float + save_freq: int + do_warp: boolean + tile_size: int + + #Unet Classify + classify_output_name: string + + threshold: float + stop: float + initial_template_size: int + detect_dilation: int +# max_cells: int + dense_output_name: string + + optimize_output: string + score_out: string + raw_pull_output_name: string + anno_pull_output_name: string + detect_output_name: string + metrics_out: string +outputs: + pull_output: + type: File + outputSource: raw_boss_pull/pull_output + anno_output: + type: File + outputSource: 
anno_boss_pull/pull_output + train_output: + type: File + outputSource: optimize/classifier_weights + membrane_output: + type: File + outputSource: classify/membrane_probability_map + cell_detect_output: + type: File + outputSource: cell_detect/cell_detect_results + metric_output: + type: File + outputSource: metrics/metrics +steps: + raw_boss_pull: + run: ../../../boss_access/boss_pull_nos3.cwl + in: + host_name: host_name + token: token + coll_name: coll + exp_name: exp + chan_name: chan_img + dtype_name: dtype_img + resolution: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + itype_name: itype_name + padding: padding + output_name: raw_pull_output_name + coord_name: coord + out: + [pull_output] + anno_boss_pull: + run: ../../../boss_access/boss_pull_nos3.cwl + in: + host_name: host_name + token: token + coll_name: coll + exp_name: exp + chan_name: chan_labels + dtype_name: dtype_lbl + resolution: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + itype_name: itype_name + padding: padding + output_name: anno_pull_output_name + coord_name: coord + out: + [pull_output] + optimize: + run: ../../tools/membrane_unets_train.cwl + in: + use_boss: use_boss + coord: coord + token: token + coll: coll + exp: exp + chan_labels: chan_labels + chan_img: chan_img + dtype_img: dtype_img + dtype_lbl: dtype_lbl + res: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + train_pct: train_pct + n_epochs: n_epochs + mb_size: mb_size + n_mb_per_epoch: n_mb_per_epoch + use_adam: use_adam + learning_rate: learning_rate + momentum: momentum + decay: decay + beta1: beta1 + beta2: beta2 + save_freq: save_freq + do_warp: do_warp + tile_size: tile_size + output: optimize_output + score_out: score_out + out: [classifier_weights, scores] + + classify: + run: ../../unets/deploy_unets.cwl + in: + img_file: raw_boss_pull/pull_output + lbl_file: anno_boss_pull/pull_output + weights_file: optimize/classifier_weights + output: classify_output_name + out: [membrane_probability_map] + hints: + saber: + score_format: "F1: {score}" + + cell_detect: + run: ../../tools/unsup_cell_detect_3D_nos3.cwl + in: + input: classify/membrane_probability_map + output_name: detect_output_name + threshold: threshold + stop: stop + initial_template_size: initial_template_size + dilation: detect_dilation +# max_cells: max_cells + dense_output_name: dense_output_name + out: [cell_detect_results, dense_output] + + metrics: + run: ../../tools/unsup_metrics_nos3.cwl + in: + input: cell_detect/cell_detect_results + output_name: metrics_out + ground_truth: anno_boss_pull/pull_output + initial_template_size: initial_template_size + out: [metrics] + hints: + saber: + score_format: "F1: {score}" \ No newline at end of file diff --git a/saber/xbrain/jobs/cell_detect/job.yml b/saber/xbrain/jobs/cell_detect/job.yml new file mode 100644 index 0000000..bbbc70d --- /dev/null +++ b/saber/xbrain/jobs/cell_detect/job.yml @@ -0,0 +1,54 @@ + +#Boss files +host_name: api.bossdb.io +coord: GT-VS0172 +token: +coll: GTXrayData +exp: VS0172 +chan_labels: ilastik_dense_c_pixel_ahb +chan_img: full_vol +dtype_img: uint8 +dtype_lbl: uint64 +itype_name: image +res: 0 +xmin: 4400 +xmax: 4656 +ymin: 343 +ymax: 599 +zmin: 211 +zmax: 300 +padding: 0 + +# Unet Train +use_boss: 1 +train_pct: 0.7 +n_epochs: 5 +mb_size: 4 +n_mb_per_epoch: 3 +learning_rate: 0.01 +use_adam: False +beta1: 0.9 +beta2: 0.999 +momentum: 0.99 +decay: 0.000001 +save_freq: 50 +do_warp: False +tile_size: 256 
+_saber_bucket: saber-batch + +detect_threshold: 0.2 +stop: 0.47 +initial_template_size: 18 +detect_dilation: 8 +#max_cells: 500 + + +#Output File Names +raw_pull_output_name: pull_output.npy +anno_pull_output_name: anno_output.npy +optimize_output: new_weights.h5 +score_out: f1_score.npy +classify_output_name: probability_map.npy +detect_output_name: detect_output.npy +dense_output_name: dense_output.npy +metrics_out: metrics.npy \ No newline at end of file diff --git a/saber/xbrain/jobs/cell_detect/job_simple.yml b/saber/xbrain/jobs/cell_detect/job_simple.yml new file mode 100644 index 0000000..8ada75a --- /dev/null +++ b/saber/xbrain/jobs/cell_detect/job_simple.yml @@ -0,0 +1,63 @@ +#Boss files +host_name: api.bossdb.io +coord: GT-VS0172 +token: +coll: GTXrayData +exp: VS0172 +chan_labels: ilastik_dense_c_pixel_ahb +chan_img: full_vol +dtype_img: uint8 +dtype_lbl: uint64 +itype_name: image +res: 0 +padding: 0 + +# Coordinates +test_xmin: 4400 +test_xmax: 4656 +test_ymin: 343 +test_ymax: 599 +test_zmin: 211 +test_zmax: 300 + +train_xmin: 4400 +train_xmax: 5168 +train_ymin: 472 +train_ymax: 1240 +train_zmin: 211 +train_zmax: 300 + + +# Unet Train +use_boss: 1 +train_pct: 0.9 +n_epochs: 150 +mb_size: 6 +n_mb_per_epoch: 3 +learning_rate: 0.0001 +use_adam: False +beta1: 0.9 +beta2: 0.999 +momentum: 0.99 +decay: 0.000001 +save_freq: 50 +do_warp: False +tile_size: 256 +_saber_bucket: saber-batch + +threshold: 0.5 +min: 10 +max: 10000 +initial_template_size: 18 + + + +#Output File Names +raw_pull_output_name: pull_output.npy +anno_pull_output_name: anno_output.npy +optimize_output: new_weights.h5 +score_out: f1_score.npy +classify_output_name: probability_map.npy +threshold_output_name: threshold_output.npy +blob_detect_output_name: centroid.npy +metrics_out: metrics.npy \ No newline at end of file diff --git a/saber/xbrain/jobs/cell_detect/local.cwl b/saber/xbrain/jobs/cell_detect/local.cwl new file mode 100644 index 0000000..937cfec --- /dev/null +++ b/saber/xbrain/jobs/cell_detect/local.cwl @@ -0,0 +1,216 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +doc: local +inputs: + use_boss: int + coord: string? + token: string? + host_name: string? + coll: string? + exp: string? + chan_labels: string? + chan_img: string? + dtype_img: string? + dtype_lbl: string? + itype_name: string? + padding: int? + res: int? + xmin: int? + xmax: int? + ymin: int? + ymax: int? + zmin: int? + zmax: int? + + # Unet training + train_pct: float? + n_epochs: int? + mb_size: int? + n_mb_per_epoch: int? + use_adam: int? + learning_rate: float? + decay: float? + momentum: float? + beta1: float? + beta2: float? + save_freq: int? + do_warp: boolean? + tile_size: int? +# weights_file: File? + + #Unet Classify + classify_output_name: string + + detect_threshold: float? + stop: float? + initial_template_size: int? + detect_dilation: int? +# max_cells: int? 
+ dense_output_name: string + + optimize_output: string + score_out: string + raw_pull_output_name: string + anno_pull_output_name: string + detect_output_name: string + metrics_out: string +outputs: + pull_output: + type: File + outputSource: raw_boss_pull/pull_output + anno_output: + type: File + outputSource: anno_boss_pull/pull_output + train_output: + type: File + outputSource: optimize/classifier_weights + membrane_output: + type: File + outputSource: classify/membrane_probability_map + cell_detect_output: + type: File + outputSource: cell_detect/cell_detect_results + metric_output: + type: File + outputSource: metrics/metrics +steps: + raw_boss_pull: + run: ../../../boss_access/boss_pull_nos3.cwl + in: + host_name: host_name + token: token + coll_name: coll + exp_name: exp + chan_name: chan_img + dtype_name: dtype_img + resolution: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + itype_name: itype_name + padding: padding + output_name: raw_pull_output_name + coord_name: coord + out: + [pull_output] + hints: + saber: + local: True + file_path: /home/ubuntu/saber/volumes/data/local + anno_boss_pull: + run: ../../../boss_access/boss_pull_nos3.cwl + in: + host_name: host_name + token: token + coll_name: coll + exp_name: exp + chan_name: chan_labels + dtype_name: dtype_lbl + resolution: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + itype_name: itype_name + padding: padding + output_name: anno_pull_output_name + coord_name: coord + out: + [pull_output] + hints: + saber: + local: True + file_path: /home/ubuntu/saber/volumes/data/local + optimize: + run: ../../tools/membrane_unets_train.cwl + in: + use_boss: use_boss + coord: coord + token: token + coll: coll + exp: exp + chan_labels: chan_labels + chan_img: chan_img + dtype_img: dtype_img + dtype_lbl: dtype_lbl + res: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + train_pct: train_pct + n_epochs: n_epochs + mb_size: mb_size + n_mb_per_epoch: n_mb_per_epoch + use_adam: use_adam + learning_rate: learning_rate + momentum: momentum + decay: decay + beta1: beta1 + beta2: beta2 + save_freq: save_freq + do_warp: do_warp + tile_size: tile_size +# weights_file: weights_file + output: optimize_output + score_out: score_out + out: [classifier_weights, scores] + hints: + saber: + score_format: "F1: {score}" + local: True + file_path: /home/ubuntu/saber/volumes/data/local + + classify: + run: ../../unets/deploy_unets.cwl + in: + img_file: raw_boss_pull/pull_output + lbl_file: anno_boss_pull/pull_output + weights_file: optimize/classifier_weights + output: classify_output_name + out: [membrane_probability_map] + hints: + saber: + score_format: "F1: {score}" + local: True + file_path: /home/ubuntu/saber/volumes/data/local + + cell_detect: + run: ../../tools/unsup_cell_detect_3D_nos3.cwl + in: + input: classify/membrane_probability_map + output_name: detect_output_name + threshold: detect_threshold + stop: stop + initial_template_size: initial_template_size + dilation: detect_dilation +# max_cells: max_cells + dense_output_name: dense_output_name + out: [cell_detect_results, dense_output] + hints: + saber: + local: True + file_path: /home/ubuntu/saber/volumes/data/local + + metrics: + run: ../../tools/unsup_metrics_nos3.cwl + in: + input: cell_detect/cell_detect_results + output_name: metrics_out + ground_truth: anno_boss_pull/pull_output + initial_template_size: initial_template_size + out: [metrics] + hints: + saber: + score_format: "F1: 
{score}" + local: True + file_path: /home/ubuntu/saber/volumes/data/local \ No newline at end of file diff --git a/saber/xbrain/jobs/cell_detect/local_simple.cwl b/saber/xbrain/jobs/cell_detect/local_simple.cwl new file mode 100644 index 0000000..e6fbb72 --- /dev/null +++ b/saber/xbrain/jobs/cell_detect/local_simple.cwl @@ -0,0 +1,240 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +doc: local +inputs: + use_boss: int + coord: string + token: string + host_name: string + coll: string + exp: string + chan_labels: string + chan_img: string + dtype_img: string + dtype_lbl: string + itype_name: string + padding: int + res: int + + test_xmin: int + test_xmax: int + test_ymin: int + test_ymax: int + test_zmin: int + test_zmax: int + + train_xmin: int + train_xmax: int + train_ymin: int + train_ymax: int + train_zmin: int + train_zmax: int + + # Unet training + train_pct: float + n_epochs: int + mb_size: int + n_mb_per_epoch: int + use_adam: boolean + learning_rate: float + decay: float + momentum: float + beta1: float + beta2: float + save_freq: int + do_warp: boolean + tile_size: int + + # Threshold + threshold: string + + # Blob Detect + min: string + max: string + + # metrics + initial_template_size: int + + optimize_output: string + score_out: string + raw_pull_output_name: string + anno_pull_output_name: string + classify_output_name: string + threshold_output_name: string + blob_detect_output_name: string + metrics_out: string +outputs: + pull_output: + type: File + outputSource: raw_boss_pull/pull_output + anno_output: + type: File + outputSource: anno_boss_pull/pull_output + train_output: + type: File + outputSource: optimize/classifier_weights + membrane_output: + type: File + outputSource: classify/membrane_probability_map + threshold_output: + type: File + outputSource: threshold/threshold_out + blob_detect_output: + type: File + outputSource: blob_detect/blob_detect_out + metric_output: + type: File + outputSource: metrics/metrics +steps: + raw_boss_pull: + run: ../../../boss_access/boss_pull_nos3.cwl + in: + host_name: host_name + token: token + coll_name: coll + exp_name: exp + chan_name: chan_img + dtype_name: dtype_img + resolution: res + xmin: test_xmin + xmax: test_xmax + ymin: test_ymin + ymax: test_ymax + zmin: test_zmin + zmax: test_zmax + itype_name: itype_name + padding: padding + output_name: raw_pull_output_name + coord_name: coord + out: + [pull_output] + hints: + saber: + local: True + file_path: /home/ubuntu/saber/outputs + use_cache: True + anno_boss_pull: + run: ../../../boss_access/boss_pull_nos3.cwl + in: + host_name: host_name + token: token + coll_name: coll + exp_name: exp + chan_name: chan_labels + dtype_name: dtype_lbl + resolution: res + xmin: test_xmin + xmax: test_xmax + ymin: test_ymin + ymax: test_ymax + zmin: test_zmin + zmax: test_zmax + itype_name: itype_name + padding: padding + output_name: anno_pull_output_name + coord_name: coord + out: + [pull_output] + hints: + saber: + local: True + file_path: /home/ubuntu/saber/outputs + use_cache: True + optimize: + run: ../../tools/membrane_unets_train.cwl + in: + use_boss: use_boss + coord: coord + token: token + coll: coll + exp: exp + chan_labels: chan_labels + chan_img: chan_img + dtype_img: dtype_img + dtype_lbl: dtype_lbl + res: res + xmin: train_xmin + xmax: train_xmax + ymin: train_ymin + ymax: train_ymax + zmin: train_zmin + zmax: train_zmax + train_pct: train_pct + n_epochs: n_epochs + mb_size: mb_size + n_mb_per_epoch: n_mb_per_epoch + use_adam: use_adam + learning_rate: 
learning_rate + momentum: momentum + decay: decay + beta1: beta1 + beta2: beta2 + save_freq: save_freq + do_warp: do_warp + tile_size: tile_size + output: optimize_output + score_out: score_out + out: [classifier_weights, scores] + hints: + saber: + local: True + file_path: /home/ubuntu/saber/outputs + score_format: "F1: {score}" + use_cache: True + classify: + run: ../../unets/deploy_unets.cwl + in: + img_file: raw_boss_pull/pull_output + lbl_file: anno_boss_pull/pull_output + weights_file: optimize/classifier_weights + output: classify_output_name + out: [membrane_probability_map] + hints: + saber: + local: True + file_path: /home/ubuntu/saber/outputs + score_format: "F1: {score}" + threshold: + run: ../../../postprocessing/threshold/threshold.cwl + in: + input: classify/membrane_probability_map + groundtruth: anno_boss_pull/pull_output + threshold: threshold + outfile: threshold_output_name + out: + [threshold_out] + hints: + saber: + local: True + file_path: /home/ubuntu/saber/outputs + score_format: "F1: {score}" + + blob_detect: + run: ../../../postprocessing/blob_detect/blob_detect.cwl + in: + input: threshold/threshold_out + min: min + max: max + outfile: blob_detect_output_name + out: + [blob_detect_out] + hints: + saber: + local: True + file_path: /home/ubuntu/saber/outputs + + metrics: + run: ../../tools/unsup_metrics_nos3.cwl + in: + input: blob_detect/blob_detect_out + output_name: metrics_out + ground_truth: anno_boss_pull/pull_output + initial_template_size: initial_template_size + out: [metrics] + hints: + saber: + local: True + file_path: /home/ubuntu/saber/outputs + score_format: "F1: {score}" \ No newline at end of file diff --git a/saber/xbrain/jobs/cell_detect/params.yml b/saber/xbrain/jobs/cell_detect/params.yml new file mode 100644 index 0000000..fc155a8 --- /dev/null +++ b/saber/xbrain/jobs/cell_detect/params.yml @@ -0,0 +1,81 @@ +n_epochs: + range: + start: 5 + stop: 10 + step: 1 + parameters: + abs: n_epochs + steps: + - optimize +learning_rate: + range: + start: 0.005 + stop: 0.02 + step: 0.005 + parameters: + abs: learning_rate + steps: + - optimize +threshold: + range: + start: 0.2 + stop: 0.5 + step: 0.1 + parameters: + abs: detect_threshold + steps: + - cell_detect +stop: + range: + start: 0.4 + stop: 0.6 + step: 0.05 + parameters: + abs: stop + steps: + - cell_detect +# mb_size: +# range: +# start: 2 +# stop: 8 +# step: 2 +# parameters: +# abs: mb_size +# steps: +# - optimize +# n_mb_per_epoch: +# range: +# start: 2 +# stop: 4 +# step: 1 +# parameters: +# abs: n_mb_per_epoch +# steps: +# - optimize +# tile_size: +# range: +# start: 64 +# stop: 256 +# step: 64 +# parameters: +# abs: tile_size +# steps: +# - optimize +# decay: +# range: +# start: 0.0000005 +# stop: 0.000002 +# step: 0.0000005 +# parameters: +# abs: decay +# steps: +# - optimize +# momentum: +# range: +# start: 0.95 +# stop: 0.99 +# step: 0.01 +# parameters: +# abs: momentum +# steps: +# - optimize \ No newline at end of file diff --git a/saber/xbrain/jobs/unet_train_job/params.yml b/saber/xbrain/jobs/unet_train_job/params.yml new file mode 100644 index 0000000..eeb9935 --- /dev/null +++ b/saber/xbrain/jobs/unet_train_job/params.yml @@ -0,0 +1,54 @@ +n_epochs: + range: + start: 200 + stop: 300 + step: 25 + parameters: + abs: n_epochs + steps: + - optimize +mb_size: + range: + start: 3 + stop: 5 + step: 1 + parameters: + abs: mb_size + steps: + - optimize +n_mb_per_epoch: + range: + start: 2 + stop: 4 + step: 1 + parameters: + abs: n_mb_per_epoch + steps: + - optimize +learning_rate: + range: + 
start: 0.0001 + stop: 0.0004 + step: 0.0001 + parameters: + abs: learning_rate + steps: + - optimize +beta1: + range: + start: 0.88 + stop: 0.92 + step: 0.01 + parameters: + abs: beta1 + steps: + - optimize +beta2: + range: + start: 0.997 + stop: 0.999 + step: 0.001 + parameters: + abs: beta2 + steps: + -optimize diff --git a/saber/xbrain/jobs/unet_train_job/params_sgd.yml b/saber/xbrain/jobs/unet_train_job/params_sgd.yml new file mode 100644 index 0000000..94837bf --- /dev/null +++ b/saber/xbrain/jobs/unet_train_job/params_sgd.yml @@ -0,0 +1,63 @@ +n_epochs: + range: + start: 5 + stop: 10 + step: 1 + parameters: + abs: n_epochs + steps: + - optimize +mb_size: + range: + start: 2 + stop: 8 + step: 2 + parameters: + abs: mb_size + steps: + - optimize +n_mb_per_epoch: + range: + start: 2 + stop: 4 + step: 1 + parameters: + abs: n_mb_per_epoch + steps: + - optimize +#tile_size: +# range: +# start: 64 +# stop: 256 +# step: 64 +# parameters: +# abs: tile_size +# steps: +# - optimize +#learning_rate: +# range: +# start: 0.005 +# stop: 0.02 +# step: 0.005 +# parameters: +# abs: learning_rate +# steps: +# - optimize +#decay: +# range: +# start: 0.0000005 +# stop: 0.000002 +# step: 0.0000005 +# parameters: +# abs: decay +# steps: +# - optimize +# momentum: +# range: +# start: 0.95 +# stop: 0.99 +# step: 0.01 +# parameters: +# abs: momentum +# steps: +# - optimize \ No newline at end of file diff --git a/saber/xbrain/jobs/unet_train_job/xbrain_unets_celldetect_train.cwl b/saber/xbrain/jobs/unet_train_job/xbrain_unets_celldetect_train.cwl new file mode 100644 index 0000000..829133c --- /dev/null +++ b/saber/xbrain/jobs/unet_train_job/xbrain_unets_celldetect_train.cwl @@ -0,0 +1,164 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +inputs: + use_boss: int + coord: string? + token: string? + host_name: string? + coll: string? + exp: string? + chan_labels: string? + chan_img: string? + dtype_img: string? + dtype_lbl: string? + itype: string? + padding: int? + res: int? + xmin: int? + xmax: int? + ymin: int? + ymax: int? + zmin: int? + zmax: int? + train_pct: float? + n_epochs: int? + mb_size: int? + n_mb_per_epoch: int? + use_adam: int? + learning_rate: float? + decay: float? + momentum: float? + beta1: float? + beta2: float? + save_freq: int? + do_warp: boolean? + tile_size: int? + weights_file: File? + + detect_threshold: float? + stop: float? + initial_template_size: int? + detect_dilation: int? 
+ + output: string + score_out: string + raw_pull_output_name: string + anno_pull_output_name: string + metrics_out: string +outputs: + train_output: + type: File + outputSource: optimize/classifier_weights +steps: + raw_boss_pull: + run: ../../boss_access/boss_pull.cwl + in: + host_name: host_name + token: token + coll_name: coll_name + exp_name: exp_name + chan_name: chan_img + dtype_name: dtype_img + itype_name: itype + resolution: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + padding: padding + output_name: raw_pull_output_name + coord_name: coord_name + bucket: bucket + out: + [raw_pull_output] + anno_boss_pull: + run: ../../boss_access/boss_pull.cwl + in: + host_name: host_name + token: token + coll_name: coll_name + exp_name: exp_name + chan_name: chan_lbl + dtype_name: dtype_lbl + itype_name: itype + resolution: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + padding: padding + output_name: anno_pull_output_name + coord_name: coord_name + bucket: bucket + out: + [anno_pull_output] + optimize: + run: ../tools/membrane_unets_train.cwl + in: + use_boss: use_boss + img_file: raw_boss_pull/raw_pull_output + lbl_file: anno_boss_pull/anno_pull_output + coord: coord + token: token + coll: coll + exp: exp + chan_labels: chan_labels + chan_img: chan_img + dtype_img: dtype_img + dtype_lbl: dtype_lbl + res: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + train_pct: train_pct + n_epochs: n_epochs + mb_size: mb_size + n_mb_per_epoch: n_mb_per_epoch + use_adam: use_adam + learning_rate: learning_rate + momentum: momentum + decay: decay + beta1: beta1 + beta2: beta2 + save_freq: save_freq + do_warp: do_warp + tile_size: tile_size + weights_file: weights_file + output: output + score_out: score_out + out: [membrane_probability,classifier_weights,scores] + hints: + saber: + score_format: "F1: {score}" + local: True + cell_detect: + run: ../tools/unsup_cell_detect_3D_nos3.cwl + in: + input: membrane_classify/membrane_probability + output_name: optimize_output_name + threshold: detect_threshold + stop: stop + initial_template_size: initial_template_size + dilation: detect_dilation + max_cells: max_cells + out: [cell_detect_results] + metrics: + run: ../tools/unsup_metrics_3D_nos3.cwl + in: + input: cell_detect/cell_detect_results + output_name: metrics_out + groundtruth: anno_boss_pull/anno_pull_output + out: [metrics] + hints: + saber: + score_format: "F1: {score}" + local: True diff --git a/saber/xbrain/jobs/unet_train_job/xbrain_unets_ex_job.yml b/saber/xbrain/jobs/unet_train_job/xbrain_unets_ex_job.yml new file mode 100644 index 0000000..32f57a0 --- /dev/null +++ b/saber/xbrain/jobs/unet_train_job/xbrain_unets_ex_job.yml @@ -0,0 +1,33 @@ +# Unets +use_boss: 1 +#Boss files +coord: GT-VS0172 +img_file: +lbl_file: +token: +coll: GTXrayData +exp: VS0172 +chan_labels: ilastik_dense_c_pixel_ahb +chan_img: full_vol +dtype_img: uint8 +dtype_lbl: uint64 +res: 0 +xmin: 4400 +xmax: 4656 +ymin: 343 +ymax: 599 +zmin: 211 +zmax: 300 +train_pct: 0.7 +n_epochs: 250 +mb_size: 4 +n_mb_per_epoch: 3 +learning_rate: 0.0001 +beta1: 0.9 +beta2: 0.999 +save_freq: 50 +do_warp: False +tile_size: 256 +weights_file: +output: out_weights.h5 +score_out: f1_score.npy diff --git a/saber/xbrain/jobs/unet_train_job/xbrain_unets_ex_job_s3.yml b/saber/xbrain/jobs/unet_train_job/xbrain_unets_ex_job_s3.yml new file mode 100644 index 0000000..d7f3da4 --- /dev/null +++ 
b/saber/xbrain/jobs/unet_train_job/xbrain_unets_ex_job_s3.yml @@ -0,0 +1,34 @@ +# Unets +use_boss: 1 +#Boss files +coord: GT-VS0172 +img_file: merp +lbl_file: merp +token: +coll: GTXrayData +exp: VS0172 +chan_labels: ilastik_dense_c_pixel_ahb +chan_img: full_vol +dtype_img: uint8 +dtype_lbl: uint64 +res: 0 +xmin: 4400 +xmax: 4656 +ymin: 343 +ymax: 599 +zmin: 211 +zmax: 300 +train_pct: 0.7 +n_epochs: 250 +mb_size: 4 +n_mb_per_epoch: 3 +learning_rate: 0.0001 +beta1: 0.9 +beta2: 0.999 +save_freq: 50 +do_warp: False +tile_size: 256 +weights_file: None +output: new_weights.h5 +score_out: f1_score.npy +_saber_bucket: saber-batch diff --git a/saber/xbrain/jobs/unet_train_job/xbrain_unets_ex_job_sgd.yml b/saber/xbrain/jobs/unet_train_job/xbrain_unets_ex_job_sgd.yml new file mode 100644 index 0000000..db3c021 --- /dev/null +++ b/saber/xbrain/jobs/unet_train_job/xbrain_unets_ex_job_sgd.yml @@ -0,0 +1,34 @@ +# Unets +use_boss: 1 +#Boss files +coord: GT-VS0172 +token: +coll: GTXrayData +exp: VS0172 +chan_labels: ilastik_dense_c_pixel_ahb +chan_img: full_vol +dtype_img: uint8 +dtype_lbl: uint64 +res: 0 +xmin: 4400 +xmax: 4656 +ymin: 343 +ymax: 599 +zmin: 211 +zmax: 300 +train_pct: 0.7 +n_epochs: 5 +mb_size: 4 +n_mb_per_epoch: 3 +learning_rate: 0.01 +use_adam: False +beta1: 0.9 +beta2: 0.999 +momentum: 0.99 +decay: 0.000001 +save_freq: 50 +do_warp: False +tile_size: 256 +output: new_weights.h5 +score_out: f1_score.npy +_saber_bucket: saber-batch diff --git a/saber/xbrain/jobs/unet_train_job/xbrain_unets_train.cwl b/saber/xbrain/jobs/unet_train_job/xbrain_unets_train.cwl new file mode 100644 index 0000000..ddeeeb5 --- /dev/null +++ b/saber/xbrain/jobs/unet_train_job/xbrain_unets_train.cwl @@ -0,0 +1,86 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +inputs: + use_boss: int + img_file: File? + lbl_file: File? + coord: string? + token: string? + coll: string? + exp: string? + chan_labels: string? + chan_img: string? + dtype_img: string? + dtype_lbl: string? + res: int? + xmin: int? + xmax: int? + ymin: int? + ymax: int? + zmin: int? + zmax: int? + train_pct: float? + n_epochs: int? + mb_size: int? + n_mb_per_epoch: int? + learning_rate: float? + use_adam: boolean? + beta1: float? + beta2: float? + decay: float? + momentum: float? + save_freq: int? + do_warp: boolean? + tile_size: int? + weights_file: File? 
+ output: string + score_out: string +outputs: + train_output: + type: File + outputSource: optimize/classifier_weights +steps: + optimize: + run: ../../tools/membrane_unets_train.cwl + in: + use_boss: use_boss + img_file: img_file + lbl_file: lbl_file + coord: coord + token: token + coll: coll + exp: exp + chan_labels: chan_labels + chan_img: chan_img + dtype_img: dtype_img + dtype_lbl: dtype_lbl + res: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + train_pct: train_pct + n_epochs: n_epochs + mb_size: mb_size + n_mb_per_epoch: n_mb_per_epoch + learning_rate: learning_rate + use_adam: use_adam + beta1: beta1 + beta2: beta2 + decay: decay + momentum: momentum + save_freq: save_freq + do_warp: do_warp + tile_size: tile_size + weights_file: weights_file + output: output + score_out: score_out + out: [classifier_weights,scores] + hints: + saber: + score_format: "F1: {score}\n" + local: True diff --git a/saber/xbrain/jobs/unet_train_job/xbrain_unets_train_sgd.cwl b/saber/xbrain/jobs/unet_train_job/xbrain_unets_train_sgd.cwl new file mode 100644 index 0000000..6daccd5 --- /dev/null +++ b/saber/xbrain/jobs/unet_train_job/xbrain_unets_train_sgd.cwl @@ -0,0 +1,82 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +doc: local +inputs: + use_boss: int + coord: string? + token: string? + coll: string? + exp: string? + chan_labels: string? + chan_img: string? + dtype_img: string? + dtype_lbl: string? + res: int? + xmin: int? + xmax: int? + ymin: int? + ymax: int? + zmin: int? + zmax: int? + train_pct: float? + n_epochs: int? + mb_size: int? + n_mb_per_epoch: int? + learning_rate: float? + use_adam: boolean? + beta1: float? + beta2: float? + decay: float? + momentum: float? + save_freq: int? + do_warp: boolean? + tile_size: int? 
+ output: string + score_out: string +outputs: + train_output: + type: File + outputSource: optimize/classifier_weights +steps: + optimize: + run: ../../tools/membrane_unets_train.cwl + in: + use_boss: use_boss + coord: coord + token: token + coll: coll + exp: exp + chan_labels: chan_labels + chan_img: chan_img + dtype_img: dtype_img + dtype_lbl: dtype_lbl + res: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + train_pct: train_pct + n_epochs: n_epochs + mb_size: mb_size + n_mb_per_epoch: n_mb_per_epoch + learning_rate: learning_rate + use_adam: use_adam + beta1: beta1 + beta2: beta2 + decay: decay + momentum: momentum + save_freq: save_freq + do_warp: do_warp + tile_size: tile_size + output: output + score_out: score_out + out: [classifier_weights,scores] + hints: + saber: + score_format: "F1: {score}\n" + local: True + file_path: /home/ubuntu/saber/volumes/data/local diff --git a/saber/xbrain/tools/membrane_classify_nos3.cwl b/saber/xbrain/tools/membrane_classify_nos3.cwl index ca17448..1b4cb36 100644 --- a/saber/xbrain/tools/membrane_classify_nos3.cwl +++ b/saber/xbrain/tools/membrane_classify_nos3.cwl @@ -20,8 +20,8 @@ hints: DockerRequirement: # dockerPull: xbrain:airflow-docker dockerPull: aplbrain/xbrain:latest -baseCommand: process-xbrain.py -arguments: ["classify"] +baseCommand: python +arguments: ["/app/process-xbrain.py", "classify"] inputs: input: type: File diff --git a/saber/xbrain/tools/membrane_unets_train.cwl b/saber/xbrain/tools/membrane_unets_train.cwl new file mode 100644 index 0000000..e4ae17f --- /dev/null +++ b/saber/xbrain/tools/membrane_unets_train.cwl @@ -0,0 +1,204 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: aplbrain/unets +baseCommand: python +arguments: [/src/train_unet_docker.py] +inputs: + use_boss: + type: int + default: 1 + inputBinding: + position: 1 + prefix: --use_boss + img_file: + type: File? + inputBinding: + position: 2 + prefix: --img_file + lbl_file: + type: File? + inputBinding: + position: 3 + prefix: --lbl_file + coord: + type: string? + inputBinding: + position: 4 + prefix: --coord + token: + type: string? + inputBinding: + position: 5 + prefix: --token + coll: + type: string? + inputBinding: + position: 6 + prefix: --coll + exp: + type: string? + inputBinding: + position: 7 + prefix: --exp + chan_labels: + type: string? + inputBinding: + position: 8 + prefix: --chan_labels + chan_img: + type: string? + inputBinding: + position: 9 + prefix: --chan_img + dtype_img: + type: string? + inputBinding: + position: 10 + prefix: --dtype_img + dtype_lbl: + type: string? + inputBinding: + position: 11 + prefix: --dtype_lbl + res: + type: int? + default: 0 + inputBinding: + position: 12 + prefix: --res + xmin: + type: int? + inputBinding: + position: 13 + prefix: --xmin + xmax: + type: int? + inputBinding: + position: 14 + prefix: --xmax + ymin: + type: int? + inputBinding: + position: 15 + prefix: --ymin + ymax: + type: int? + inputBinding: + position: 16 + prefix: --ymax + zmin: + type: int? + inputBinding: + position: 17 + prefix: --zmin + zmax: + type: int? + inputBinding: + position: 18 + prefix: --zmax + train_pct: + type: float? + default: 0.5 + inputBinding: + position: 19 + prefix: --train_pct + n_epochs: + type: int? + default: 10 + inputBinding: + position: 20 + prefix: --n_epochs + mb_size: + type: int? + default: 4 + inputBinding: + position: 21 + prefix: --mb_size + n_mb_per_epoch: + type: int? 
+ default: 3 + inputBinding: + position: 22 + prefix: --n_mb_per_epoch + use_adam: + type: boolean? + default: False + inputBinding: + position: 23 + prefix: --use_adam + learning_rate: + type: float? + default: 0.0001 + inputBinding: + position: 24 + prefix: --learning_rate + momentum: + type: float? + default: 0.99 + inputBinding: + position: 25 + prefix: --momentum + decay: + type: float? + default: 0.000001 + inputBinding: + position: 26 + prefix: --decay + beta1: + type: float? + default: 0.9 + inputBinding: + position: 27 + prefix: --beta1 + beta2: + type: float? + default: 0.999 + inputBinding: + position: 28 + prefix: --beta2 + save_freq: + type: int? + default: 50 + inputBinding: + position: 29 + prefix: --save_freq + do_warp: + type: boolean? + default: False + inputBinding: + position: 30 + prefix: --do_warp + tile_size: + type: int? + default: 256 + inputBinding: + position: 31 + prefix: --tile_size + weights_file: + type: File? + inputBinding: + position: 32 + prefix: --weights_file + output: + type: string + inputBinding: + position: 33 + prefix: --output + score_out: + type: string + inputBinding: + position: 34 + prefix: --score_out +outputs: + classifier_weights: + type: File + outputBinding: + glob: $(inputs.output) + scores: + type: File + outputBinding: + glob: $(inputs.score_out) diff --git a/saber/xbrain/tools/unsup_metrics_nos3.cwl b/saber/xbrain/tools/unsup_metrics_nos3.cwl index 572252c..9e32dab 100644 --- a/saber/xbrain/tools/unsup_metrics_nos3.cwl +++ b/saber/xbrain/tools/unsup_metrics_nos3.cwl @@ -21,7 +21,7 @@ hints: # dockerPull: xbrain:airflow-docker dockerPull: aplbrain/xbrain:latest baseCommand: python -arguments: ["/app/unsupervised_celldetect.py","metrics"] +arguments: ["/app/unsupervised_celldetect.py","metrics3D"] inputs: input: type: File diff --git a/saber/xbrain/unets/Dockerfile b/saber/xbrain/unets/Dockerfile new file mode 100644 index 0000000..1b00d9f --- /dev/null +++ b/saber/xbrain/unets/Dockerfile @@ -0,0 +1,61 @@ +# Copyright 2018 The Johns Hopkins University Applied Physics Laboratory. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
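+
+# Theano/Keras CUDA image for the xbrain U-Net tools; runs train_unet_docker.py by default.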
+ +FROM kaixhin/cuda-theano:8.0 + +# Install dependencies +RUN apt-get update && apt-get install -y \ + libhdf5-dev \ + python-h5py \ + python-yaml \ + python3-pip \ + vim + +RUN pip install --upgrade pip +# Upgrade six +RUN pip install --upgrade six + +# Clone Keras repo and move into it +#RUN cd /root && git clone https://github.com/fchollet/keras.git && cd keras && \ +# # Install +# python setup.py install +RUN pip install --ignore-installed keras +#==1.2.2 +#RUN pip install --ignore-installed pygpu +#added + +# Install intern +RUN pip install --ignore-installed intern +#RUN pip install awscli +#RUN pip install boto3 +RUN pip install --ignore-installed SimpleITK + +#Default python2 doesn't include enum34 +RUN pip install enum34 + +#Theano needs numpy ver < 1.16.0 to work +RUN pip install numpy==1.15.4 + +# Create workspace +# TODO: Re-org this to use git clone and S3 +WORKDIR /src +#COPY ./weights/*.hdf5 /src/weights/ +#COPY ./aws-batch/setup/startup.sh /src/ +#COPY ./*.json /src/ +COPY ./*.py /src/ + +ENV KERAS_BACKEND=theano +ENV PATH=/src:$PATH + +ENV THEANO_FLAGS="device=cuda0" +#ENV THEANO_FLAGS='device=cuda,lib.cnmem=1' + +CMD ["python", "train_unet_docker.py"] diff --git a/saber/xbrain/unets/Dockerfile.train b/saber/xbrain/unets/Dockerfile.train new file mode 100644 index 0000000..3db3d07 --- /dev/null +++ b/saber/xbrain/unets/Dockerfile.train @@ -0,0 +1,62 @@ +# Copyright 2018 The Johns Hopkins University Applied Physics Laboratory. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
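+#
+# Alternative build of the image above, based on cuda-theano:7.5 and adding
+# ATLAS BLAS plus CUDA 7.5 Theano flags for training.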
+ +FROM kaixhin/cuda-theano:7.5 + +# Install dependencies +RUN apt-get update && apt-get install -y \ + libhdf5-dev \ + python-h5py \ + python-yaml \ + vim + +RUN pip install --upgrade pip +# Upgrade six +RUN pip install --upgrade six + +# Clone Keras repo and move into it +#RUN cd /root && git clone https://github.com/fchollet/keras.git && cd keras && \ +# # Install +# python setup.py install +RUN pip install --ignore-installed keras +#==1.2.2 +#RUN pip install --ignore-installed pygpu +#added + +# Install intern +RUN pip install --ignore-installed intern +#RUN pip install awscli +#RUN pip install boto3 +RUN pip install --ignore-installed SimpleITK + +# Create workspace +# TODO: Re-org this to use git clone and S3 +WORKDIR /src +#COPY ./weights/*.hdf5 /src/weights/ +#COPY ./aws-batch/setup/startup.sh /src/ +#COPY ./*.json /src/ +COPY ./*.py /src/ + +ENV KERAS_BACKEND=theano +ENV PATH=/src:$PATH + +#BLAS FOR THEANO +RUN apt-get install -y libatlas-base-dev +#ENV THEANO_FLAGS=blas.ldflags='-lf77blas -latlas -lgfortran' + +ENV DEVICE="cuda0" +ENV GPUARRAY_CUDA_VERSION=75 +ENV THEANO_FLAGS="device=cuda0,blas.ldflags='-lf77blas -latlas -lgfortran',dnn.include_path=/usr/local/cuda/include" +#ENV THEANO_FLAGS='device=cuda,lib.cnmem=1' + +RUN apt-get install -y python3-pip + +CMD ["python", "train_unet_docker.py"] diff --git a/saber/xbrain/unets/cnn_tools.py b/saber/xbrain/unets/cnn_tools.py new file mode 100644 index 0000000..99c831b --- /dev/null +++ b/saber/xbrain/unets/cnn_tools.py @@ -0,0 +1,231 @@ +""" +Copyright 2018 The Johns Hopkins University Applied Physics Laboratory. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +""" +Implements semantic segmentation for convnets using Keras. +""" +__author__ = 'mjp, Nov 2016' +__license__ = 'Apache 2.0' + +import os +from keras.models import Model +from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Dropout +from keras.layers.merge import concatenate +from keras.optimizers import Adam, SGD +from keras import backend as K + +from data_tools import * + +import time +import numpy as np + + +def timed_collection(c, rate=60*2): + """ Provides status on progress as one iterates through a collection. 
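+    rate : minimum number of seconds between progress messages (default 120).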
+ """ + start_time = time.time() + last_chatter = -rate + + for idx, ci in enumerate(c): + yield ci + + elapsed = time.time() - start_time + if (elapsed) > last_chatter + rate: + last_chatter = elapsed + print('processed %d items in %0.2f minutes' % (idx+1, elapsed/60.)) + + +def f1_score(y_true, y_hat): + """ + """ + y_true_flat = y_true.flatten() + y_hat_flat = y_hat.flatten() + + try: + # default is to assume a Keras object + y_true_flat = K.flatten(y_true) + y_hat_flat = K.flatten(y_hat) + + intersection = K.sum(y_hat_flat * y_true_flat) + precision = intersection / K.sum(y_hat_flat) + recall = intersection / K.sum(y_true_flat) + except: + intersection = np.sum(y_hat_flat * y_true_flat) + precision = intersection / np.sum(y_hat_flat) + recall = intersection / np.sum(y_true_flat) + + return 2 * precision * recall / (precision + recall) + + +def pixelwise_crossentropy_loss(y_true, y_hat): + y_hat += 1e-8 # avoid issues with log + ce = -y_true * K.log(y_hat) - (1. - y_true) * K.log(1 - y_hat) + return K.mean(ce) + + +def pixelwise_crossentropy_loss_w(y_true, y_hat): + y_hat += 1e-8 # avoid issues with log + w = K.sum(y_true) / K.prod(K.shape(y_true)) + ce = -(1 - w) * y_true * K.log(y_hat) - w * (1. - y_true) * K.log(1 - y_hat) + return K.mean(ce) + + +def pixelwise_crossentropy_loss_mc(y_true, y_hat): + y_hat += 1e-8 # avoid issues with log + w = K.sum(K.sum(K.sum(y_true, axis=0), axis=2), axis=3) / K.prod(K.shape(y_true)) / K.shape(y_true)[1] + ce = -(1 - w) * y_true * K.log(y_hat) - w * (1. - y_true) * K.log(1 - y_hat) + return K.mean(ce) + + +def create_unet(sz): + """ + sz : a tuple specifying the input image size in the form: + (# channels, # rows, # columns) + References: + 1. Ronneberger et al. "U-Net: Convolutional Networks for Biomedical + Image Segmentation." 2015. + 2. 
https://github.com/jocicmarko/ultrasound-nerve-segmentation/blob/master/train.py + """ + + assert(len(sz) == 3) + if not np.all(np.mod(sz[1:], 16) == 0): + raise ValueError('This network assumes the input image dimensions are multiple of 2^4') + + # NOTES: + # o possibly change Deconvolution2D to UpSampling2D + bm = 'same' + + inputs = Input(sz) + conv1 = Conv2D(32, (3, 3), activation='relu', padding=bm)(inputs) + conv1 = Conv2D(32, (3, 3), activation='relu', padding=bm)(conv1) + pool1 = MaxPooling2D(pool_size=(2, 2))(conv1) + + conv2 = Conv2D(64, (3, 3), activation='relu', padding=bm)(pool1) + conv2 = Conv2D(64, (3, 3), activation='relu', padding=bm)(conv2) + pool2 = MaxPooling2D(pool_size=(2, 2))(conv2) + + conv3 = Conv2D(128, (3, 3), activation='relu', padding=bm)(pool2) + conv3 = Conv2D(128, (3, 3), activation='relu', padding=bm)(conv3) + pool3 = MaxPooling2D(pool_size=(2, 2))(conv3) + + conv4 = Conv2D(256, (3, 3), activation='relu', padding=bm)(pool3) + conv4 = Conv2D(256, (3, 3), activation='relu', padding=bm)(conv4) + conv4 = Dropout(.5)(conv4) # mjp + pool4 = MaxPooling2D(pool_size=(2, 2))(conv4) + + conv5 = Conv2D(512, (3, 3), activation='relu', padding=bm)(pool4) + conv5 = Conv2D(512, (3, 3), activation='relu', padding=bm)(conv5) + + up6 = concatenate([UpSampling2D(size=(2, 2))(conv5), conv4], axis=1) + conv6 = Conv2D(256, (3, 3), activation='relu', padding=bm)(up6) + conv6 = Conv2D(256, (3, 3), activation='relu', padding=bm)(conv6) + + up7 = concatenate([UpSampling2D(size=(2, 2))(conv6), conv3], axis=1) + conv7 = Conv2D(128, (3, 3), activation='relu', padding=bm)(up7) + conv7 = Conv2D(128, (3, 3), activation='relu', padding=bm)(conv7) + + up8 = concatenate([UpSampling2D(size=(2, 2))(conv7), conv2], axis=1) + conv8 = Conv2D(64, (3, 3), activation='relu', padding=bm)(up8) + conv8 = Conv2D(64, (3, 3), activation='relu', padding=bm)(conv8) + + up9 = concatenate([UpSampling2D(size=(2, 2))(conv8), conv1], axis=1) + conv9 = Conv2D(32, (3, 3), activation='relu', padding=bm)(up9) + conv9 = Conv2D(32, (3, 3), activation='relu', padding=bm)(conv9) + + conv10 = Conv2D(1, (1, 1), activation='sigmoid')(conv9) + + model = Model(inputs=inputs, outputs=conv10) + + return model + + +def train_model(X_train, Y_train, X_valid, Y_valid, model, output, + n_epochs=30, n_mb_per_epoch=25, mb_size=30, save_freq=100, + do_augment=False,args=None): + """ + Note: these are not epochs in the usual sense, since we randomly sample + the data set (vs methodically marching through it) + """ + sz = model.input_shape[-2:] + score_all = [] + best_score = 0 + for ii in range(n_epochs): + print('starting "epoch" %d (of %d)' % (ii, n_epochs)) + + for jj in timed_collection(range(n_mb_per_epoch)): + Xi, Yi = random_minibatch(X_train, Y_train, mb_size, sz=sz) + loss, f1 = model.train_on_batch(Xi, Yi) + score_all.append(f1) + + + # evaluate performance on validation data + Yi_hat = deploy_model(X_valid, model, do_augment) +# if ii % save_freq == 0: +# np.savez(os.path.join(output[:-4], 'valid_epoch%04d' % ii), +# X=X_valid, Y=Y_valid, Y_hat=Yi_hat, s=score_all) + + f1_curr = f1_score(Y_valid,Yi_hat) + print('f1 on validation data: %0.3f' % f1_curr) + print('recent train performance: %0.3f' % np.mean(score_all[-20:])) + print('y_hat min, max, mean: %0.2f / %0.2f / %0.2f' % (np.min(Yi_hat), + np.max(Yi_hat), + np.mean(Yi_hat))) + if f1_curr > best_score: + # save state + fn_out = os.path.join(output) + print('saving to : ' + fn_out) + if ii % save_freq == 0: + model.save_weights(fn_out) + best_score = f1_curr + if args: + 
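+                    # record the best validation F1 so far in the --score_out .npy file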
np.save(args.score_out,best_score) + + + return best_score + + +def deploy_model(X, model, do_augment=False): + """ + X : a tensor of dimensions (n_examples, n_channels, n_rows, n_cols) + Note that n_examples will be used as the minibatch size. + """ + # the only slight complication is that the spatial dimensions of X might + # not be a multiple of the tile size. + sz = model.input_shape[-2:] + + Y_hat = np.zeros(X.shape) + + for rr in range(0, X.shape[-2], sz[0]): + ra = rr if rr+sz[0] < X.shape[-2] else X.shape[-2] - sz[0] + rb = ra+sz[0] + for cc in range(0, X.shape[-1], sz[1]): + ca = cc if cc+sz[1] < X.shape[-1] else X.shape[-1] - sz[-1] + cb = ca+sz[1] + if do_augment: + slices = X[:, :, ra:rb, ca:cb] + for i in range(slices.shape[0]): + s = slices[i, ...] + sm1 = apply_symmetry(s, 4) + sm2 = apply_symmetry(s, 5) + stack = np.concatenate((s, sm1, sm2), axis=0) + stack = stack[:, np.newaxis, :, :] + new_stack = model.predict(stack) + new_stack[1, ...] = apply_symmetry(new_stack[1, ...], 4) + new_stack[2, ...] = apply_symmetry(new_stack[2, ...], 5) + Y_hat[i, :, ra:rb, ca:cb] = np.mean(new_stack, axis=0) + else: + Y_hat[:, :, ra:rb, ca:cb] = model.predict( + X[:, :, ra:rb, ca:cb] + ) + + return Y_hat diff --git a/saber/xbrain/unets/data_tools.py b/saber/xbrain/unets/data_tools.py new file mode 100644 index 0000000..f47373d --- /dev/null +++ b/saber/xbrain/unets/data_tools.py @@ -0,0 +1,300 @@ +""" +Copyright 2018 The Johns Hopkins University Applied Physics Laboratory. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +""" +Some functions for manipulating data/images. +""" + +#from __future__ import print_function + +__author__ = 'mjp, Nov 2016' +__license__ = 'Apache 2.0' + +import numpy as np +from functools import partial + +from scipy.interpolate import griddata + + +# ------------------------------------------------------------------------------ +def random_minibatch(X, Y, num_in_batch, sz=(256, 256), do_warping=False): + """ Creates a single minibatch of training data by randomly sampling + subsets of the training data (X, Y). + This does not methodically examine all subsets of the domain, therefore + the notion of an 'epoch' is not tied to some guarantee of covering + all data if you use this function. + Parameters: + X := tensor with dimensions (#examples, #channels, rows, colums) + Y := tensor with dimensions (#examples, rows, columns) + num_in_batch := scalar; number of objects in the minibatch + sz := tuple (n_rows, n_cols) indicating the chip size + + """ + n, d, r, c = X.shape + + # preallocate memory for result + X_mb = np.zeros((num_in_batch, X.shape[1], sz[0], sz[0]), dtype=np.float32) + Y_mb = np.zeros((num_in_batch, Y.shape[1], sz[0], sz[0]), dtype=np.float32) + + for ii in range(num_in_batch): + # grab a random slice + ni = np.random.randint(low=0, high=n-1) + Xi = X[ni, ...] + Yi = Y[ni, ...] 
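+        # Xi, Yi hold one randomly chosen example each, shape (channels, rows, cols)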
+ + # grab a random tile of size sz + if not Xi.shape[-2:] == sz: + Xi, Yi = random_crop([Xi, Yi], sz) + + # warp/transform + Xi, Yi = apply_symmetry([Xi, Yi]) + if do_warping: + Xi,Yi = apply_warping(Xi, Yi) + + X_mb[ii, ...] = Xi + Y_mb[ii, ...] = Yi + + return X_mb, Y_mb + + +def apply_2d_operator(X, op): + """ Applies the function op to all 2d images contained in the tensor X. + Parameters: + X : A tensor with dimension (..., rows, cols) + op : a function that takes a single argument, an 2d matrix (rows, cols) and + returns a new 2d matrix of the same shape. + Example: + op = lambda M: M.transpose() + X = np.random.rand(5,5,3,3) + Y = apply_2d_operator(X, op) + X[0,2,...] + Y[0,2,...] + """ + if X.ndim == 2: + return op(X) + else: + sz = X.shape + X = np.reshape(X, (np.prod(sz[0:-2]), sz[-2], sz[-1])) + X_out = [op(X[ii]) for ii in range(X.shape[0])] + return np.reshape(X_out, sz) + + +def random_crop(tensors, sz): + """ + Grabs a random subset of the spatial dimensions of the provided tensors. + The same spatial extent will be extracted from all tensors. + + tensors := a list of image tensors with dimensions (..., rows, columns) + sz := a tuple (n_rows, n_cols) specifing the size of the crop + """ + + if isinstance(tensors, list): + r,c = tensors[0].shape[-2:] + else: + # caller provided a single tensor (rather than a list) + r,c = tensors.shape[-2:] + + # choose an upper-left corner for the crop + ri = np.random.randint(low=0, high=r-sz[0]-1) + ci = np.random.randint(low=0, high=c-sz[1]-1) + + # extract subset + if isinstance(tensors, list): + return [ X[..., ri:ri+sz[0], ci:ci+sz[1]] for X in tensors] + else: + X = tensors + return X[..., ri:ri+sz[0], ci:ci+sz[1]] + + +def apply_symmetry(tensors, op_idx=-1): + """Implements synthetic data augmentation by randomly appling + an element of the group of symmetries of the square. + The default set of data augmentation operations correspond to + the symmetries of the square (a non abelian group). The + elements of this group are: + o four rotations (0, pi/2, pi, 3*pi/4) + Denote these by: R0 R1 R2 R3 + o two mirror images (about y-axis or x-axis) + Denote these by: M1 M2 + o two diagonal flips (about y=-x or y=x) + Denote these by: D1 D2 + This page has a nice visual depiction: + http://www.cs.umb.edu/~eb/d4/ + Parameters: + tensors := a list of image tensors with dimensions (..., rows, columns) + op_index := An integer in [0,7] indicating which operator to apply. + If unspecified, the operation used will be random. 
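+        Returns the transformed tensor (or list of tensors); the same operation
+        is applied to every tensor in the list.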
+ """ + + def R0(X): + return X # this is the identity map + + def M1(X): + return X[..., ::-1, :] + + def M2(X): + return X[..., ::-1] + + def D1(X): + sz = list(range(X.ndim)) + sz[-2], sz[-1] = sz[-1], sz[-2] + return np.transpose(X, sz) + + def R1(X): + return D1(M2(X)) # = rot90 on the last two dimensions + + def R2(X): + return M2(M1(X)) + + def R3(X): + return D2(M2(X)) + + def D2(X): + return R1(M1(X)) + + symmetries = [R0, R1, R2, R3, M1, M2, D1, D2] + + # choose the operation + op = symmetries[op_idx] if op_idx >= 0 else np.random.choice(symmetries) + + if isinstance(tensors, list): + return [op(x) for x in tensors] + else: + # presumably caller passed in just one tensor + return op(tensors) + + +def apply_warping(X, Y, sigma=10): + X0 = get_slice_0(X) + + # make sure images are square + n = X0.shape[0]; + assert(X0.shape[1] == n) + assert(Y.shape[-2] == Y.shape[-1] == n) + + omega_xnew, omega_ynew = make_displacement_mesh(n, sigma) + f_warp = partial(apply_displacement_mesh, omega_xnew=omega_xnew, omega_ynew=omega_ynew) + + X_new = apply_2d_operator(X, f_warp) + Y_new = apply_2d_operator(Y, f_warp) + + # TODO: something smarter here to deal with issues at boundary + X_new[np.isnan(X_new)] = 0 + Y_new[np.isnan(Y_new)] = 0 + + return X_new, Y_new + + +def make_displacement_mesh(n, sigma, n_seed_points=5): + """ Creates a warping/displacement mesh (for synthetic data augmentation). + + Parameters: + n : The width/height of the target image (assumed to be square) + sigma : standard deviation of displacements. + If negative, is interpreted as a deterministic displacement. + This latter usage is for testing, not actual applications. + n_seed_points : The number of random displacements to choose (in each dimension). + Displacements at all locations will be obtained via interpolation. + """ + glue = lambda X, Y: np.vstack([X.flatten(), Y.flatten()]).transpose() + + # the domain Omega is [0:n)^2 + omega_x, omega_y = np.meshgrid(np.arange(n), np.arange(n)) + + # create random displacement in the domain. + # Note: we "overshoot" the domain to avoid edge artifacts when + # interpolating back to the lattice on Z^2. + d_pts = np.linspace(0, n, n_seed_points) + d_xx, d_yy = np.meshgrid(d_pts, d_pts) + + if sigma > 0: + # random displacement + dx = sigma * np.random.randn(d_xx.size) + dy = sigma * np.random.randn(d_yy.size) + else: + # deterministic displacement (for testing) + dx = abs(sigma) * np.ones(d_xx.size) + dy = abs(sigma) * np.ones(d_yy.size) + + # use interpolation to generate a smooth displacement field. + omega_dx = griddata(glue(d_xx, d_yy), dx.flatten(), glue(omega_x, omega_y)) + omega_dy = griddata(glue(d_xx, d_yy), dy.flatten(), glue(omega_x, omega_y)) + + # reshape 1d -> 2d + omega_dx = np.reshape(omega_dx, (n, n)) + omega_dy = np.reshape(omega_dy, (n, n)) + + # generate a perturbed mesh + omega_xnew = omega_x + omega_dx + omega_ynew = omega_y + omega_dy + + return omega_xnew, omega_ynew + + +def plot_mesh(xx, yy, linespec='k-'): + """ Plots a pixel location mesh/lattice. 
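+    Note: assumes matplotlib.pyplot is available as `plt`; this module does not
+    import it, and plt.hold() was removed in matplotlib 3.0.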
+ + xx : an (m x n) matrix of x-indices + yy : an (m x n) matrix of y-indices + """ + assert(xx.ndim == 2) + assert(yy.ndim == 2) + plt.hold(True) + for r in range(xx.shape[0]): + for c in range(xx.shape[1]): + if c+1 < xx.shape[1]: plt.plot((xx[r, c], xx[r, c+1]), + (yy[r, c], yy[r, c]), 'k-') # east + if r+1 < xx.shape[0]: plt.plot((xx[r, c], xx[r, c]), + (yy[r, c], yy[r+1, c]), 'k-') # south + plt.gca().set_xlim([np.min(xx), np.max(xx)]) + plt.gca().set_ylim([np.min(yy), np.max(yy)]) + plt.hold(False) + + +def apply_displacement_mesh(X, omega_xnew, omega_ynew): + """Interpolates pixel intensities back into a regular mesh. + Parameters: + X := an (m x n) matrix of pixel intensities + omega_xnew := an (m x n) matrix of perturbed x locations in R^2 + omega_ynew := an (m x n) matrix of perturbed y locations in R^2 + Returns: + X_int : an (m x n) matrix of interpolated pixel values + which live in Z^2 + """ + glue = lambda X, Y: np.vstack([X.flatten(), Y.flatten()]).transpose() + + n = X.shape[0] + assert(X.ndim == 2 and n == X.shape[1]) + + # this is the natural/original lattice where we wish to generate + # interpolated values. + omega_x, omega_y = np.meshgrid(np.arange(n), np.arange(n)) + + # use interpolation to estimate pixel intensities on original lattice + X_int = griddata(glue(omega_xnew, omega_ynew), + X.flatten(), + glue(omega_x, omega_y)) + X_int = np.reshape(X_int, (n, n)) + + return X_int + + +def get_slice_0(X): + """ Returns X[0,..,0, :, :] + """ + if X.ndim == 2: + return X + else: + sz = X.shape + new_sz = (np.prod(sz[0:-2]), sz[-2], sz[-1]) + return np.squeeze(np.reshape(X, sz)[0, :, :]) diff --git a/saber/xbrain/unets/deploy_unet_docker.py b/saber/xbrain/unets/deploy_unet_docker.py new file mode 100644 index 0000000..fb316c0 --- /dev/null +++ b/saber/xbrain/unets/deploy_unet_docker.py @@ -0,0 +1,80 @@ +""" +Copyright 2018 The Johns Hopkins University Applied Physics Laboratory. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
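+
+Deploys a trained U-Net on a local .npy image volume: loads --weights_file,
+writes the predicted membrane probability map to --output, and optionally
+reports an F1 score against --lbl_file.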
+""" + +import os +import sys +import time +import numpy as np +import argparse + +import image_handler as ih + +from cnn_tools import * +from data_tools import * + +K.set_image_data_format('channels_first') #replaces K.set_image_dim_ordering('th') + + +if __name__ == '__main__': + # ------------------------------------------------------------------------- + + parser = argparse.ArgumentParser(description='Deploying Unets for Probability Mapping') + parser.set_defaults(func=lambda _: parser.print_help()) + parser.add_argument( + '--img_file', + required=True, + help='Local image file' + ) + parser.add_argument( + '--lbl_file', + required=False, + help='Groundtruth image file' + ) + parser.add_argument( + '--weights_file', + required=True, + help='Weights file to deploy' + ) + parser.add_argument( + '--tile_size', + required=False, + type=int, + default=256, + help='Size of image chunks processed by network' + ) + parser.add_argument( + '--output', + required=True, + help='Inference output file (npy)' + ) + + args = parser.parse_args() + y_data = np.load(args.img_file) # X, Y, Z + y_data = np.transpose(y_data) # Z, Y, X + print('Input data has shape: {}'.format(y_data.shape)) + y_data = y_data[:, np.newaxis, :, :].astype(np.float32) #Z, chan, Y, X + y_data /= 255. + tile_size = [args.tile_size, args.tile_size] + model = create_unet((1, tile_size[0], tile_size[1])) + model.load_weights(args.weights_file) + print('Deploying model...') + y_hat = deploy_model(y_data, model) + np.save(args.output, y_hat) + + if args.lbl_file: + y_hat = np.transpose(np.squeeze(y_hat)) # X,Y,Z + y_true = np.load(args.lbl_file) # X,Y,Z + print("Output data has shape: {}".format(y_hat.shape)) + print('Groundtruth data has shape: {}'.format(y_true.shape)) + f1 = f1_score(y_true, y_hat) + print("F1: {}".format(f1)) \ No newline at end of file diff --git a/saber/xbrain/unets/deploy_unets.cwl b/saber/xbrain/unets/deploy_unets.cwl new file mode 100644 index 0000000..e58a9dd --- /dev/null +++ b/saber/xbrain/unets/deploy_unets.cwl @@ -0,0 +1,39 @@ +cwlVersion: v1.0 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: aplbrain/unets +baseCommand: python +arguments: ["/src/deploy_unet_docker.py"] +inputs: + img_file: + type: File + inputBinding: + position: 1 + prefix: --img_file + lbl_file: + type: File? + inputBinding: + position: 2 + prefix: --lbl_file + weights_file: + type: File + inputBinding: + prefix: --weights_file + position: 3 + tile_size: + type: int? + inputBinding: + prefix: --tile_size + position: 4 + output: + type: string + inputBinding: + prefix: --output + position: 5 + +outputs: + membrane_probability_map: + type: File + outputBinding: + glob: $(inputs.output) \ No newline at end of file diff --git a/saber/xbrain/unets/image_handler.py b/saber/xbrain/unets/image_handler.py new file mode 100644 index 0000000..84c02a5 --- /dev/null +++ b/saber/xbrain/unets/image_handler.py @@ -0,0 +1,230 @@ +""" +Copyright 2018 The Johns Hopkins University Applied Physics Laboratory. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
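+
+Helpers for NIfTI I/O via SimpleITK and for bounding-box / sub-volume
+manipulation of 3D image arrays.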
+""" + +__author__ = 'drenkng1' + +import numpy as np +import SimpleITK as sitk +import logging +import math + +dtype_map = { + 'uint8': sitk.sitkUInt8, + 'int16': sitk.sitkInt16, + 'uint16': sitk.sitkUInt16, + 'int32': sitk.sitkInt32, + 'uint32': sitk.sitkUInt32, + 'float32': sitk.sitkFloat32, + 'float64': sitk.sitkFloat64 +} + + +# ------------------------------------------------------------------------------ +def load_nii(filename, data_type='float32', do_transpose=False): + """ + This function takes a NIFTI filename as input and returns the loaded image + with the correct shape. SimpleITK loads images with the 3rd dimension + first, so the image is transposed to be in RAI format. + :param filename: NIFTI file + :return: img: NIFTI/Analyze image (float32) + """ + logging.info('Loading: %s' % filename) + nii = sitk.ReadImage(filename) + nii = sitk.Cast(nii, dtype_map[data_type]) + img = sitk.GetArrayFromImage(nii) + if do_transpose: + # Fix the image orientation so Z is last dimension + img = np.transpose(img, axes=(2, 1, 0)) + return img + + +# ------------------------------------------------------------------------------ +def save_nii(save_img, sample_nifti_file, filename, data_type='uint16'): + """ + This function saves a NIFTI file given a numpy array. Image data is cast to + the specified precision (with range rescaling). + :param save_img: Numpy image array to be saved + :param sample_nifti: Numpy image array to get header information from + :param filename: Filename with .nii.gz extension + :param data_type: String data type to cast data to + :return: None + """ + logging.info('Saving: %s' % filename) + sample_nii = sitk.ReadImage(sample_nifti_file) + # Setup raw label image + out_nii = sitk.GetImageFromArray(save_img) + out_nii.CopyInformation(sample_nii) + # Write images to file + sitk.WriteImage(sitk.Cast(out_nii, dtype_map[data_type]), filename) + + +# ------------------------------------------------------------------------------ +def save_nii2(save_img, filename, data_type='uint16'): + """ + This function saves a NIFTI file given a numpy array. Image data is cast to + the specified precision (with range rescaling). + :param save_img: Numpy image array to be saved + :param filename: Filename with .nii.gz extension + :param data_type: String data type to cast data to + :return: None + """ + logging.info('Saving: %s' % filename) + # d, r, c = save_img.shape + # dum_nii = sitk.Image(r, c, d, save_img.dtype) + # Setup raw label image + out_nii = sitk.GetImageFromArray(save_img) + # Write images to file + sitk.WriteImage(sitk.Cast(out_nii, dtype_map[data_type]), filename) + + +# ------------------------------------------------------------------------------ +def get_bounding_box(model_vol, pad=15): + """ + Function returns the bounding box for the given data volume. The bounding + box is based on the volume that encapsulates all nonzero voxels. + :param model_vol: Numpy array of data corresponding to labels/probabilities + :param pad: Optional parameter to specify how much to pad bounding box + (applies to min and max ends of the box). 
+ :return: + A list with the bounding box row, col, and depth indices + """ + nonzero_inds_tup = np.nonzero(model_vol) + + bb = [] + dim = 0 + for arr in nonzero_inds_tup: + min_val = int(np.min(arr)) + max_val = int(np.max(arr)) + + # Include pad + min_val = max(min_val - pad, 0) + max_val = min(max_val + pad, model_vol.shape[dim]) + + bb.append((min_val, max_val)) + dim += 1 + + return bb + + +# ------------------------------------------------------------------------------ +def extract_volume(orig_vol, bb): + """ + Extract a sub-volume defined by the provided bounding box + :param orig_vol: Original data volume + :param bb: Bounding box list [(min_r,max_r),(min_c,max_c),(min_d,max_d)] + :return: + Numpy array with same size as the bounding box + """ + sub_r = range(bb[0][0], bb[0][1]) + sub_c = range(bb[1][0], bb[1][1]) + sub_d = range(bb[2][0], bb[2][1]) + out_vol = orig_vol[np.ix_(sub_r, sub_c, sub_d)] + + return out_vol + + +# ------------------------------------------------------------------------------ +def insert_volume(orig_vol, bb, sz): + """ + Inserts the given volume into a new volume (filled with zeros) of the given + size at the location of the specified bounding box. + :param orig_vol: Smaller volume containing original data + :param bb: Bounding box list [(min_r,max_r),(min_c,max_c),(min_d,max_d)] + :param sz: Tuple defining size of new volume (row,col,depth) + :return: + Numpy array with the same size as defined by the sz tuple + """ + sub_r = range(bb[0][0], bb[0][1]) + sub_c = range(bb[1][0], bb[1][1]) + sub_d = range(bb[2][0], bb[2][1]) + out_vol = np.zeros(sz) + out_vol[np.ix_(sub_r, sub_c, sub_d)] = orig_vol + return out_vol + + +# ------------------------------------------------------------------------------ +def get_neighborhood(data, pt, neighborhood=(5, 5, 5), force_cube=True, + pad_value=0): + """ + This function takes a full data matrix as input and returns the submatrix + centered at the specified point. If the point is on a boundary, the matrix + is padded by the + :param data: 3D input array + :param pt: Center point - (R,C,D) tuple + :param neighborhood: Tuple describing neighborhood shape (R,C,D) + :param pad_value: Value to use if neighborhood needs to be padded + :return: Volume with same shape as neighborhood tuple + """ + rows, cols, depth = data.shape + nrows, ncols, ndepth = neighborhood + + # Get neighborhood indices + start_r = np.floor(pt[0] - nrows / 2).astype(int) + stop_r = start_r + nrows + start_c = np.floor(pt[1] - ncols / 2).astype(int) + stop_c = start_c + nrows + start_d = np.floor(pt[2] - ndepth / 2).astype(int) + stop_d = start_d + nrows + + sub_r = range(max(start_r, 0), min(stop_r, rows)) + sub_c = range(max(start_c, 0), min(stop_c, cols)) + sub_d = range(max(start_d, 0), min(stop_d, depth)) + + temp_vol = data[np.ix_(sub_r, sub_c, sub_d)] + + if force_cube: + out_vol = pad_value * np.ones(neighborhood) + r = max(-start_r, 0) + c = max(-start_c, 0) + d = max(-start_d, 0) + out_vol[r:temp_vol.shape[0] + r, c:temp_vol.shape[1] + c, + d:temp_vol.shape[2] + d] = temp_vol + else: + out_vol = temp_vol + + return out_vol + + +# ------------------------------------------------------------------------------ +def convert_to_subvolumes(img, cube_dim): + """ + Convert the input image to a new volume that has been zero padded to + accomodate the requested number of sub-volumes. 
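+    (Each output dimension is zero padded up to the next multiple of cube_dim.)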
+ :param img: Input volume + :param num_cubes: Requested number of cubes (may not equal the actual + number of cubes) + :param overlap: Fraction of overlap between adjacent sub-volumes + :return: out_img: Zero padded image + r_inds: Row indices for the subvolumes + c_inds: Col indices for the subvolumes + d_inds: Depth indices for the subvolumes + """ + + r, c, d = img.shape + + new_r = int(math.ceil(r / float(cube_dim)) * cube_dim) + new_c = int(math.ceil(c / float(cube_dim)) * cube_dim) + new_d = int(math.ceil(d / float(cube_dim)) * cube_dim) + + out_img = np.zeros((new_r, new_c, new_d), dtype=img.dtype) + + # Resize the image to the new size which accomodates the requested number + # of cubes + out_img[:r, :c, :d] = img + + step = int(cube_dim) + r_inds = np.arange(0, new_r, step) + c_inds = np.arange(0, new_c, step) + d_inds = np.arange(0, new_d, step) + + return out_img, r_inds.astype(int), c_inds.astype(int), d_inds.astype(int) diff --git a/saber/xbrain/unets/train_unet_docker.py b/saber/xbrain/unets/train_unet_docker.py new file mode 100644 index 0000000..9ef491a --- /dev/null +++ b/saber/xbrain/unets/train_unet_docker.py @@ -0,0 +1,381 @@ +""" +Copyright 2018 The Johns Hopkins University Applied Physics Laboratory. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import os +import sys +import time +import numpy as np +import json +import argparse + +np.random.seed(9999) + +import image_handler as ih + +from intern.remote.boss import BossRemote +from intern.resource.boss.resource import * + +from cnn_tools import * +from data_tools import * + +K.set_image_data_format('channels_first') #replaces K.set_image_dim_ordering('th') + + + +class Namespace: + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + +def parse_args(json_file=None): + args = defaults + + if json_file: + with open(json_file, 'r') as f: + json_args = json.load(f) + args.update(json_args) + + return Namespace(**args) + + +def get_boss_data(args): + + config = {"protocol": "https", + "host": "api.bossdb.io", + "token": args.token} + rmt = BossRemote(config) + print('[info] Downloading data from BOSS') + chan = ChannelResource(args.chan_img, + args.coll, + args.exp, + 'image', + datatype=args.dtype_img) + + # Get the image data from the BOSS + x_train = rmt.get_cutout(chan, args.res, + [args.xmin,args.xmax], + [args.ymin,args.ymax], + [args.zmin,args.zmax]) + + lchan = ChannelResource(args.chan_labels, + args.coll, + args.exp, + 'annotation', + datatype=args.dtype_lbl) + + y_train = rmt.get_cutout(lchan, args.res, + [args.xmin,args.xmax], + [args.ymin,args.ymax], + [args.zmin,args.zmax]) + print('[info] Downloaded BOSS data') + # Data must be [slices, chan, row, col] (i.e., [Z, chan, Y, X]) + x_train = x_train[:, np.newaxis, :, :].astype(np.float32) + y_train = y_train[:, np.newaxis, :, :].astype(np.float32) + + # Pixel values must be in [0,1] + x_train /= 255. 
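+    # binarize annotations: any nonzero label voxel counts as foreground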
+ y_train = (y_train > 0).astype('float32') + + return x_train, y_train + + +def get_file_data(args): + + file_type = args.img_file.split('.')[-1] + if file_type == 'gz' or file_type == 'nii': + x_train = ih.load_nii(args.img_file, data_type='uint8') + y_train = ih.load_nii(args.lbl_file, data_type='uint8') + + elif file_type == 'npy' or file_type == 'npz': + # Data input is X,Y,Z + x_train = np.load(args.img_file).T #Z,Y,X + y_train = np.load(args.lbl_file).T #Z,Y,X + + # Data must be [slices, chan, row, col] (i.e., [Z, chan, Y, X]) + x_train = x_train[:, np.newaxis, :, :].astype(np.float32) + y_train = y_train[:, np.newaxis, :, :].astype(np.float32) + + # Pixel values must be in [0,1] + x_train /= 255. + y_train = (y_train > 0).astype('float32') + + return x_train, y_train + + +if __name__ == '__main__': + # ------------------------------------------------------------------------- + + parser = argparse.ArgumentParser(description='Training Unets for Probability Mapping') + parser.set_defaults(func=lambda _: parser.print_help()) + parser.add_argument( + '--use_boss', + type=int, + required=True, + help='False(0) to use img_file and lbl_file or True(1) to pull data from boss') + parser.add_argument( + '--img_file', + required=False, + help='Local image file' + ) + parser.add_argument( + '--lbl_file', + required=False, + help='Local label file' + ) + parser.add_argument( + '--coord', + required=False, + help='Coord frame for BOSS' + ) + parser.add_argument( + '--token', + required=False, + help='Token for BOSS' + ) + parser.add_argument( + '--coll', + required=False, + help='Collection Name for BOSS' + ) + parser.add_argument( + '--exp', + required=False, + help='Experiment Name for BOSS' + ) + parser.add_argument( + '--chan_labels', + required=False, + help='Label channel for BOSS' + ) + parser.add_argument( + '--chan_img', + required=False, + help='Raw img channel for BOSS' + ) + parser.add_argument( + '--dtype_img', + required=False, + help='Datatype for BOSS' + ) + parser.add_argument( + '--dtype_lbl', + required=False, + help='Datatype for BOSS annotation' + ) + parser.add_argument( + '--res', + type=int, + required=False, + help='resolution for BOSS' + ) + parser.add_argument( + '--xmin', + type=int, + required=False, + help='Xmin of range for BOSS' + ) + parser.add_argument( + '--xmax', + type=int, + required=False, + help='Xmax of range for BOSS' + ) + parser.add_argument( + '--ymin', + type=int, + required=False, + help='Ymin of range for BOSS' + ) + parser.add_argument( + '--ymax', + type=int, + required=False, + help='Ymax of range for BOSS' + ) + parser.add_argument( + '--zmin', + type=int, + required=False, + help='Zmin of range for BOSS' + ) + parser.add_argument( + '--zmax', + type=int, + required=False, + help='Zmax of range for BOSS' + ) + parser.add_argument( + '--train_pct', + required=False, + type=float, + default=0.5, + help='Percentage of z slices to use as training' + ) + parser.add_argument( + '--n_epochs', + required=False, + type=int, + default=10, + help='Number of training epochs' + ) + parser.add_argument( + '--mb_size', + required=False, + type=int, + default=4, + help='Minibatch size' + ) + parser.add_argument( + '--n_mb_per_epoch', + required=False, + type=int, + default=3, + help='num mb per epoch' + ) + parser.add_argument( + '--learning_rate', + required=False, + type=float, + default=0.0001, + help='Adam or SGD learning rate for training' + ) + parser.add_argument( + '--use_adam', + required=False, + type=bool, + default=False, + help='Flag to use adam or 
sgd' + ) + parser.add_argument( + '--beta1', + required=False, + type=float, + default=0.9, + help='Adam first moment forgetting factor' + ) + parser.add_argument( + '--beta2', + required=False, + type=float, + default=0.999, + help='Adam second moment forgetting factor' + ) + parser.add_argument( + '--momentum', + required=False, + type=float, + default=0.99, + help='SGD momemntum value' + ) + parser.add_argument( + '--decay', + required=False, + type=float, + default=0.000001, + help='SGD decay value' + ) + parser.add_argument( + '--save_freq', + required=False, + type=int, + default=50, + help='How often to save' + ) + parser.add_argument( + '--do_warp', + required=False, + type=bool, + default=False, + help='Warp data?' + ) + parser.add_argument( + '--tile_size', + required=False, + type=int, + default=256, + help='Size of image chunks processed by network' + ) + parser.add_argument( + '--weights_file', + required=False, + help='Weights file to continue training' + ) + parser.add_argument( + '--score_out', + required=True, + help='File for output of final score' + ) + parser.add_argument( + '--output', + required=True, + help='Weights output file (hdf5)' + ) + + args = parser.parse_args() + if args.use_boss: + x_train, y_train = get_boss_data(args) + else: + x_train, y_train = get_file_data(args) + + tile_size = (args.tile_size,args.tile_size) + train_pct = args.train_pct + # ------------------------------------------------------------------------- + + # Data must be [slices, chan, row, col] (i.e., [Z, chan, Y, X]) + # split into train and valid + train_slices = range(int(train_pct * x_train.shape[0])) + x_train = x_train[train_slices, ...] + y_train = y_train[train_slices, ...] + + valid_slices = range(int(train_pct * x_train.shape[0]), x_train.shape[0]) + x_valid = x_train[valid_slices, ...] + y_valid = y_train[valid_slices, ...] 
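+    # NOTE: x_train was already truncated to train_slices above, so valid_slices
+    # selects the tail of the training subset rather than truly held-out slices.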
+ + print('[info]: training data has shape: %s' % str(x_train.shape)) + print('[info]: training labels has shape: %s' % str(y_train.shape)) + print('[info]: validation data has shape: %s' % str(x_valid.shape)) + print('[info]: validation labels has shape: %s' % str(y_valid.shape)) + print('[info]: tile size: %s' % str(tile_size)) + + # train model + tic = time.time() + model = create_unet((1, tile_size[0], tile_size[1])) + #if args.do_synapse: + if args.use_adam: + print('[info]: using adam optimizer') + model.compile(optimizer=Adam(lr=args.learning_rate,beta_1=args.beta1,beta_2=args.beta2), + loss=pixelwise_crossentropy_loss_w, + metrics=[f1_score]) + else: + print('[info]: using sgd optimizer') + model.compile(optimizer=SGD(lr=args.learning_rate,decay=args.decay,momentum=args.momentum), + loss=pixelwise_crossentropy_loss_w, + metrics=[f1_score]) + #else: + # model.compile(optimizer=Adam(lr=args.learning_rate,beta_1=args.beta1,beta_2=args.beta2), + # loss=pixelwise_crossentropy_loss, + # metrics=[f1_score]) + + if args.weights_file: + if args.weights_file != "None": + model.load_weights(args.weights_file) + + f1=train_model(x_train, y_train, x_valid, y_valid, model, + args.output, do_augment=args.do_warp, + n_epochs=args.n_epochs, mb_size=args.mb_size, + n_mb_per_epoch=args.n_mb_per_epoch, + save_freq=args.save_freq,args=args) + + print('[info]: total time to train model: %0.2f min' % + ((time.time() - tic) / 60.)) + print("F1: {}".format(f1)) diff --git a/saber/xbrain/unets/train_unets.cwl b/saber/xbrain/unets/train_unets.cwl new file mode 100644 index 0000000..a02bd90 --- /dev/null +++ b/saber/xbrain/unets/train_unets.cwl @@ -0,0 +1,180 @@ +cwlVersion: v1.0 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: xbrain:unets +baseCommand: python +arguments: ["train_unet_docker.py"] +inputs: + useboss: + type: int + inputBinding: + position: 1 + prefix: --use_boss + + img_file: + type: File? + inputBinding: + position: 2 + prefix: --img_file + + lbl_file: + type: File? + inputBinding: + position: 3 + prefix: --lbl_file + + coord_name: + type: string + inputBinding: + position: 4 + prefix: --coord + + token: + type: string? + inputBinding: + position: 5 + prefix: --token + + coll_name: + type: string? + inputBinding: + position: 6 + prefix: --coll + + exp_name: + type: string? + inputBinding: + position: 7 + prefix: --exp + + chan_name: + type: string? + inputBinding: + position: 8 + prefix: --chan + + dtype_img: + type: string? + inputBinding: + position: 9 + prefix: --dtype_img + + dtype_lbl: + type: string? + inputBinding: + position: 9 + prefix: --dtype_lbl + + resolution: + type: int? + inputBinding: + prefix: --res + position: 10 + + xmin: + type: int? + inputBinding: + prefix: --xmin + position: 11 + + xmax: + type: int? + inputBinding: + prefix: --xmax + position: 13 + + ymin: + type: int? + inputBinding: + prefix: --ymin + position: 14 + + ymax: + type: int? + inputBinding: + prefix: --ymax + position: 15 + + zmin: + type: int? + inputBinding: + prefix: --zmin + position: 16 + + zmax: + type: int? + inputBinding: + prefix: --zmax + position: 17 + + train_pct: + type: float? + inputBinding: + prefix: --train_pct + position: 18 + + n_epochs: + type: int? + inputBinding: + prefix: --n_epochs + position: 19 + + mb_size: + type: int? + inputBinding: + prefix: --mb_size + position: 20 + + n_mb_per_epoch: + type: int? + inputBinding: + prefix: --n_mb_per_epoch + position: 21 + + learning_rate: + type: float? 
+ inputBinding: + prefix: --learning_rate + position: 22 + + beta1: + type: float? + inputBinding: + prefix: --beta1 + position: 23 + + beta2: + type: float? + inputBinding: + prefix: --beta2 + position: 24 + + save_freq: + type: int? + inputBinding: + prefix: --save_freq + position: 25 + + do_warp: + type: boolean? + inputBinding: + prefix: --do_warp + position: 26 + + tile_size: + type: int? + inputBinding: + prefix: --tile_size + position: 27 + weights_file: + type: File? + inputBinding: + prefix: --weights_file + position: 28 + +outputs: + membrane_detection_out: + type: File + outputBinding: + glob: $(inputs.output) diff --git a/saber/xbrain/unsupervised_celldetect.py b/saber/xbrain/unsupervised_celldetect.py index 0f98db8..41b99c5 100644 --- a/saber/xbrain/unsupervised_celldetect.py +++ b/saber/xbrain/unsupervised_celldetect.py @@ -1,17 +1,3 @@ -# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - #!/usr/bin/env python import argparse @@ -109,30 +95,15 @@ def cell_metrics(args): img = nib.load(args.groundtruth) centroids = np.load(args.input) volume = img.get_fdata()# np.load(args.input) - try: - f1 = cell_metrics2D(centroids,volume,args.initial_template_size) - except (ValueError, ZeroDivisionError) as v: - print('Received following error when trying to calculate F1, returning 0') - print(v) - f1 = 0 - print("F1: {}".format(f1)) - + f1 = cell_metrics2D(centroids,volume,args.initial_template_size) with open(args.output, 'wb') as f: np.save(f, f1) def cell_metrics3D(args): img = np.load(args.groundtruth) centroids = np.load(args.input) - # Enforce shape of centroids - if len(centroids.shape) != 2 or centroids.shape[1] != 4: - raise ValueError('Dimension mismatch in cell list, should be (n,4), is {}'.format(centroids.shape)) #volume = img.get_fdata()# np.load(args.input) - try: - f1 = f1_centroid3D(centroids,img,args.initial_template_size) - except ValueError as v: - print('Received following error when trying to calculate F1, returning 0') - print(v) - f1 = 0 + f1 = f1_centroid3D(centroids,img,args.initial_template_size) print("F1: {}".format(f1)) with open(args.output, 'wb') as f: np.save(f, f1) diff --git a/saber/xbrain/workflows/parameterization.yml b/saber/xbrain/workflows/parameterization.yml new file mode 100644 index 0000000..61d003b --- /dev/null +++ b/saber/xbrain/workflows/parameterization.yml @@ -0,0 +1,24 @@ +x: + range: + start: 0 + stop: 10000 + step: 10 + parameters: + - xmin + - xmax +y: + range: + start: 0 + stop: 10000 + step: 10 + parameters: + - ymin + - ymax +z: + range: + start: 0 + stop: 10000 + step: 10 + parameters: + - zmin + - zmax diff --git a/saber/xbrain/workflows/xbrain-example-job.yml b/saber/xbrain/workflows/xbrain-example-job.yml new file mode 100644 index 0000000..6828cdf --- /dev/null +++ b/saber/xbrain/workflows/xbrain-example-job.yml @@ -0,0 +1,22 @@ + + +data: + class: File + path: V2_imgdata_gt.npy +classifier: + class: File + path: xbrain_vessel_seg_v7.ilp 
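+# classifier: pre-trained ilastik (.ilp) pixel-classification project used by
+# the membrane_classify step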
+membrane_classify_output_name: membrane_classify_output.npy +cell_detect_output_name: cell_detect_output.npy +vessel_segment_output_name: vessel_segment_output.npy +bucket: saber-batch +# ram_amount: int? +# num_threads: int? +# detect_threshold: float? +# stop: float? +# initial_template_size: int? +# detect_dilation: int? +# max_cells: int? +# segment_threshold: float? +# segment_dilation: int? +# minimum: int? \ No newline at end of file diff --git a/saber/xbrain/workflows/xbrain.cwl b/saber/xbrain/workflows/xbrain.cwl new file mode 100644 index 0000000..af29af9 --- /dev/null +++ b/saber/xbrain/workflows/xbrain.cwl @@ -0,0 +1,69 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +inputs: + # i/o + data: File + membrane_classify_output_name: string + cell_detect_output_name: string + vessel_segment_output_name: string + bucket: string + + # Cell detect + detect_threshold: float? + stop: float? + initial_template_size: int? + detect_dilation: int? + max_cells: int? + classifier: File + # Membrane classify + ram_amount: int? + num_threads: int? + segment_threshold: float? + segment_dilation: int? + minimum: int? +outputs: + membrane_classify_output: + type: File + outputSource: membrane_classify/membrane_probability_map + cell_detect_output: + type: File + outputSource: cell_detect/cell_detect_results + vessel_segment_output: + type: File + outputSource: vessel_segment/vessel_segment_results + +steps: + membrane_classify: + run: ../tools/membrane_classify.cwl + in: + bucket: bucket + input: data + output_name: membrane_classify_output_name + classifier: classifier + ram_amount: ram_amount + num_threads: num_threads + out: [membrane_probability_map] + cell_detect: + run: ../tools/cell_detect.cwl + in: + bucket: bucket + input: membrane_classify/membrane_probability_map + output_name: cell_detect_output_name + threshold: detect_threshold + stop: stop + initial_template_size: initial_template_size + dilation: detect_dilation + max_cells: max_cells + out: [cell_detect_results] + vessel_segment: + run: ../tools/vessel_segment.cwl + in: + bucket: bucket + input: membrane_classify/membrane_probability_map + output_name: vessel_segment_output_name + threshold: segment_threshold + dilation: segment_dilation + minimum: minimum + out: [vessel_segment_results] diff --git a/saber/xbrain/workflows/xbrain_supervised.cwl b/saber/xbrain/workflows/xbrain_supervised.cwl index 8538d6a..d1e0b4b 100644 --- a/saber/xbrain/workflows/xbrain_supervised.cwl +++ b/saber/xbrain/workflows/xbrain_supervised.cwl @@ -1,17 +1,3 @@ -# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- #!/usr/bin/env cwl-runner cwlVersion: v1.0 diff --git a/saber/xbrain/workflows/xbrain_supervised_optimization.cwl b/saber/xbrain/workflows/xbrain_supervised_optimization.cwl index 58b07ea..910c9f3 100644 --- a/saber/xbrain/workflows/xbrain_supervised_optimization.cwl +++ b/saber/xbrain/workflows/xbrain_supervised_optimization.cwl @@ -1,17 +1,3 @@ -# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - #!/usr/bin/env cwl-runner cwlVersion: v1.0 diff --git a/saber/xbrain/workflows/xbrain_unets_celldetect_train.cwl b/saber/xbrain/workflows/xbrain_unets_celldetect_train.cwl new file mode 100644 index 0000000..829133c --- /dev/null +++ b/saber/xbrain/workflows/xbrain_unets_celldetect_train.cwl @@ -0,0 +1,164 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +inputs: + use_boss: int + coord: string? + token: string? + host_name: string? + coll: string? + exp: string? + chan_labels: string? + chan_img: string? + dtype_img: string? + dtype_lbl: string? + itype: string? + padding: int? + res: int? + xmin: int? + xmax: int? + ymin: int? + ymax: int? + zmin: int? + zmax: int? + train_pct: float? + n_epochs: int? + mb_size: int? + n_mb_per_epoch: int? + use_adam: int? + learning_rate: float? + decay: float? + momentum: float? + beta1: float? + beta2: float? + save_freq: int? + do_warp: boolean? + tile_size: int? + weights_file: File? + + detect_threshold: float? + stop: float? + initial_template_size: int? + detect_dilation: int? 
+ + output: string + score_out: string + raw_pull_output_name: string + anno_pull_output_name: string + metrics_out: string +outputs: + train_output: + type: File + outputSource: optimize/classifier_weights +steps: + raw_boss_pull: + run: ../../boss_access/boss_pull.cwl + in: + host_name: host_name + token: token + coll_name: coll_name + exp_name: exp_name + chan_name: chan_img + dtype_name: dtype_img + itype_name: itype + resolution: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + padding: padding + output_name: raw_pull_output_name + coord_name: coord_name + bucket: bucket + out: + [raw_pull_output] + anno_boss_pull: + run: ../../boss_access/boss_pull.cwl + in: + host_name: host_name + token: token + coll_name: coll_name + exp_name: exp_name + chan_name: chan_lbl + dtype_name: dtype_lbl + itype_name: itype + resolution: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + padding: padding + output_name: anno_pull_output_name + coord_name: coord_name + bucket: bucket + out: + [anno_pull_output] + optimize: + run: ../tools/membrane_unets_train.cwl + in: + use_boss: use_boss + img_file: raw_boss_pull/raw_pull_output + lbl_file: anno_boss_pull/anno_pull_output + coord: coord + token: token + coll: coll + exp: exp + chan_labels: chan_labels + chan_img: chan_img + dtype_img: dtype_img + dtype_lbl: dtype_lbl + res: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + train_pct: train_pct + n_epochs: n_epochs + mb_size: mb_size + n_mb_per_epoch: n_mb_per_epoch + use_adam: use_adam + learning_rate: learning_rate + momentum: momentum + decay: decay + beta1: beta1 + beta2: beta2 + save_freq: save_freq + do_warp: do_warp + tile_size: tile_size + weights_file: weights_file + output: output + score_out: score_out + out: [membrane_probability,classifier_weights,scores] + hints: + saber: + score_format: "F1: {score}" + local: True + cell_detect: + run: ../tools/unsup_cell_detect_3D_nos3.cwl + in: + input: membrane_classify/membrane_probability + output_name: optimize_output_name + threshold: detect_threshold + stop: stop + initial_template_size: initial_template_size + dilation: detect_dilation + max_cells: max_cells + out: [cell_detect_results] + metrics: + run: ../tools/unsup_metrics_3D_nos3.cwl + in: + input: cell_detect/cell_detect_results + output_name: metrics_out + groundtruth: anno_boss_pull/anno_pull_output + out: [metrics] + hints: + saber: + score_format: "F1: {score}" + local: True diff --git a/saber/xbrain/workflows/xbrain_unets_train.cwl b/saber/xbrain/workflows/xbrain_unets_train.cwl new file mode 100644 index 0000000..c5cc98e --- /dev/null +++ b/saber/xbrain/workflows/xbrain_unets_train.cwl @@ -0,0 +1,80 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +inputs: + use_boss: int + img_file: File? + lbl_file: File? + coord: string? + token: string? + coll: string? + exp: string? + chan_labels: string? + chan_img: string? + dtype_img: string? + dtype_lbl: string? + res: int? + xmin: int? + xmax: int? + ymin: int? + ymax: int? + zmin: int? + zmax: int? + train_pct: float? + n_epochs: int? + mb_size: int? + n_mb_per_epoch: int? + learning_rate: float? + beta1: float? + beta2: float? + save_freq: int? + do_warp: boolean? + tile_size: int? + weights_file: File? 
+ output: string + score_out: string +outputs: + train_output: + type: File + outputSource: optimize/classifier_weights +steps: + optimize: + run: ../tools/membrane_unets_train.cwl + in: + use_boss: use_boss + img_file: img_file + lbl_file: lbl_file + coord: coord + token: token + coll: coll + exp: exp + chan_labels: chan_labels + chan_img: chan_img + dtype_img: dtype_img + dtype_lbl: dtype_lbl + res: res + xmin: xmin + xmax: xmax + ymin: ymin + ymax: ymax + zmin: zmin + zmax: zmax + train_pct: train_pct + n_epochs: n_epochs + mb_size: mb_size + n_mb_per_epoch: n_mb_per_epoch + learning_rate: learning_rate + beta1: beta1 + beta2: beta2 + save_freq: save_freq + do_warp: do_warp + tile_size: tile_size + weights_file: weights_file + output: output + score_out: score_out + out: [classifier_weights,scores] + hints: + saber: + score_format: "F1: {score}" + local: True diff --git a/saber/xbrain/workflows/xbrain_unsupervised_optimization.cwl b/saber/xbrain/workflows/xbrain_unsupervised_optimization.cwl index d58492d..292e86c 100644 --- a/saber/xbrain/workflows/xbrain_unsupervised_optimization.cwl +++ b/saber/xbrain/workflows/xbrain_unsupervised_optimization.cwl @@ -1,17 +1,3 @@ -# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - #!/usr/bin/env cwl-runner cwlVersion: v1.0 diff --git a/saber/xbrain/workflows/xbrain_unsupervised_optimization3D.cwl b/saber/xbrain/workflows/xbrain_unsupervised_optimization3D.cwl index c5bd47f..70731f8 100644 --- a/saber/xbrain/workflows/xbrain_unsupervised_optimization3D.cwl +++ b/saber/xbrain/workflows/xbrain_unsupervised_optimization3D.cwl @@ -1,17 +1,3 @@ -# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - #!/usr/bin/env cwl-runner cwlVersion: v1.0 diff --git a/saber/xbrain/workflows/xbrain_with_boss.cwl b/saber/xbrain/workflows/xbrain_with_boss.cwl new file mode 100644 index 0000000..026294c --- /dev/null +++ b/saber/xbrain/workflows/xbrain_with_boss.cwl @@ -0,0 +1,155 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +inputs: + # Shared + _saber_bucket: string + # Inputs for BOSS + config: File? + token: string? + coll_name: string + exp_name: string + in_chan_name: string + out_chan_name: string + dtype_name: string + out_dtype_name: string + itype_name: string + out_itype_name: string + resolution: int? + xmin: int? + xmax: int? + ymin: int? + ymax: int? + zmin: int? + zmax: int? + padding: int? + onesided: int? 
diff --git a/saber/xbrain/workflows/xbrain_unsupervised_optimization.cwl b/saber/xbrain/workflows/xbrain_unsupervised_optimization.cwl
index d58492d..292e86c 100644
--- a/saber/xbrain/workflows/xbrain_unsupervised_optimization.cwl
+++ b/saber/xbrain/workflows/xbrain_unsupervised_optimization.cwl
@@ -1,17 +1,3 @@
-# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 #!/usr/bin/env cwl-runner

 cwlVersion: v1.0
diff --git a/saber/xbrain/workflows/xbrain_unsupervised_optimization3D.cwl b/saber/xbrain/workflows/xbrain_unsupervised_optimization3D.cwl
index c5bd47f..70731f8 100644
--- a/saber/xbrain/workflows/xbrain_unsupervised_optimization3D.cwl
+++ b/saber/xbrain/workflows/xbrain_unsupervised_optimization3D.cwl
@@ -1,17 +1,3 @@
-# Copyright 2019 The Johns Hopkins University Applied Physics Laboratory
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 #!/usr/bin/env cwl-runner

 cwlVersion: v1.0
diff --git a/saber/xbrain/workflows/xbrain_with_boss.cwl b/saber/xbrain/workflows/xbrain_with_boss.cwl
new file mode 100644
index 0000000..026294c
--- /dev/null
+++ b/saber/xbrain/workflows/xbrain_with_boss.cwl
@@ -0,0 +1,155 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: Workflow
+inputs:
+    # Shared
+    _saber_bucket: string
+    # Inputs for BOSS
+    config: File?
+    token: string?
+    coll_name: string
+    exp_name: string
+    in_chan_name: string
+    out_chan_name: string
+    dtype_name: string
+    out_dtype_name: string
+    itype_name: string
+    out_itype_name: string
+    resolution: int?
+    xmin: int?
+    xmax: int?
+    ymin: int?
+    ymax: int?
+    zmin: int?
+    zmax: int?
+    padding: int?
+    onesided: int?
+    pull_output_name: string
+    coord_name: string
+    # Inputs for steps
+
+    classifier: File
+    membrane_classify_output_name: string
+    cell_detect_output_name: string
+    vessel_segment_output_name: string
+    ram_amount: int?
+    num_threads: int?
+    detect_threshold: float?
+    stop: float?
+    initial_template_size: int?
+    detect_dilation: int?
+    max_cells: int?
+    segment_threshold: float?
+    segment_dilation: int?
+    minimum: int?
+
+    map_output_name: string
+    list_output_name: string
+    centroid_volume_output_name: string
+outputs:
+    pull_output:
+        type: File
+        outputSource: boss_pull/pull_output
+    membrane_classify_output:
+        type: File
+        outputSource: membrane_classify/membrane_probability_map
+    cell_detect_output:
+        type: File
+        outputSource: cell_detect/cell_detect_results
+    vessel_segment_output:
+        type: File
+        outputSource: vessel_segment/vessel_segment_results
+
+steps:
+    boss_pull:
+        run: ../../boss_access/boss_pull.cwl
+        in:
+            config: config
+            token: token
+            coll_name: coll_name
+            exp_name: exp_name
+            chan_name: in_chan_name
+            dtype_name: dtype_name
+            itype_name: itype_name
+            resolution: resolution
+            xmin: xmin
+            xmax: xmax
+            ymin: ymin
+            ymax: ymax
+            zmin: zmin
+            zmax: zmax
+            padding: padding
+            output_name: pull_output_name
+            coord_name: coord_name
+            bucket: bucket
+            onesided: onesided
+        out:
+            [pull_output]
+
+    membrane_classify:
+        run: ../tools/membrane_classify.cwl
+        in:
+            input: boss_pull/pull_output
+            output_name: membrane_classify_output_name
+            classifier: classifier
+            ram_amount: ram_amount
+            num_threads: num_threads
+            bucket: bucket
+        out: [membrane_probability_map]
+    cell_detect:
+        run: ../tools/cell_detect.cwl
+        in:
+            input: membrane_classify/membrane_probability_map
+            output_name: cell_detect_output_name
+            classifier: classifier
+            threshold: detect_threshold
+            stop: stop
+            initial_template_size: initial_template_size
+            dilation: detect_dilation
+            max_cells: max_cells
+            # bucket: bucket
+        out: [cell_detect_results]
+    vessel_segment:
+        run: ../tools/vessel_segment.cwl
+        in:
+            input: membrane_classify/membrane_probability_map
+            output_name: vessel_segment_output_name
+            classifier: classifier
+            threshold: segment_threshold
+            dilation: segment_dilation
+            minimum: minimum
+            bucket: bucket
+        out: [vessel_segment_results]
+    cell_split:
+        run: ../tools/cell_split.cwl
+        in:
+            input: cell_detect/cell_detect_results
+            map_output_name: map_output_name
+            list_output_name: list_output_name
+            centroid_volume_output_name: centroid_volume_output_name
+        out:
+            [cell_map, cell_list, centroid_volume]
+    boss_push:
+        run: ../../boss_access/boss_push.cwl
+        in:
+            token: token
+            coll_name: coll_name
+            exp_name: exp_name
+            chan_name: out_chan_name
+            dtype_name: out_dtype_name
+            itype_name: out_itype_name
+            resolution: resolution
+            xmin: xmin
+            xmax: xmax
+            ymin: ymin
+            ymax: ymax
+            zmin: zmin
+            zmax: zmax
+            padding: padding
+            input: cell_split/centroid_volume
+            coord_name: coord_name
+            bucket: bucket
+            onesided: onesided
+        out: []
+
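A CWL job/parameter file for the new xbrain_with_boss.cwl workflow supplies the BOSS cutout parameters plus the output names that the steps above pass between one another. A minimal sketch with placeholder values (the bucket, classifier path, and every name below are hypothetical; optional inputs such as padding and onesided can be added the same way):

# xbrain_with_boss_job.yml (illustrative sketch only; names come from the workflow inputs, values are hypothetical)
_saber_bucket: my-saber-bucket
token: my_boss_token
coll_name: my_collection
exp_name: my_experiment
in_chan_name: raw_image
out_chan_name: cell_centroids
dtype_name: uint8
out_dtype_name: uint64
itype_name: image
out_itype_name: annotation
resolution: 0
xmin: 0
xmax: 1024
ymin: 0
ymax: 1024
zmin: 0
zmax: 100
pull_output_name: pull_output.npy
coord_name: my_coord_frame
classifier:
    class: File
    path: my_classifier.ilp
membrane_classify_output_name: membrane_probability.npy
cell_detect_output_name: cell_detect.npy
vessel_segment_output_name: vessel_segment.npy
map_output_name: cell_map.npy
list_output_name: cell_list.npy
centroid_volume_output_name: centroid_volume.npy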
diff --git a/saber/xbrain/xbrain.py b/saber/xbrain/xbrain.py
index d59bb08..a71396c 100644
--- a/saber/xbrain/xbrain.py
+++ b/saber/xbrain/xbrain.py
@@ -369,6 +369,10 @@ def detect_cells(cell_probability, probability_threshold, stopping_criterion,
     import pdb
     import logging

+    if len(cell_probability.shape) == 4:
+        print('Assuming Z, Chan, Y, X input')
+        cell_probability = np.transpose(cell_probability[:,0,:,:], (2,1,0))
+
     # threshold probability map.
     newtest = (cell_probability * (cell_probability > probability_threshold)).astype('float32')
     #initial_template_size is an int now but could a vector later on - convert it to an array
@@ -822,8 +826,12 @@ def centroid_f1(C0,C1,thres):
     C0 = np.transpose(C0)
     C1 = np.transpose(C1)
     Y = scipy.spatial.distance.cdist(C0, C1, 'euclidean')
-    vals = np.sort(np.amin(Y,axis=1))
-    valinds = np.argsort(np.min(Y,axis=1))
+    try:
+        vals = np.sort(np.amin(Y,axis=1))
+        valinds = np.argsort(np.min(Y,axis=1))
+    except ValueError:
+        print("No Detected Objects")
+        return 0
     L = len(vals[np.where(vals<=thres)])
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..40c3ba4
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+
+from distutils.core import setup
+deps = [
+    "watchdog(==0.9.0)",
+    "parse(==1.9.0)",
+    "boto3(==1.9.79)",
+    "docker(==3.7.0)",
+    "datajoint(==0.11.3)",
+    "cwltool(==1.0.20181217162649)",
+    ]
+setup(name='conduit',
+    version='1.0',
+    description='Conduit tool for SABER',
+    author='Raphael Norman-Tenazas',
+    author_email='raphael.norman-tenazas@jhuapl.edu',
+    url='https://github.com/aplbrain/saber',
+    packages=['conduit', 'conduit.utils'],
+    scripts=['conduit/conduit'],
+    install_requires=deps,
+    setup_requires=deps
+
+    )