From eb9d30e8cfa474868bd9c8694b6bd73f710b9b36 Mon Sep 17 00:00:00 2001 From: Michael Chisholm Date: Wed, 1 Feb 2023 02:27:16 -0500 Subject: [PATCH 1/3] Backend optimizations and improvements, and implement support for TAXII interop filters. --- .github/workflows/python-ci-tests.yml | 2 +- README.rst | 43 +- conftest.py | 3 + docs/conf.py | 4 +- docs/mongodb_schema.rst | 96 +- medallion/__init__.py | 56 +- medallion/backends/base.py | 53 +- medallion/backends/memory_backend.py | 933 +++++++++------ medallion/backends/mongodb_backend.py | 291 +++-- medallion/common.py | 294 ++--- medallion/exceptions.py | 5 + medallion/filters/basic_filter.py | 210 ---- medallion/filters/common.py | 234 ++++ medallion/filters/memory_filter.py | 834 ++++++++++++++ medallion/filters/mongodb_filter.py | 787 +++++++++++-- medallion/scripts/run.py | 17 +- medallion/test/base_test.py | 135 --- medallion/test/conftest.py | 4 - medallion/test/data/default_data.json | 182 +-- medallion/test/test_backends.py | 1475 ++++++++++++++++-------- medallion/test/test_memory_backend.py | 784 +++++++++++++ medallion/test/test_startup.py | 26 +- medallion/views/manifest.py | 2 +- medallion/views/objects.py | 8 +- sample-config-with-memory-backend.json | 2 +- setup.py | 7 +- tox.ini | 4 +- 27 files changed, 4715 insertions(+), 1776 deletions(-) create mode 100644 conftest.py delete mode 100644 medallion/filters/basic_filter.py create mode 100644 medallion/filters/common.py create mode 100644 medallion/filters/memory_filter.py delete mode 100644 medallion/test/base_test.py create mode 100644 medallion/test/test_memory_backend.py diff --git a/.github/workflows/python-ci-tests.yml b/.github/workflows/python-ci-tests.yml index e7fbb189..0ca1a43d 100644 --- a/.github/workflows/python-ci-tests.yml +++ b/.github/workflows/python-ci-tests.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6, 3.7, 3.8, 3.9] + python-version: [3.7, 3.8, 3.9, '3.10'] name: Python ${{ matrix.python-version }} Build steps: diff --git a/README.rst b/README.rst index 3d790183..e2d54dc2 100644 --- a/README.rst +++ b/README.rst @@ -113,23 +113,26 @@ The contains: To use the Memory back-end plug, include the following in the : -.. code-block:: json +.. code-block:: text { "backend": { "module_class": "MemoryBackend", - "filename": "" + "filename": , + "interop_requirements": true/false # the TAXII interop document has some additional requirements } } To use the Mongo DB back-end plug, include the following in the : -.. code-block:: json +.. code-block:: text { "backend": { "module_class": "MongoBackend", - "uri": " # e.g., 'mongodb://localhost:27017/'" + "uri": # e.g., 'mongodb://localhost:27017/' + "filename": , + "interop_requirements": true/false # the TAXII interop document has some additional requirements } } @@ -138,13 +141,16 @@ To use the Mongo DB back-end plug, include the following in the : A description of the Mongo DB structure expected by the mongo db backend code is described in `the documentation `_. +The ``interop_requirements`` option will enforce additional requirements from +the TAXII 2.1 Interoperability specification. It defaults to ``false``. + As required by the TAXII specification, *medallion* supports HTTP Basic authorization. However, the user names and passwords are currently stored in the in plain text. Here is an example: -.. code-block:: json +.. 
code-block:: text

     {
         "users": {
@@ -161,43 +167,38 @@ Authorization could be enhanced by changing the method "decorated" using

 Configs may also contain a "taxii" section as well, as shown below:

-.. code-block:: json
+.. code-block:: text

     {
         "taxii": {
            "max_page_size": 100
-           "interop_requirements": true
        }
     }

 All TAXII servers require a config, though if any of the sections specified
 above are missing, they will be filled with default values.

-The ``interop_requirements`` option will enforce additional requireemnts from
-the TAXII 2.1 Interoperability specification. It defaults to ``false``.
-
 We welcome contributions for other back-end plugins.

 Docker
 ------

-We also provide a Docker image to make it easier to run *medallion*
+We also provide a Docker image to make it easier to run *medallion* with the MongoDB backend. Use the --build argument
+if the code has changed.

 .. code-block:: bash

-    $ docker build . -t medallion -f docker_utils/Dockerfile
+    $ docker-compose up [--build]

-If operating behind a proxy, add the following option (replacing `` with
-your proxy location and port): ``--build-arg https_proxy=``.
+This uses the information in docker-compose.yml to create Docker containers for medallion, MongoDB, and mongo-express.

-Then run the image
+If operating behind a proxy, add the following to the medallion:build section of docker-compose.yml:

-.. code-block:: bash
+.. code-block:: text

-    $ docker run --rm -p 5000:5000 -v :/var/taxii medallion
+    HTTPS_PROXY: 

-Replace ```` with the full path to the directory containing your
-medallion configuration.
+replacing with your proxy location and port.

 Governance
 ----------
@@ -249,10 +250,8 @@ additional or substitute Maintainers, per `consensus agreements
 `__; GitHub ID: https://github.com/clenk/; WWW: `MITRE Corporation `__
-- `Rich Piazza `__; GitHub ID: https://github.com/rpiazza/; WWW: `MITRE Corporation `__
-- `Zach Rush `__; GitHub ID: https://github.com/zrush-mitre/; WWW: `MITRE Corporation `__
- `Jason Keirstead `__; GitHub ID: https://github.com/JasonKeirstead; WWW: `IBM `__
+- `Duncan Sparrell `__; GitHub ID: https://github.com/sparrell; WWW: `sFractal `__

 About OASIS TC Open Repositories
 --------------------------------
diff --git a/conftest.py b/conftest.py
new file mode 100644
index 00000000..17029190
--- /dev/null
+++ b/conftest.py
@@ -0,0 +1,3 @@
+
+def pytest_addoption(parser):
+    parser.addoption("--backends", action="store", default="memory,mongo")
diff --git a/docs/conf.py b/docs/conf.py
index ec73fd40..552166d0 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -67,7 +67,7 @@
 #
 # This is also used if you do content translation via gettext catalogs.
 # Usually you set "language" from the command line for these cases.
-language = None
+language = "en"

 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
@@ -97,7 +97,7 @@
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+# html_static_path = ['_static']

 # Custom sidebar templates, must be a dictionary that maps document names
 # to template names. 
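The README configuration sections shown earlier in this patch (users, taxii, and backend) are consumed by the new ``create_app()`` factory added to ``medallion/__init__.py`` below. A minimal sketch of starting a server from Python; the user credentials and data file path are illustrative placeholders, not values mandated by this patch:

.. code-block:: python

    from medallion import create_app

    config = {
        # Plain-text credentials, as described in the README section above.
        "users": {"admin": "Password0"},
        "taxii": {"max_page_size": 100},
        "backend": {
            "module_class": "MemoryBackend",
            "filename": "medallion/test/data/default_data.json",
            # Set to True to enforce the extra TAXII 2.1 interop requirements.
            "interop_requirements": False,
        },
    }

    # create_app() registers the blueprints and error handlers, applies the
    # users/taxii/backend config sections, and connects the backend.
    app = create_app(config)
    app.run(port=5000)  # development only; use a WSGI server in production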
diff --git a/docs/mongodb_schema.rst b/docs/mongodb_schema.rst
index ccc0aebd..4120e79c 100644
--- a/docs/mongodb_schema.rst
+++ b/docs/mongodb_schema.rst
@@ -4,22 +4,33 @@ Design of the TAXII Server Mongo DB Schema for *medallion*
 As *medallion* is a prototype TAXII server implementation, the schema design
 for a Mongo DB is relatively straightforward.

-Each Mongo database contains one or more collections. The term "collection" in Mongo DBs is similar to the concept of a table in a relational database. Collections contain "documents", similar to records.
+Each Mongo database contains one or more collections. The term "collection" in Mongo DBs is similar to the concept of a table in a relational database. Collections contain "documents", somewhat analogous to table rows.
 It is unfortunate that the term "collection" is also used to signify something unrelated in the TAXII specification. We will use the phrase "taxii collection" to distinguish them.

-An instance of this schema can be populated via the file test/data/initialize_mongodb.py. This instance will be used for examples below.
+You can initialize the database with content by specifying a JSON file in the backend section of the medallion configuration. The JSON file containing TAXII server content must have a particular structure; refer to medallion/test/data/default_data.json for an example.

-Utilities to initialize your own Mongo DB can be found in test/generic_initialize_mongodb.py.
+An example configuration:
+
+.. code-block:: json
+
+    {
+        "backend": {
+            "module_class": "MongoBackend",
+            "uri": "",
+            "filename": ""
+        }
+    }
+
+.. important::
+    To avoid accidentally deleting data, the Mongo backend will check whether the database appears to have already been initialized. If so, it will not change anything. To override the safety check and always reinitialize the database, add another backend setting: ``"clear_db": true``.

 The discovery database
 ----------------------

-Basic metadata contained in the mongo database named **discovery_database**.
+Basic metadata is contained in the mongo database named **discovery_database**. The discovery_database contains two collections:

-The discovery_database contains two collections:
-
-**discovery_information**. It should only contain only one "document", which is the discovery information that would be returned from the Discovery endpoint. Here is the document from the example database.
+**discovery_information** should contain only one "document", which is the discovery information that would be returned from the Discovery endpoint. Here is the document from the example database.

 .. code-block:: json

@@ -27,7 +38,7 @@ The discovery_database contains two collections:
         "title": "Some TAXII Server",
         "description": "This TAXII Server contains a listing of",
         "contact": "string containing contact information",
-        "default": "http://localhost:5000/api2/",
+        "default": "http://localhost:5000/trustgroup1/",
         "api_roots": [
             "http://localhost:5000/api1/",
             "http://localhost:5000/api2/",
@@ -45,7 +56,7 @@ Here is a document from the example database:
         "title": "Malware Research Group",
         "description": "A trust group setup for malware researchers",
         "versions": [
-            "taxii-2.0"
+            "application/taxii+json;version=2.1"
         ],
         "max_content_length": 9765625,
         "_url": "http://localhost:5000/trustgroup1/",
@@ -55,7 +66,8 @@ Here is a document from the example database:
 The api root databases
 ----------------------

-Each api root is contained in a separate Mongo DB database. 
It has four collections: **status**, **objects**, **manifests**, and **collections**. To support multiple taxii collections, any document in the **objects** and **manifests** contains an extra property, "collection_id", to link it to the taxii collection that it is contained in. Because "_collection_id" property is not part of the TAXII specification, it will be stripped by *medallion* before any document is returned to the client. +Each api root is contained in a separate Mongo DB database. It has three collections: **status**, **objects**, +and **collections**. A document from the **collections** collection: @@ -72,22 +84,31 @@ A document from the **collections** collection: ] } +Because the STIX objects and the manifest entries correspond one-to-one, the manifest is stored with the object. It keeps all information about an object in one place and avoids the complexity and overhead of needing to join documents. Also, timestamps are stored as numbers due to the millisecond precision limitation of the Mongo built-in ``Date`` type. These documents are converted to proper STIX or TAXII JSON format as needed. + A document from the **objects** collection: .. code-block:: json { - "created": "2014-05-08T09:00:00.000Z", - "id": "indicator--a932fcc6-e032-176c-126f-cb970a5a1ade", - "labels": [ - "file-hash-watchlist" + "created": 1485524993.997, + "description": "Poison Ivy", + "id": "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", + "is_family": true, + "malware_types": [ + "remote-access-trojan" ], - "modified": "2014-05-08T09:00:00.000Z", - "name": "File hash for Poison Ivy variant", - "pattern": "[file:hashes.'SHA-256' = 'ef537f25c895bfa782526529a9b63d97aa631564d5d789c2b765448c8635fb6c']", - "type": "indicator", - "valid_from": "2014-05-08T09:00:00.000000Z", - "_collection_id": "91a7b528-80eb-42ed-a74d-c6fbd5a26116" + "modified": 1485524993.997, + "name": "Poison Ivy", + "spec_version": "2.1", + "type": "malware", + "_collection_id": "91a7b528-80eb-42ed-a74d-c6fbd5a26116", + "_manifest": { + "date_added": 1485524999.997, + "id": "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", + "media_type": "application/stix+json;version=2.1", + "version": 1485524993.997 + } } A document from the **status** collection: @@ -97,38 +118,33 @@ A document from the **status** collection: { "id": "2d086da7-4bdc-4f91-900e-d77486753710", "status": "pending", - "request_timestamp": "2016-11-02T12:34:34.12345Z", + "request_timestamp": "2016-11-02T12:34:34.123456Z", "total_count": 4, "success_count": 1, "successes": [ - "indicator--a932fcc6-e032-176c-126f-cb970a5a1ade" + { + "id": "indicator--cd981c25-8042-4166-8945-51178443bdac", + "version": "2014-05-08T09:00:00.000Z", + "message": "Successfully added object to collection '91a7b528-80eb-42ed-a74d-c6fbd5a26116'." + } ], "failure_count": 1, "failures": [ { "id": "malware--664fa29d-bf65-4f28-a667-bdb76f29ec98", + "version": "2015-05-08T09:00:00.000Z", "message": "Unable to process object" } ], "pending_count": 2, "pendings": [ - "indicator--252c7c11-daf2-42bd-843b-be65edca9f61", - "relationship--045585ad-a22f-4333-af33-bfd503a683b5" + { + "id": "indicator--252c7c11-daf2-42bd-843b-be65edca9f61", + "version": "2016-08-08T09:00:00.000Z" + }, + { + "id": "relationship--045585ad-a22f-4333-af33-bfd503a683b5", + "version": "2016-06-08T09:00:00.000Z" + } ] } - -A document from the **manifest** collection: - -.. 
code-block:: json - - { - "id": "indicator--a932fcc6-e032-176c-126f-cb970a5a1ade", - "date_added": "2016-11-01T10:29:05Z", - "versions": [ - "2014-05-08T09:00:00.000Z" - ], - "media_types": [ - "application/vnd.oasis.stix+json; version=2.0" - ], - "_collection_id": "91a7b528-80eb-42ed-a74d-c6fbd5a26116" - } diff --git a/medallion/__init__.py b/medallion/__init__.py index 65283793..a8433872 100644 --- a/medallion/__init__.py +++ b/medallion/__init__.py @@ -2,11 +2,11 @@ import logging import warnings +import flask from flask import Response, current_app, json from flask_httpauth import HTTPBasicAuth from .backends import base as mbe_base -from .common import APPLICATION_INSTANCE from .exceptions import BackendError, InitializationError, ProcessingError from .version import __version__ # noqa from .views import MEDIA_TYPE_TAXII_V21 @@ -29,8 +29,6 @@ def set_config(flask_application_instance, prop_name, config): flask_application_instance.taxii_config = config[prop_name] else: flask_application_instance.taxii_config = {'max_page_size': 100} - if "interop_requirements" not in flask_application_instance.taxii_config: - flask_application_instance.taxii_config["interop_requirements"] = False elif prop_name == "users": try: flask_application_instance.users_config = config[prop_name] @@ -46,8 +44,11 @@ def set_config(flask_application_instance, prop_name, config): else: raise InitializationError("You did not give backend information in your config.", 408) + if "interop_requirements" not in flask_application_instance.backend_config: + flask_application_instance.backend_config["interop_requirements"] = False -def connect_to_backend(config_info, clear_db=False): + +def connect_to_backend(config_info): log.debug("Initializing backend configuration using: {}".format(config_info)) try: @@ -90,7 +91,6 @@ def connect_to_backend(config_info, clear_db=False): # Finally, instantiate the backend class with the configuration passed in try: - config_info["clear_db"] = clear_db return backend_cls(**config_info) except BaseException as exc: log.error("Failed to instantiate %r: %s", backend_cls_name, exc) @@ -100,12 +100,13 @@ def connect_to_backend(config_info, clear_db=False): def register_blueprints(flask_application_instance): from medallion.views import collections, discovery, manifest, objects - with flask_application_instance.app_context(): - log.debug("Registering medallion blueprints into {}".format(current_app)) - current_app.register_blueprint(collections.collections_bp) - current_app.register_blueprint(discovery.discovery_bp) - current_app.register_blueprint(manifest.manifest_bp) - current_app.register_blueprint(objects.objects_bp) + log.debug( + "Registering medallion blueprints into %s", flask_application_instance + ) + flask_application_instance.register_blueprint(collections.collections_bp) + flask_application_instance.register_blueprint(discovery.discovery_bp) + flask_application_instance.register_blueprint(manifest.manifest_bp) + flask_application_instance.register_blueprint(objects.objects_bp) @auth.get_password @@ -115,8 +116,7 @@ def get_pwd(username): return None -@APPLICATION_INSTANCE.errorhandler(500) -def handle_error(error): +def handle_error_other(error): e = { "title": "InternalError", "http_status": "500", @@ -129,7 +129,6 @@ def handle_error(error): ) -@APPLICATION_INSTANCE.errorhandler(ProcessingError) def handle_processing_error(error): e = { "title": str(error.__class__.__name__), @@ -144,7 +143,6 @@ def handle_processing_error(error): ) 
-@APPLICATION_INSTANCE.errorhandler(BackendError) def handle_backend_error(error): e = { "title": str(error.__class__.__name__), @@ -156,3 +154,31 @@ def handle_backend_error(error): status=error.status, mimetype=MEDIA_TYPE_TAXII_V21, ) + + +def register_error_handlers(flask_app): + + flask_app.register_error_handler(ProcessingError, handle_processing_error) + flask_app.register_error_handler(BackendError, handle_backend_error) + flask_app.register_error_handler(500, handle_error_other) + + +def create_app(config): + """ + Create a medallion Flask application based on the given configuration. + + :param config: A medallion configuration object (dict). + :return: A Flask instance + """ + app = flask.Flask("medallion") + + register_blueprints(app) + register_error_handlers(app) + + set_config(app, "users", config) + set_config(app, "taxii", config) + set_config(app, "backend", config) + + app.medallion_backend = connect_to_backend(app.backend_config) + + return app diff --git a/medallion/backends/base.py b/medallion/backends/base.py index fc7df3be..6cea05d2 100644 --- a/medallion/backends/base.py +++ b/medallion/backends/base.py @@ -1,9 +1,7 @@ import logging from urllib.parse import urlparse -from ..common import ( - APPLICATION_INSTANCE, TaskChecker, get_application_instance_config_values -) +from ..common import TaskChecker from ..exceptions import InitializationError # Module-level logger @@ -48,23 +46,24 @@ class Backend(object, metaclass=BackendRegistry): def __init__(self, **kwargs): self.next = {} - interop_requirements_enforced = get_application_instance_config_values(APPLICATION_INSTANCE, "taxii", "interop_requirements") + self.interop_requirements_enforced = kwargs.get( + "interop_requirements", False + ) + + self.checker = self.status_checker = None + if kwargs.get("run_cleanup_threads", True): self.timeout = kwargs.get("session_timeout", 30) - checker = TaskChecker(kwargs.get("check_interval", 10), self._pop_expired_sessions) - checker.start() + self.checker = TaskChecker(kwargs.get("check_interval", 10), self._pop_expired_sessions) + self.checker.start() self.status_retention = kwargs.get("status_retention", SECONDS_IN_24_HOURS) if self.status_retention != -1: - if self.status_retention < SECONDS_IN_24_HOURS and interop_requirements_enforced: + if self.status_retention < SECONDS_IN_24_HOURS and self.interop_requirements_enforced: # interop MUST requirement raise InitializationError("Status retention interval must be more than 24 hours", 408) - status_checker = TaskChecker(kwargs.get("check_interval", 10), self._pop_old_statuses) - status_checker.start() - else: - if interop_requirements_enforced: - # interop MUST requirement - raise InitializationError("Status retention interval must be more than 24 hours", 408) + self.status_checker = TaskChecker(kwargs.get("check_interval", 10), self._pop_old_statuses) + self.status_checker.start() def _get_all_api_roots(self): discovery_info = self.server_discovery() @@ -133,7 +132,7 @@ def get_collection(self, api_root, collection_id): """ raise NotImplementedError() - def get_object_manifest(self, api_root, collection_id, filter_args, allowed_filters, limit): + def get_object_manifest(self, api_root, collection_id, filter_args, limit): """ Fill: Implement the get_object_manifest TAXII endpoint by obtaining the metadata @@ -144,7 +143,6 @@ def get_object_manifest(self, api_root, collection_id, filter_args, allowed_filt collection_id (str): the id of the collection filter_args (werkzeug.datastructures.ImmutableMultiDict): query string from 
URL containing filter args - allowed_filters (tuple): STIX properties which are allowed in the filter for this endpoint limit (int): Used for pagination requests. limits objects to the amount specified Returns: @@ -185,7 +183,7 @@ def get_status(self, api_root, status_id): """ raise NotImplementedError() - def get_objects(self, api_root, collection_id, filter_args, allowed_filters, limit): + def get_objects(self, api_root, collection_id, filter_args, limit): """ Fill: Implement the get_objects TAXII endpoint by obtaining the data from a collection @@ -195,7 +193,6 @@ def get_objects(self, api_root, collection_id, filter_args, allowed_filters, lim collection_id (str): the id of the collection filter_args (werkzeug.datastructures.ImmutableMultiDict): query string from URL containing filter args - allowed_filters (tuple): STIX properties which are allowed in the filter for this endpoint limit (int): Used for pagination requests. limits objects to the amount specified Returns: @@ -228,7 +225,7 @@ def add_objects(self, api_root, collection_id, objs, request_time): """ raise NotImplementedError() - def get_object(self, api_root, collection_id, object_id, filter_args, allowed_filters, limit): + def get_object(self, api_root, collection_id, object_id, filter_args, limit): """ Fill: Implement the get_object TAXII endpoint by obtaining the data from a collection related @@ -240,7 +237,6 @@ def get_object(self, api_root, collection_id, object_id, filter_args, allowed_fi object_id (str): the id of the requested object filter_args (werkzeug.datastructures.ImmutableMultiDict): query string from URL containing filter args - allowed_filters (tuple): STIX properties which are allowed in the filter for this endpoint limit (int): Used for pagination requests. limits objects to the amount specified Returns: @@ -249,7 +245,7 @@ def get_object(self, api_root, collection_id, object_id, filter_args, allowed_fi """ raise NotImplementedError() - def delete_object(self, api_root, collection_id, object_id, filter_args, allowed_filters): + def delete_object(self, api_root, collection_id, object_id, filter_args): """ Fill: Implement the delete_object TAXII endpoint by obtaining the metadata for a selected @@ -261,7 +257,6 @@ def delete_object(self, api_root, collection_id, object_id, filter_args, allowed object_id (str): the id of the requested object filter_args (werkzeug.datastructures.ImmutableMultiDict): query string from URL containing filter args - allowed_filters (tuple): STIX properties which are allowed in the filter for this endpoint Returns: Nothing. @@ -269,7 +264,7 @@ def delete_object(self, api_root, collection_id, object_id, filter_args, allowed """ raise NotImplementedError() - def get_object_versions(self, api_root, collection_id, object_id, filter_args, allowed_filters, limit): + def get_object_versions(self, api_root, collection_id, object_id, filter_args, limit): """ Fill: Implement the get_object_versions TAXII endpoint by obtaining the metadata for a selected @@ -281,7 +276,6 @@ def get_object_versions(self, api_root, collection_id, object_id, filter_args, a object_id (str): the id of the requested object filter_args (werkzeug.datastructures.ImmutableMultiDict): query string from URL containing filter args - allowed_filters (tuple): STIX properties which are allowed in the filter for this endpoint limit (int): Used for pagination requests. 
limits objects to the amount specified Returns: @@ -317,3 +311,16 @@ def _pop_old_statuses(self): """ raise NotImplementedError() + + def close(self): + # Shut down task checkers for this backend, if they are active. + + # If subclasses never call their super().__init__() to set up these + # attributes, they may not exist... + checker = getattr(self, "checker", None) + status_checker = getattr(self, "status_checker", None) + + if checker: + self.checker.stop() + if status_checker: + self.status_checker.stop() diff --git a/medallion/backends/memory_backend.py b/medallion/backends/memory_backend.py index 6b7045f3..9476d059 100644 --- a/medallion/backends/memory_backend.py +++ b/medallion/backends/memory_backend.py @@ -1,42 +1,165 @@ -import copy -import io import json import logging import os import uuid import environ -from six import string_types from ..common import ( - APPLICATION_INSTANCE, create_resource, datetime_to_float, - datetime_to_string, determine_spec_version, determine_version, find_att, - generate_status, generate_status_details, - get_application_instance_config_values, get_timestamp, iterpath, - string_to_datetime + create_resource, determine_spec_version, generate_status, + generate_status_details, get_timestamp, timestamp_to_datetime, + timestamp_to_epoch_seconds, timestamp_to_stix_json, + timestamp_to_taxii_json ) -from ..exceptions import InitializationError, ProcessingError -from ..filters.basic_filter import BasicFilter +from ..exceptions import MemoryBackendError, ProcessingError +from ..filters.memory_filter import MemoryFilter from .base import Backend # Module-level logger log = logging.getLogger(__name__) -def remove_hidden_field(objs): +class Meta: + """ + Contains metadata about a STIX object, for use in the memory backend. + Having this allows me to have data which is useful but should not be + stored in the memory backend datafile. Having a custom type lets me + customize JSON-serialization. + """ + def __init__(self, date_added, media_type, version): + self.date_added = timestamp_to_datetime(date_added) + self.version = timestamp_to_datetime(version) + self.media_type = media_type + + # Extract and cache the spec version from media_type. Expected sample + # format for media_type: + # application/stix+json;version=2.1 + eq_idx = self.media_type.index("=") + self.spec_version = self.media_type[eq_idx+1:] + + # A plain string is probably fine for equality comparisons, but has + # the wrong order semantics. E.g. "2.10" < "2.2" as strings, which is + # incorrect. So cache an integer tuple for ordering purposes. + self.spec_version_tuple = tuple( + int(version_part) + for version_part in self.spec_version.split(".") + ) + + def __repr__(self): + return 'Meta("{}", "{}", "{}")'.format( + timestamp_to_taxii_json(self.date_added), + self.media_type, + timestamp_to_taxii_json(self.version) + ) + + +def _metafy_object(obj, *, date_added=None, media_type=None): + """ + Given a plain STIX object or object with a __meta key from the memory + backend data file, replace/add the value of __meta with an instance of + the Meta class, containing handy metadata. + + :param obj: The object to metafy + :param date_added: A value to use for date_added; the value from a + pre-existing __meta object is preferred, if it exists. This is just a + fallback. + :param media_type: A value to use for media_type; the value from a + pre-existing __meta object is preferred, if it exists. This is just a + fallback. 
+ """ + obj_meta = obj.get("__meta", {}) + date_added = obj_meta.get("date_added", date_added) + media_type = obj_meta.get("media_type", media_type) + version = obj.get("modified") or obj.get("created") or date_added + + if not date_added: + # Should not happen. We are responsible for maintaining our own + # date_added timestamps. + raise MemoryBackendError( + "Internal error: object lacks a date_added timestamp:" + " {}/{}".format( + obj["id"], version + ), + 500 + ) + + if not media_type: + # Again, should not happen. Clients don't give us media types in their + # requests. We need to figure it out for ourselves. + raise MemoryBackendError( + "Internal error: object lacks a media_type: {}/{}".format( + obj["id"], version + ), + 500 + ) + + obj["__meta"] = Meta(date_added, media_type, version) + + +def meta_decoder(obj): + """A function used as a JSON decoder hook to instantiate Meta objects.""" + if "__meta" in obj: + _metafy_object(obj) + + return obj + + +class MetaEncoder(json.JSONEncoder): + """ + The JSON encoder associated with the above Meta class. Ensures meta + properties which should not be written to the data file, are masked. + """ + def default(self, value): + if isinstance(value, Meta): + return { + "date_added": timestamp_to_taxii_json(value.date_added), + "media_type": value.media_type + } + return super().default(value) + + +def _make_plain_objects(objs): + """ + From an iterable of memory backend object structures (which contain both + the STIX object and associated metadata), create a list of plain STIX + object structures. This removes any "extra" implementation detail stuff, + like the "__meta" key. A new list is returned; the given list is not + modified. + + :param objects: iterable of merged object/manifest structures + :return: list of plain STIX objects + """ + plain_objs = [] for obj in objs: - if "_date_added" in obj: - del obj["_date_added"] + # Shallow copy, to share object substructure, to reduce memory usage. + obj_copy = obj.copy() + obj_copy.pop("__meta", None) + plain_objs.append(obj_copy) + return plain_objs -def find_headers(headers, manifest, obj): - obj_time = find_att(obj) - for man in manifest: - if man["id"] == obj["id"] and obj_time == find_att(man): - if len(headers) == 0: - headers["X-TAXII-Date-Added-First"] = man["date_added"] - else: - headers["X-TAXII-Date-Added-Last"] = man["date_added"] + +def _make_manifests(objects): + """ + From an iterable of memory backend object structures (which contain both + the STIX object and associated metadata), create a list of manifest + resources. + + :param objects: iterable of merged object/manifest structures + :return: list of manifest resources + """ + manifests = [ + { + "id": obj["id"], + "date_added": timestamp_to_taxii_json(obj["__meta"].date_added), + "version": timestamp_to_stix_json(obj["__meta"].version), + "media_type": obj["__meta"].media_type + } + + for obj in objects + ] + + return manifests class MemoryBackend(Backend): @@ -58,16 +181,17 @@ def __init__(self, **kwargs): "it does not provide an external data backend. " "Set the 'force_wsgi' backend option to true to skip this." 
) - if kwargs.get("filename"): - self.load_data_from_file(kwargs.get("filename")) - self.collections_manifest_check() + if "filename" in kwargs: + self.__discovery, self.__api_roots = \ + self.load_data_from_file(kwargs.get("filename")) else: - self.data = {} + self.__discovery = {} + self.__api_roots = {} super(MemoryBackend, self).__init__(**kwargs) def _pop_expired_sessions(self): expired_ids = [] - boundary = datetime_to_float(get_timestamp()) + boundary = timestamp_to_epoch_seconds(get_timestamp()) for next_id, record in self.next.items(): if boundary - record["request_time"] > self.timeout: expired_ids.append(next_id) @@ -77,411 +201,468 @@ def _pop_expired_sessions(self): def _pop_old_statuses(self): api_roots = self._get_all_api_roots() - boundary = datetime_to_float(get_timestamp()) + boundary = timestamp_to_epoch_seconds(get_timestamp()) + ids_to_del = [] for ar in api_roots: - statuses_of_api_root = copy.copy(self._get_api_root_statuses(ar)) - for s in statuses_of_api_root: - if boundary - datetime_to_float(string_to_datetime(s["request_timestamp"])) > self.status_retention: - self._get_api_root_statuses(ar).remove(s) - log.info("Status {} was deleted from {} because it was older than the status retention time".format(s['id'], ar)) + status_map = self.__api_roots.get(ar, {}).get("status", {}) + ids_to_del.clear() + for status_id, status in status_map.items(): + status_age = boundary - timestamp_to_epoch_seconds(status["request_timestamp"]) + if status_age > self.status_retention: + ids_to_del.append(status_id) + + for status_id in ids_to_del: + del status_map[status_id] + log.info("Status {} was deleted from {} because it was older than the status retention time".format(status_id, ar)) def set_next(self, objects, args): u = str(uuid.uuid4()) - if "limit" in args: - del args["limit"] + args.pop("limit", None) for arg in args: new_list = args[arg].split(',') new_list.sort() args[arg] = new_list - d = {"objects": objects, "args": args, "request_time": datetime_to_float(get_timestamp())} + d = {"objects": objects, "args": args, "request_time": timestamp_to_epoch_seconds(get_timestamp())} self.next[u] = d return u - def get_next(self, filter_args, allowed, manifest, lim): + def get_next(self, filter_args, lim): n = filter_args["next"] - if n in self.next: + paging_record = self.next.get(n) + if paging_record: for arg in filter_args: new_list = filter_args[arg].split(',') new_list.sort() filter_args[arg] = new_list del filter_args["next"] - del filter_args["limit"] - if filter_args != self.next[n]["args"]: + filter_args.pop("limit", None) + if filter_args != paging_record["args"]: raise ProcessingError("The server did not understand the request or filter parameters: params changed over subsequent transaction", 400) - t = self.next[n]["objects"] - length = len(self.next[n]["objects"]) - headers = {} - ret = [] - if length <= lim: - limit = length - more = False - nex = None - else: - limit = lim - more = True - for i in range(0, limit): - x = t.pop(0) - ret.append(x) - if len(headers) == 0: - find_headers(headers, manifest, x) - if i == limit - 1: - find_headers(headers, manifest, x) - if not more: - self.next.pop(n) + remaining_objs = paging_record["objects"] + next_page = remaining_objs[:lim] + remaining_objs = remaining_objs[lim:] + + if remaining_objs: + paging_record["objects"] = remaining_objs + more = True else: - nex = n + self.next.pop(n) + more = False + + headers = { + "X-TAXII-Date-Added-First": timestamp_to_taxii_json( + next_page[0]["__meta"].date_added + ), + 
"X-TAXII-Date-Added-Last": timestamp_to_taxii_json( + next_page[-1]["__meta"].date_added + ) + } - return ret, more, headers, nex + return next_page, more, headers else: raise ProcessingError("The server did not understand the request or filter parameters: 'next' not valid", 400) - def collections_manifest_check(self): - """ - Checks collections for proper manifest, if objects are present in a collection, a manifest should be present with - an entry for each entry in objects - """ - - for key, api_root in self.data.items(): - for collection in api_root.get('collections', []): - if not collection.get('objects'): - continue - if 'manifest' not in collection: - raise InitializationError("Collection {} manifest is missing".format(collection['id']), 408) - if not collection['manifest']: - raise InitializationError("Collection {} with objects has an empty manifest".format(collection['id']), 408) - for obj in collection.get('objects', []): - obj_time = find_att(obj) - obj_man_paired = False - for man in collection['manifest']: - man_time = find_att(man) - if obj['id'] == man['id'] and obj_time == man_time: - obj_man_paired = True - break - if not obj_man_paired: - raise InitializationError("Object with id {} from {} is missing a manifest".format(obj['id'], obj_time), 408) - def load_data_from_file(self, filename): - if isinstance(filename, string_types): - with io.open(filename, "r", encoding="utf-8") as infile: - self.data = json.load(infile) + if isinstance(filename, str): + with open(filename, "r", encoding="utf-8") as infile: + data = json.load(infile, object_hook=meta_decoder) else: - self.data = json.load(filename) + data = json.load(filename, object_hook=meta_decoder) + + api_roots = data + discovery = data.pop("/discovery") + + return discovery, api_roots def save_data_to_file(self, filename, **kwargs): """The kwargs are passed to ``json.dump()`` if provided.""" - if isinstance(filename, string_types): - with io.open(filename, "w", encoding="utf-8") as outfile: - json.dump(self.data, outfile, **kwargs) + file_contents = { + "/discovery": self.__discovery, + **self.__api_roots + } + if isinstance(filename, str): + with open(filename, "w", encoding="utf-8") as outfile: + json.dump(file_contents, outfile, cls=MetaEncoder, **kwargs) else: - json.dump(self.data, filename, **kwargs) - - def _get(self, key): - for ancestors, item in iterpath(self.data): - if key in ancestors: - return item + json.dump(file_contents, filename, cls=MetaEncoder, **kwargs) def server_discovery(self): - return self._get("/discovery") - - def _update_manifest(self, new_obj, api_root, collection_id, request_time): - api_info = self._get(api_root) - collections = api_info.get("collections", []) - media_type_fmt = "application/stix+json;version={}" - - for collection in collections: - if collection_id == collection["id"]: - version = determine_version(new_obj, request_time) - request_time = datetime_to_string(request_time) - media_type = media_type_fmt.format(determine_spec_version(new_obj)) - - # version is a single value now, therefore a new manifest is always created - collection["manifest"].append( - { - "id": new_obj["id"], - "date_added": request_time, - "version": version, - "media_type": media_type, - }, - ) - - # if the media type is new, attach it to the collection - if media_type not in collection["media_types"]: - collection["media_types"].append(media_type) + return self.__discovery - # quit once you have found the collection that needed updating - break - - def get_collections(self, api_root): - if api_root 
not in self.data: + def get_collections(self, api_root_name): + api_root = self.__api_roots.get(api_root_name) + if not api_root: return None # must return None so 404 is raised - api_info = self._get(api_root) - collections = copy.deepcopy(api_info.get("collections", [])) - + collections = api_root.get("collections", {}) + collection_resources = [] # Remove data that is not part of the response. - for collection in collections: - collection.pop("manifest", None) - collection.pop("responses", None) + for collection_id, collection in collections.items(): + collection = collection.copy() collection.pop("objects", None) + collection["id"] = collection_id # just in case + collection_resources.append(collection) # interop wants results sorted by id - if get_application_instance_config_values(APPLICATION_INSTANCE, "taxii", "interop_requirements"): - collections = sorted(collections, key=lambda o: o["id"]) - return create_resource("collections", collections) + if self.interop_requirements_enforced: + collection_resources = sorted(collection_resources, key=lambda o: o["id"]) + + return create_resource("collections", collection_resources) def get_collection(self, api_root, collection_id): - if api_root not in self.data: + collection = self.__api_roots.get(api_root, {}) \ + .get("collections", {}) \ + .get(collection_id) + + if not collection: return None # must return None so 404 is raised - api_info = self._get(api_root) - collections = copy.deepcopy(api_info.get("collections", [])) - - for collection in collections: - if collection_id == collection["id"]: - collection.pop("manifest", None) - collection.pop("responses", None) - collection.pop("objects", None) - return collection - - def get_object_manifest(self, api_root, collection_id, filter_args, allowed_filters, limit): - more = False - n = None - if api_root in self.data: - api_info = self._get(api_root) - collections = api_info.get("collections", []) - - for collection in collections: - if collection_id == collection["id"]: - if "next" in filter_args: - manifest = collection.get("manifest", []) - manifest, more, headers, n = self.get_next(filter_args, allowed_filters, manifest, limit) - else: - manifest = collection.get("manifest", []) - full_filter = BasicFilter(filter_args) - manifest, next_save, headers = full_filter.process_filter( - manifest, - allowed_filters, - None, - limit - ) - if len(next_save) != 0: - more = True - n = self.set_next(next_save, filter_args) - break - return create_resource("objects", manifest, more, n), headers - - def get_api_root_information(self, api_root): - if api_root in self.data: - api_info = self._get(api_root) - - if "information" in api_info: - return api_info["information"] - - def _get_api_root_statuses(self, api_root): - api_info = self._get(api_root) - - if "status" in api_info: - return api_info["status"] - - def get_status(self, api_root, status_id): - if api_root in self.data: - api_info = self._get(api_root) - - for status in api_info.get("status", []): - if status_id == status["id"]: - return status - - def get_objects(self, api_root, collection_id, filter_args, allowed_filters, limit): - more = False - n = None - if api_root in self.data: - api_info = self._get(api_root) - collections = api_info.get("collections", []) - objs = [] - for collection in collections: - if collection_id == collection["id"]: - manifest = collection.get("manifest", []) - if "next" in filter_args: - objs, more, headers, n = self.get_next(filter_args, allowed_filters, manifest, limit) - else: - objs = 
copy.deepcopy(collection.get("objects", [])) - full_filter = BasicFilter(filter_args) - objs, next_save, headers = full_filter.process_filter( - objs, - allowed_filters, - manifest, - limit - ) + collection = collection.copy() + collection.pop("objects", None) + collection["id"] = collection_id # just in case + + return collection + + def _get_objects( + self, api_root, collection_id, filter_args, limit + ): + """ + Search/page the given collection via the given filters. If filter_args + contains a "next" parameter, a paging record is looked up and consulted; + the collection is not searched. + + :param api_root: An API root name + :param collection_id: A collection ID + :param filter_args: HTTP filtering query parameters + :param limit: A page size limit; may be less than requested in HTTP + query parameters due to server-enforced max page size + :return: None if API root or collection were not found; a 4-tuple + otherwise. The 4-tuple contains: (list of objects in the first/next + page, a "more" boolean value representing whether there are any + more pages, paging key for use in a TAXII envelope or similar + resource to get subsequent pages, map containing special + X-TAXII-Date-Added-First/Last headers to be added to an HTTP + response). If no matching objects were found, + ([], False, None, {}) is returned. + """ + collection = self.__api_roots.get(api_root) \ + .get("collections", {}) \ + .get(collection_id) + + result = None + if collection: + + paging_key = filter_args.get("next") + if paging_key: + page_objects, more, headers = self.get_next(filter_args, limit) + else: + objects = collection.get("objects", []) + full_filter = MemoryFilter( + filter_args, self.interop_requirements_enforced + ) + page_objects, next_save, headers = full_filter.process_filter( + objects, + limit + ) + + if next_save: + more = True + paging_key = self.set_next(next_save, filter_args) + else: + more = False + + result = page_objects, more, paging_key, headers + + return result + + def get_object_manifest(self, api_root, collection_id, filter_args, limit): - if len(next_save) != 0: - more = True - n = self.set_next(next_save, filter_args) - break - remove_hidden_field(objs) - return create_resource("objects", objs, more, n), headers + result = self._get_objects( + api_root, collection_id, filter_args, limit + ) + + manifest_resource = headers = None + if result: + page_objects, more, paging_key, headers = result + + manifests = _make_manifests(page_objects) + manifest_resource = create_resource("objects", manifests, more, paging_key) + + return manifest_resource, headers + + def get_api_root_information(self, api_root_name): + api_info = self.__api_roots.get(api_root_name, {}).get("information") + + return api_info + + def _get_api_root_statuses(self, api_root_name): + status_map = self.__api_roots.get(api_root_name, {}).get("status", {}) + return status_map.values() + + def get_status(self, api_root_name, status_id): + status = self.__api_roots.get(api_root_name, {}) \ + .get("status", {}) \ + .get(status_id) + + if status is not None: + status["id"] = status_id # just in case + + return status + + def get_objects(self, api_root, collection_id, filter_args, limit): + + result = self._get_objects( + api_root, collection_id, filter_args, limit + ) + + envelope_resource = headers = None + if result: + page_objects, more, paging_key, headers = result + + page_objects = _make_plain_objects(page_objects) + envelope_resource = create_resource("objects", page_objects, more, paging_key) + + return 
envelope_resource, headers def _add_status(self, api_root_name, status): - self._get_api_root_statuses(api_root_name).append(status) - - def add_objects(self, api_root, collection_id, objs, request_time): - if api_root in self.data: - api_info = self._get(api_root) - collections = api_info.get("collections", []) - failed = 0 - succeeded = 0 - pending = 0 + api_root = self.__api_roots.get(api_root_name) + if api_root: + status_map = api_root.get("status") + if status_map is None: + status_map = {} + api_root["status"] = status_map + + status_map[status["id"]] = status + + def add_objects(self, api_root_name, collection_id, objs, request_time): + + api_root = self.__api_roots.get(api_root_name) + if api_root: + collection = api_root.get("collections", {}).get(collection_id) + + status = None + if collection: successes = [] failures = [] - for collection in collections: - if collection_id == collection["id"]: - if "objects" not in collection: - collection["objects"] = [] - try: - for new_obj in objs["objects"]: - version = determine_version(new_obj, request_time) - id_and_version_already_present = False - for obj in collection["objects"]: - if new_obj["id"] == obj["id"]: - if "modified" in new_obj: - if new_obj["modified"] == obj["modified"]: - id_and_version_already_present = True - break - else: - # There is no modified field, so this object is immutable - id_and_version_already_present = True - break - - if id_and_version_already_present: - message = "Object already added" - - else: - message = None - if "modified" not in new_obj and "created" not in new_obj: - new_obj["_date_added"] = version - collection["objects"].append(new_obj) - self._update_manifest(new_obj, api_root, collection["id"], request_time) - - # else: we already have the object, so this is a - # no-op. 
- - status_details = generate_status_details( - new_obj["id"], version, message - ) - successes.append(status_details) - succeeded += 1 - - except Exception as e: - raise ProcessingError("While processing supplied content, an error occurred", 422, e) + collection_objects = collection.get("objects") + if collection_objects is None: + collection_objects = [] + collection["objects"] = collection_objects + + if not isinstance(objs, dict): + raise ProcessingError( + "Invalid TAXII envelope", 422 + ) + + if "objects" not in objs: + raise ProcessingError( + 'Invalid TAXII envelope: missing "objects" property', 422 + ) + + for new_obj in objs["objects"]: + + if not isinstance(new_obj, dict): + failures.append( + generate_status_details( + "", + "", + "Not an object: " + str(new_obj) + ) + ) + continue + + spec_version = determine_spec_version(new_obj) + media_type = "application/stix+json;version=" \ + + spec_version + + try: + + _metafy_object( + new_obj, + date_added=request_time, + media_type=media_type + ) + + id_and_version_already_present = False + for obj in collection_objects: + if new_obj["id"] == obj["id"] \ + and new_obj["__meta"].version == obj["__meta"].version: + id_and_version_already_present = True + break + + if id_and_version_already_present: + message = "Object already added" - status = generate_status( - datetime_to_string(request_time), "complete", succeeded, - failed, pending, successes=successes, - failures=failures, - ) - api_info["status"].append(status) - return status - - def get_object(self, api_root, collection_id, object_id, filter_args, allowed_filters, limit): - more = False - n = None - if api_root in self.data: - api_info = self._get(api_root) - collections = api_info.get("collections", []) - objs = [] - manifests = [] - for collection in collections: - if collection_id == collection["id"]: - manifests = collection.get("manifest", []) - if "next" in filter_args: - objs, more, headers, n = self.get_next(filter_args, allowed_filters, manifests, limit) else: - for obj in collection.get("objects", []): - if object_id == obj["id"]: - objs.append(copy.deepcopy(obj)) - if len(objs) == 0: - raise ProcessingError("Object '{}' not found".format(object_id), 404) - full_filter = BasicFilter(filter_args) - objs, next_save, headers = full_filter.process_filter( - objs, - allowed_filters, - manifests, - limit + message = None + + collection_objects.append(new_obj) + + # if the media type is new, attach it to the collection + # (Note: aren't we supposed to be enforcing the + # collection setting, not allowing users to change it?) + if media_type not in collection["media_types"]: + collection["media_types"].append(media_type) + + status_details = generate_status_details( + new_obj["id"], + timestamp_to_stix_json(new_obj["__meta"].version), + message + ) + successes.append(status_details) + + except Exception as e: + # Who knows how messed up this object is... maybe don't + # assume it has any property we need. 
+ version = getattr(new_obj.get("__meta"), "version", None) + if version: + version = timestamp_to_stix_json(version) + + failures.append( + generate_status_details( + new_obj.get("id", ""), + version + or new_obj.get("modified") + or new_obj.get("created") + or "", + str(e) ) - if len(next_save) != 0: - more = True - n = self.set_next(next_save, filter_args) - break - remove_hidden_field(objs) - return create_resource("objects", objs, more, n), headers - - def delete_object(self, api_root, collection_id, obj_id, filter_args, allowed_filters): - if api_root in self.data: - api_info = self._get(api_root) - collections = api_info.get("collections", []) - objs = [] - manifests = [] - for collection in collections: - if "id" in collection and collection_id == collection["id"]: - coll = collection.get("objects", []) - for obj in coll: - if obj_id == obj["id"]: - objs.append(obj) - manifests = collection.get("manifest", []) - break - - full_filter = BasicFilter(filter_args) - objs, nex, headers = full_filter.process_filter( - objs, - allowed_filters, - manifests, - None + ) + + status = generate_status( + timestamp_to_taxii_json(request_time), "complete", + successes=successes, failures=failures ) - if len(objs) == 0: - raise ProcessingError("Object '{}' not found".format(obj_id), 404) + status_map = api_root.get("status") + if status_map is None: + status_map = {} + api_root["status"] = status_map - for obj in objs: - if obj in coll: - coll.remove(obj) - obj_time = find_att(obj) - for man in manifests: - if obj["id"] == man["id"] and obj_time == find_att(man): - manifests.remove(man) - break + status_map[status["id"]] = status + + return status + + def _object_id_exists(self, api_root, collection_id, object_id): + """ + Inefficiently check for existence of an object with a given ID. + """ + + id_exists = False + + collection_objects = self.__api_roots.get(api_root) \ + .get("collections", {}) \ + .get(collection_id, {}) \ + .get("objects") + + if collection_objects: + id_exists = any( + obj["id"] == object_id for obj in collection_objects + ) - def get_object_versions(self, api_root, collection_id, object_id, filter_args, allowed_filters, limit): - more = False - n = None - if api_root in self.data: - api_info = self._get(api_root) - collections = api_info.get("collections", []) + return id_exists + def get_object(self, api_root, collection_id, object_id, filter_args, limit): + + # Need to do this silly special case check because the get object + # endpoint is defined behaviorally differently than get objects, so + # this can't be implemented simply in terms of that. If no object with + # the given ID exists, we must produce a 404; get objects produces 200 + # and an empty envelope. If an object with the given ID exists but the + # filters filtered all objects out, we must produce a 200 and empty + # envelope (this is compatible with get objects). So if no object + # matches all of the criteria, we need to know why so we can + # distinguish these two cases. Sadly, we have no efficient way to + # do an ID existence check at the moment... :\ + + if not self._object_id_exists(api_root, collection_id, object_id): + raise ProcessingError( + "Object '{}' not found".format(object_id), 404 + ) + + # From here on out, we can delegate to get_objects. 
+ filter_args["match[id]"] = object_id + + envelope_resource, headers = self.get_objects( + api_root, collection_id, filter_args, limit + ) + + return envelope_resource, headers + + def delete_object(self, api_root_name, collection_id, obj_id, filter_args): + + api_root = self.__api_roots.get(api_root_name) + + collection = None + if api_root: + collection = api_root.get("collections", {}).get(collection_id) + else: + raise ProcessingError( + "API root '{}' not found".format(api_root_name), 404 + ) + + if collection: objs = [] - for collection in collections: - if collection_id == collection["id"]: - all_manifests = collection.get("manifest", []) - if "next" in filter_args: - objs, more, headers, n = self.get_next(filter_args, allowed_filters, all_manifests, limit) - objs = sorted(map(lambda x: x["version"], objs), reverse=True) - else: + coll = collection.get("objects", []) + for obj in coll: + if obj_id == obj["id"]: + objs.append(obj) + + if not objs: + raise ProcessingError( + "Object '{}' not found".format(obj_id), 404 + ) - all_manifests = collection.get("manifest", []) - for manifest in all_manifests: - if object_id == manifest["id"]: - objs.append(manifest) - if len(objs) == 0: - raise ProcessingError("Object '{}' not found".format(object_id), 404) - full_filter = BasicFilter(filter_args) - objs, next_save, headers = full_filter.process_filter( - objs, - allowed_filters, - None, - limit - ) - if len(next_save) != 0: - more = True - n = self.set_next(next_save, filter_args) - objs = sorted(map(lambda x: x["version"], objs), reverse=True) - break - return create_resource("versions", objs, more, n), headers + full_filter = MemoryFilter( + filter_args, self.interop_requirements_enforced + ) + objs, _, _ = full_filter.process_filter( + objs + ) + + for obj in objs: + coll.remove(obj) + else: + raise ProcessingError( + "Collection '{}' not found".format(collection_id), 404 + ) + + def get_object_versions(self, api_root, collection_id, object_id, filter_args, limit): + + # Need a special case for object ID not found. _get_objects() will + # return no results, but we won't know why, so we need this + # disambiguating check. + if not self._object_id_exists(api_root, collection_id, object_id): + raise ProcessingError( + "Object '{}' not found".format(object_id), 404 + ) + + # We don't expect any version filtering/limiting on this endpoint. + filter_args["match[version]"] = "all" + filter_args["match[id]"] = object_id + + # If I just delegate to get_object(), I'll get a resource ready for a + # response, without the __meta bits which I would like to use to get + # the versions. So I'll use _get_objects() directly instead. + result = self._get_objects( + api_root, collection_id, filter_args, limit + ) + + versions_resource = headers = None + if result: + + page_objects, more, paging_key, headers = result + + # Transform the page of objects into a versions resource, by + # extracting all of the version information. 
+ versions = [ + timestamp_to_stix_json(obj["__meta"].version) + for obj in page_objects + ] + + versions_resource = create_resource( + "versions", versions, more, paging_key + ) + + return versions_resource, headers diff --git a/medallion/backends/mongodb_backend.py b/medallion/backends/mongodb_backend.py index e0e06abd..23d45692 100644 --- a/medallion/backends/mongodb_backend.py +++ b/medallion/backends/mongodb_backend.py @@ -1,4 +1,4 @@ -import io +import collections import json import logging import uuid @@ -6,16 +6,14 @@ import environ from pymongo import ASCENDING, IndexModel, MongoClient from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError -from six import string_types -# from ..config import get_application_instance_config_values +import medallion.filters.common + from ..common import ( - APPLICATION_INSTANCE, create_resource, datetime_to_float, - datetime_to_string, datetime_to_string_stix, determine_spec_version, - determine_version, float_to_datetime, generate_status, - generate_status_details, get_application_instance_config_values, - get_custom_headers, get_timestamp, parse_request_parameters, - string_to_datetime + create_resource, determine_spec_version, determine_version, + generate_status, generate_status_details, get_custom_headers, + get_timestamp, parse_request_parameters, timestamp_to_epoch_seconds, + timestamp_to_stix_json, timestamp_to_taxii_json ) from ..exceptions import ( InitializationError, MongoBackendError, ProcessingError @@ -27,6 +25,104 @@ log = logging.getLogger(__name__) +# Our special case property transformations need to occur in both directions, +# so we need pairs of transformation functions. This defines a type used to +# store related pairs of functions. +_PropertyTransformer = collections.namedtuple("PropertyTransformer", [ + "json_to_mongo", + "mongo_to_json" +]) + + +# Define any transformer function pairings we need. +_TIMESTAMP_TRANSFORMER = _PropertyTransformer( + timestamp_to_epoch_seconds, + timestamp_to_stix_json +) + + +# Define our property transformation policy. Maps STIX types to a mapping from +# top-level property name to a transformer object. The top-level None key +# is a special case which records property transformations to attempt on all +# STIX types. +# +# Only support transforming top-level properties, for now. Can expand on this +# later if necessary. +_PROPERTY_TRANSFORM_SPECIAL_CASES = { + "indicator": { + "valid_from": _TIMESTAMP_TRANSFORMER, + "valid_until": _TIMESTAMP_TRANSFORMER + }, + None: { + "modified": _TIMESTAMP_TRANSFORMER, + "created": _TIMESTAMP_TRANSFORMER + } +} + + +_MONGO_TIMESTAMP_FILTER = medallion.filters.common.TaxiiFilterInfo( + medallion.filters.common.StixType.TIMESTAMP, + timestamp_to_epoch_seconds +) + + +def _transform_special_case_properties(objs, transform_direction): + """ + Transform any property values which need to be stored in Mongo in a + different form than we receive as STIX JSON. The STIX JSON form may not be + suitable for some types of queries. This function can transform in both + the STIX JSON -> Mongo direction, and the reverse direction. + + The object(s) are modified in-place; there is no return value. + + :param objs: A single or list of objects (dicts) whose properties should be + examined and transformed. + :param transform_direction: Direction of transformation as a string: + "json_to_mongo" or "mongo_to_json". (They are used to look up a + function on a transformer object.) 
+ """ + + if not isinstance(objs, list): + objs = [objs] + + type_neutral_cases = _PROPERTY_TRANSFORM_SPECIAL_CASES.get( + None + ) or {} # avoid unnecessarily creating an empty dict. + + for obj in objs: + type_specific_cases = _PROPERTY_TRANSFORM_SPECIAL_CASES.get( + obj["type"] + ) or {} + + # This prioritizes type-specific transformations over neutral ones, in + # case of conflict. Seems unlikely to happen though. + all_cases = collections.ChainMap( + type_specific_cases, type_neutral_cases + ) + + for prop_name, transformer in all_cases.items(): + if prop_name in obj: + prop_value = obj[prop_name] + trans_func = getattr(transformer, transform_direction) + obj[prop_name] = trans_func(prop_value) + + +def _customize_filters(): + """ + Override some defaults for filtering, with values suitable for this + backend. + """ + # Override some timestamp-typed filters to coerce to epoch seconds, due to + # how we store timestamps in mongo. Okay to overwrite the module globals + # since only one backend should be in use at a time. + medallion.filters.common.CALCULATION_PROPERTIES.update({ + "modified-gte": _MONGO_TIMESTAMP_FILTER, + "modified-lte": _MONGO_TIMESTAMP_FILTER, + "valid_until-gte": _MONGO_TIMESTAMP_FILTER, + "valid_from-lte": _MONGO_TIMESTAMP_FILTER + }) + + def catch_mongodb_error(func): """Catch mongodb availability error""" @@ -39,18 +135,6 @@ def api_wrapper(*args, **kwargs): return api_wrapper -def find_manifest_entries_for_id(obj, manifest): - for m in manifest: - if m["id"] == obj["id"]: - if "modified" in obj: - if m["version"] == obj["modified"]: - return m - else: - # handle data markings - if m["version"] == obj["created"]: - return m - - class MongoBackend(Backend): # access control is handled at the views level @@ -62,10 +146,19 @@ class Config(object): def __init__(self, **kwargs): try: + mongo_client = kwargs.get("mongo_client") + if mongo_client: + # If we are passed a connection, assume someone else is + # managing it; we will not close it ourselves. 
+ self.client = mongo_client + self.owns_connection = False + else: + self.client = MongoClient(kwargs.get("uri")) + self.owns_connection = True + self.pages = {} - self.client = MongoClient(kwargs.get("uri")) - # unless clearing the db has been explicitly specified, don't initialize if the discovery_database exits + # unless clearing the db has been explicitly specified, don't initialize if the discovery_database exists # the discovery_databases is a minimally viable database, if not self.database_established() or kwargs.get("clear_db"): self.clear_db() @@ -79,6 +172,9 @@ def __init__(self, **kwargs): except ConnectionFailure: log.error("Unable to establish a connection to MongoDB server {}".format(kwargs.get("uri"))) + # Mongo backend specific filter overrides + _customize_filters() + def database_established(self): """ Checks to see if a medallion database exists @@ -89,7 +185,7 @@ def _process_params(self, filter_args, limit): next_id = filter_args.get("next") if limit and next_id is None: client_params = parse_request_parameters(filter_args) - record = {"skip": 0, "limit": limit, "args": client_params, "request_time": datetime_to_float(get_timestamp())} + record = {"skip": 0, "limit": limit, "args": client_params, "request_time": timestamp_to_epoch_seconds(get_timestamp())} next_id = str(uuid.uuid4()) self.pages[next_id] = record elif limit and next_id: @@ -99,7 +195,7 @@ def _process_params(self, filter_args, limit): if self.pages[next_id]["args"] != client_params: raise ProcessingError("The server did not understand the request or filter parameters: params changed over subsequent transaction", 400) self.pages[next_id]["limit"] = limit - self.pages[next_id]["request_time"] = datetime_to_float(get_timestamp()) + self.pages[next_id]["request_time"] = timestamp_to_epoch_seconds(get_timestamp()) record = self.pages[next_id] else: record = {} @@ -124,7 +220,7 @@ def _validate_object_id(self, manifest_info, collection_id, object_id): def _pop_expired_sessions(self): expired_ids = [] - boundary = datetime_to_float(get_timestamp()) + boundary = timestamp_to_epoch_seconds(get_timestamp()) for next_id, record in self.pages.items(): if boundary - record["request_time"] > self.timeout: expired_ids.append(next_id) @@ -168,7 +264,7 @@ def _pop_old_statuses(self): log.info("Status {} was deleted from {} because it was older than the status retention time".format(doc["id"], ar)) statuses_of_api_root.delete_one({"_id": doc["_id"]}) - def _get_object_manifest(self, api_root, collection_id, filter_args, allowed_filters, limit, internal=False): + def _get_object_manifest(self, api_root, collection_id, filter_args, limit, internal=False): api_root_db = self.client[api_root] objects_info = api_root_db["objects"] next_id, record = self._process_params(filter_args, limit) @@ -176,18 +272,17 @@ def _get_object_manifest(self, api_root, collection_id, filter_args, allowed_fil full_filter = MongoDBFilter( filter_args, {"_collection_id": {"$eq": collection_id}}, - allowed_filters, - record + record, + interop=self.interop_requirements_enforced ) count, objects_found = full_filter.process_filter( objects_info, - allowed_filters, "manifests", ) for obj in objects_found: - obj["date_added"] = datetime_to_string(float_to_datetime(obj["date_added"])) - obj["version"] = datetime_to_string_stix(float_to_datetime(obj["version"])) + obj["date_added"] = timestamp_to_taxii_json(obj["date_added"]) + obj["version"] = timestamp_to_stix_json(obj["version"]) next_id, more = self._update_record(next_id, count, internal) 
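
As a sketch of the paging-record lifecycle that _process_params implements
above (the literal values below are hypothetical, and the "skip" bookkeeping
actually lives in _update_record, which this hunk only touches in passing):

    import uuid

    from medallion.common import get_timestamp, timestamp_to_epoch_seconds

    pages = {}

    # First request: no "next" parameter, so mint a paging record plus an ID
    # the client can echo back to resume.
    record = {
        "skip": 0,
        "limit": 50,                           # hypothetical page size
        "args": {"match[type]": "indicator"},  # hypothetical filter params
        "request_time": timestamp_to_epoch_seconds(get_timestamp()),
    }
    next_id = str(uuid.uuid4())
    pages[next_id] = record

    # Follow-up request: the client's parameters must match the stored ones;
    # the record's offset then advances by one page per request.
    assert pages[next_id]["args"] == {"match[type]": "indicator"}
    pages[next_id]["skip"] += pages[next_id]["limit"]
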
manifest_resource = create_resource("objects", objects_found, more, next_id) @@ -256,7 +351,7 @@ def get_collections(self, api_root): collection_info = api_root_db["collections"] collections = list(collection_info.find({}, {"_id": 0})) # interop wants results sorted by id - no need to check for interop option - if get_application_instance_config_values(APPLICATION_INSTANCE, "taxii", "interop_requirements"): + if self.interop_requirements_enforced: collections = sorted(collections, key=lambda o: o["id"]) return create_resource("collections", collections) @@ -271,8 +366,8 @@ def get_collection(self, api_root, collection_id): return info @catch_mongodb_error - def get_object_manifest(self, api_root, collection_id, filter_args, allowed_filters, limit): - return self._get_object_manifest(api_root, collection_id, filter_args, allowed_filters, limit, False) + def get_object_manifest(self, api_root, collection_id, filter_args, limit): + return self._get_object_manifest(api_root, collection_id, filter_args, limit, False) @catch_mongodb_error def get_api_root_information(self, api_root_name): @@ -300,7 +395,7 @@ def get_status(self, api_root, status_id): return result @catch_mongodb_error - def get_objects(self, api_root, collection_id, filter_args, allowed_filters, limit): + def get_objects(self, api_root, collection_id, filter_args, limit): api_root_db = self.client[api_root] objects_info = api_root_db["objects"] next_id, record = self._process_params(filter_args, limit) @@ -308,24 +403,19 @@ def get_objects(self, api_root, collection_id, filter_args, allowed_filters, lim full_filter = MongoDBFilter( filter_args, {"_collection_id": {"$eq": collection_id}}, - allowed_filters, - record + record, + interop=self.interop_requirements_enforced ) # Note: error handling was not added to following call as mongo will # handle (user supplied) filters gracefully if they don't exist count, objects_found = full_filter.process_filter( objects_info, - allowed_filters, "objects" ) - for obj in objects_found: - if "modified" in obj: - obj["modified"] = datetime_to_string_stix(float_to_datetime(obj["modified"])) - if "created" in obj: - obj["created"] = datetime_to_string_stix(float_to_datetime(obj["created"])) + _transform_special_case_properties(objects_found, "mongo_to_json") - manifest_resource = self._get_object_manifest(api_root, collection_id, filter_args, allowed_filters, limit, True) + manifest_resource = self._get_object_manifest(api_root, collection_id, filter_args, limit, True) headers = get_custom_headers(manifest_resource) next_id, more = self._update_record(next_id, count) @@ -340,11 +430,7 @@ def _add_status(self, api_root_name, status): def add_objects(self, api_root, collection_id, objs, request_time): api_root_db = self.client[api_root] objects_info = api_root_db["objects"] - failed = 0 - succeeded = 0 - pending = 0 successes = [] - failures = [] media_fmt = "application/stix+json;version={}" try: @@ -352,7 +438,7 @@ def add_objects(self, api_root, collection_id, objs, request_time): media_type = media_fmt.format(determine_spec_version(new_obj)) mongo_query = {"_collection_id": collection_id, "id": new_obj["id"], "_manifest.media_type": media_type} if "modified" in new_obj: - mongo_query["_manifest.version"] = datetime_to_float(string_to_datetime(new_obj["modified"])) + mongo_query["_manifest.version"] = timestamp_to_epoch_seconds(new_obj["modified"]) existing_entry = objects_info.find_one(mongo_query) obj_version = determine_version(new_obj, request_time) @@ -362,42 +448,36 @@ def 
add_objects(self, api_root, collection_id, objs, request_time): else: message = None new_obj.update({"_collection_id": collection_id}) - if "modified" in new_obj: - new_obj["modified"] = datetime_to_float(string_to_datetime(new_obj["modified"])) - if "created" in new_obj: - new_obj["created"] = datetime_to_float(string_to_datetime(new_obj["created"])) + _transform_special_case_properties(new_obj, "json_to_mongo") _manifest = { "id": new_obj["id"], - "date_added": datetime_to_float(request_time), - "version": datetime_to_float(string_to_datetime(obj_version)), + "date_added": timestamp_to_epoch_seconds(request_time), + "version": timestamp_to_epoch_seconds(obj_version), "media_type": media_type, } new_obj.update({"_manifest": _manifest}) objects_info.insert_one(new_obj) self._update_manifest(api_root, collection_id, media_type) - # else: we already have the object, so this is a - # no-op. - status_detail = generate_status_details( - new_obj["id"], obj_version, message + new_obj["id"], timestamp_to_stix_json(obj_version), + message ) successes.append(status_detail) - succeeded += 1 except Exception as e: # log.exception(e) raise ProcessingError("While processing supplied content, an error occurred", 422, e) status = generate_status( - datetime_to_string(request_time), "complete", succeeded, failed, - pending, successes=successes, failures=failures, + timestamp_to_taxii_json(request_time), "complete", + successes=successes ) api_root_db["status"].insert_one(status) status.pop("_id", None) return status @catch_mongodb_error - def get_object(self, api_root, collection_id, object_id, filter_args, allowed_filters, limit): + def get_object(self, api_root, collection_id, object_id, filter_args, limit): api_root_db = self.client[api_root] objects_info = api_root_db["objects"] # set manually to properly retrieve manifests, and early to not break the pagination checks @@ -409,29 +489,24 @@ def get_object(self, api_root, collection_id, object_id, filter_args, allowed_fi full_filter = MongoDBFilter( filter_args, {"_collection_id": {"$eq": collection_id}, "id": {"$eq": object_id}}, - allowed_filters, - record + record, + interop=self.interop_requirements_enforced ) count, objects_found = full_filter.process_filter( objects_info, - allowed_filters, "objects" ) - for obj in objects_found: - if "modified" in obj: - obj["modified"] = datetime_to_string_stix(float_to_datetime(obj["modified"])) - if "created" in obj: - obj["created"] = datetime_to_string_stix(float_to_datetime(obj["created"])) + _transform_special_case_properties(objects_found, "mongo_to_json") - manifest_resource = self._get_object_manifest(api_root, collection_id, filter_args, ("id", "type", "version", "spec_version"), limit, True) + manifest_resource = self._get_object_manifest(api_root, collection_id, filter_args, limit, True) headers = get_custom_headers(manifest_resource) next_id, more = self._update_record(next_id, count) return create_resource("objects", objects_found, more, next_id), headers @catch_mongodb_error - def delete_object(self, api_root, collection_id, object_id, filter_args, allowed_filters): + def delete_object(self, api_root, collection_id, object_id, filter_args): api_root_db = self.client[api_root] objects_info = api_root_db["objects"] @@ -441,11 +516,10 @@ def delete_object(self, api_root, collection_id, object_id, filter_args, allowed full_filter = MongoDBFilter( filter_args, {"_collection_id": {"$eq": collection_id}, "id": {"$eq": object_id}}, - allowed_filters, + interop=self.interop_requirements_enforced ) count, 
objects_found = full_filter.process_filter( objects_info, - allowed_filters, "raw" ) if objects_found: @@ -458,7 +532,7 @@ def delete_object(self, api_root, collection_id, object_id, filter_args, allowed raise ProcessingError("Object '{}' not found".format(object_id), 404) @catch_mongodb_error - def get_object_versions(self, api_root, collection_id, object_id, filter_args, allowed_filters, limit): + def get_object_versions(self, api_root, collection_id, object_id, filter_args, limit): api_root_db = self.client[api_root] objects_info = api_root_db["objects"] # set manually to properly retrieve manifests, and early to not break the pagination checks @@ -471,26 +545,25 @@ def get_object_versions(self, api_root, collection_id, object_id, filter_args, a full_filter = MongoDBFilter( filter_args, {"_collection_id": {"$eq": collection_id}, "id": {"$eq": object_id}}, - allowed_filters, - record + record, + interop=self.interop_requirements_enforced ) count, manifests_found = full_filter.process_filter( objects_info, - allowed_filters, "manifests", ) - manifest_resource = self._get_object_manifest(api_root, collection_id, filter_args, ("id", "type", "version", "spec_version"), limit, True) + manifest_resource = self._get_object_manifest(api_root, collection_id, filter_args, limit, True) headers = get_custom_headers(manifest_resource) - manifests_found = list(map(lambda x: datetime_to_string_stix(float_to_datetime(x["version"])), manifests_found)) + manifests_found = list(map(lambda x: timestamp_to_stix_json(x["version"]), manifests_found)) next_id, more = self._update_record(next_id, count) return create_resource("versions", manifests_found, more, next_id), headers def load_data_from_file(self, filename): try: - if isinstance(filename, string_types): - with io.open(filename, "r", encoding="utf-8") as infile: + if isinstance(filename, str): + with open(filename, "r", encoding="utf-8") as infile: self.json_data = json.load(infile) else: self.json_data = json.load(filename) @@ -515,28 +588,34 @@ def initialize_mongodb_with_data(self, filename): self.client.drop_database(api_root_name) api_db = self.client[api_root_name] if api_root_data["status"]: - api_db["status"].insert_many(api_root_data["status"]) + api_db["status"].insert_many(api_root_data["status"].values()) else: api_db.create_collection("status") api_db.create_collection("collections") api_db.create_collection("objects") - for collection in api_root_data["collections"]: - collection_id = collection["id"] - objects = collection["objects"] - manifest = collection["manifest"] + for collection_id, collection in api_root_data["collections"].items(): # these are not in the collections mongodb collection (both TAXII and Mongo DB use the term collection) - collection.pop("objects") - collection.pop("manifest") + objects = collection.pop("objects") api_db["collections"].insert_one(collection) for obj in objects: + + _transform_special_case_properties(obj, "json_to_mongo") + + obj_meta = obj.pop("__meta") obj["_collection_id"] = collection_id - obj["_manifest"] = find_manifest_entries_for_id(obj, manifest) - obj["_manifest"]["date_added"] = datetime_to_float(string_to_datetime(obj["_manifest"]["date_added"])) - obj["_manifest"]["version"] = datetime_to_float(string_to_datetime(obj["_manifest"]["version"])) - obj["created"] = datetime_to_float(string_to_datetime(obj["created"])) - if "modified" in obj: - # not for data markings - obj["modified"] = datetime_to_float(string_to_datetime(obj["modified"])) + date_added = 
timestamp_to_epoch_seconds(obj_meta["date_added"]) + version = timestamp_to_epoch_seconds( + obj.get("modified") + or obj.get("created") + or date_added + ) + obj["_manifest"] = { + "date_added": date_added, + "id": obj["id"], + "media_type": obj_meta["media_type"], + "version": version + } + api_db["objects"].insert_one(obj) id_index = IndexModel([("id", ASCENDING)]) type_index = IndexModel([("type", ASCENDING)]) @@ -554,15 +633,21 @@ def initialize_mongodb_with_data(self, filename): def clear_db(self): if "discovery_database" in self.client.list_database_names(): log.info("Clearing database") + discovery_db = self.client["discovery_database"] + api_root_info = discovery_db["api_root_info"] + for api_info in api_root_info.find({}): + self.client.drop_database(api_info["_name"]) self.client.drop_database("discovery_database") - discovery_db = self.client["discovery_database"] - api_root_info = discovery_db["api_root_info"] - for api_info in api_root_info.find({}): - self.client.drop_database(api_info["_name"]) - self.client.drop_database("discovery_database") # db with empty tables log.info("Creating empty database") discovery_db = self.client.get_database("discovery_database") discovery_db.create_collection("discovery_information") discovery_db.create_collection("api_root_info") return discovery_db + + def close(self): + # Important to call super.close() first, since it stops threads + # which might try to access a closed mongo connection. + super().close() + if self.owns_connection: + self.client.close() diff --git a/medallion/common.py b/medallion/common.py index 364bf6d4..5b735c5c 100644 --- a/medallion/common.py +++ b/medallion/common.py @@ -1,13 +1,8 @@ -import calendar import datetime as dt import threading import uuid -from flask import Flask import pytz -from six import iteritems - -APPLICATION_INSTANCE = Flask("medallion") def create_resource(resource_name, items, more=False, next_id=None): @@ -16,7 +11,7 @@ def create_resource(resource_name, items, more=False, next_id=None): if items: resource[resource_name] = items if resource_name == "objects" or resource_name == "versions": - if next_id and resource: + if more and next_id and resource: resource["next"] = next_id if resource: resource["more"] = more @@ -26,7 +21,13 @@ def create_resource(resource_name, items, more=False, next_id=None): def determine_version(new_obj, request_time): """Grab the modified time if present, if not grab created time, if not grab request time provided by server.""" - return new_obj.get("modified", new_obj.get("created", datetime_to_string(request_time))) + obj_version = new_obj.get("modified") or new_obj.get("created") + if obj_version: + obj_version = timestamp_to_datetime(obj_version) + else: + obj_version = request_time + + return obj_version def determine_spec_version(obj): @@ -40,62 +41,6 @@ def determine_spec_version(obj): return obj.get("spec_version", "2.0") -def get(data, key): - """Given a dict, loop recursively over the object. Returns the value based on the key match""" - for ancestors, item in iterpath(data): - if key in ancestors: - return item - - -def iterpath(obj, path=None): - """ - Generator which walks the input ``obj`` model. Each iteration yields a - tuple containing a list of ancestors and the property value. - - Args: - obj: A SDO or SRO object. - path: None, used recursively to store ancestors. - - Example: - >>> for item in iterpath(obj): - >>> print(item) - (['type'], 'campaign') - ... 
- (['cybox', 'objects', '[0]', 'hashes', 'sha1'], 'cac35ec206d868b7d7cb0b55f31d9425b075082b') - - Returns: - tuple: Containing two items: a list of ancestors and the property value. - - """ - if path is None: - path = [] - - for varname, varobj in iter(sorted(iteritems(obj))): - path.append(varname) - yield (path, varobj) - - if isinstance(varobj, dict): - - for item in iterpath(varobj, path): - yield item - - elif isinstance(varobj, list): - - for item in varobj: - index = "[{0}]".format(varobj.index(item)) - path.append(index) - - yield (path, item) - - if isinstance(item, dict): - for descendant in iterpath(item, path): - yield descendant - - path.pop() - - path.pop() - - def get_timestamp(): """Get current time with UTC offset""" return dt.datetime.now(tz=pytz.UTC) @@ -141,31 +86,150 @@ def datetime_to_string_stix(dttm): def datetime_to_float(dttm): """Given a datetime instance, return its representation as a float""" - # Based on this solution: https://stackoverflow.com/questions/30020988/python3-datetime-timestamp-in-python2 - if dttm.tzinfo is None: - return calendar.timegm(dttm.utctimetuple()) + dttm.microsecond / 1e6 - else: - return (dttm - dt.datetime(1970, 1, 1, tzinfo=pytz.UTC)).total_seconds() + return dttm.timestamp() def float_to_datetime(timestamp_float): """Given a floating-point number, produce a datetime instance""" - return dt.datetime.utcfromtimestamp(timestamp_float) + result = dt.datetime.utcfromtimestamp(timestamp_float) + result = result.replace(tzinfo=dt.timezone.utc) + return result def string_to_datetime(timestamp_string): """Convert string timestamp to datetime instance.""" try: - return dt.datetime.strptime(timestamp_string, "%Y-%m-%dT%H:%M:%S.%fZ") + result = dt.datetime.strptime(timestamp_string, "%Y-%m-%dT%H:%M:%S.%fZ") except ValueError: - return dt.datetime.strptime(timestamp_string, "%Y-%m-%dT%H:%M:%SZ") + result = dt.datetime.strptime(timestamp_string, "%Y-%m-%dT%H:%M:%SZ") + + result = result.replace(tzinfo=dt.timezone.utc) + + return result + + +def timestamp_to_epoch_seconds(timestamp): + """ + Convert a timestamp to epoch seconds. This is a more general purpose + conversion function supporting a few different input types: strings, + numbers (i.e. value is already in epoch seconds), and datetime objects. + + :param timestamp: A timestamp as a string, number, or datetime object + :return: Number of epoch seconds (can be a float with fractional seconds) + """ + if isinstance(timestamp, (int, float)): + result = timestamp + elif isinstance(timestamp, str): + result = datetime_to_float(string_to_datetime(timestamp)) + elif isinstance(timestamp, dt.datetime): + result = timestamp.timestamp() + else: + raise TypeError( + "Can't convert {} to an epoch seconds timestamp".format( + type(timestamp) + ) + ) + + return result + + +def timestamp_to_stix_json(timestamp): + """ + Convert a timestamp to STIX JSON. This is a more general purpose + conversion function supporting a few different input types: strings + (i.e. value is already a STIX JSON timestamp), numbers (epoch seconds), and + datetime objects. + + :param timestamp: A timestamp as a string, number, or datetime object + :return: A STIX JSON timestamp string + """ + if isinstance(timestamp, (int, float)): + result = datetime_to_string_stix(float_to_datetime(timestamp)) + elif isinstance(timestamp, str): + result = timestamp # any format verification? 
+ elif isinstance(timestamp, dt.datetime): + result = datetime_to_string_stix(timestamp) + else: + raise TypeError( + "Can't convert {} to a STIX JSON timestamp string".format( + type(timestamp) + ) + ) + + return result + + +def timestamp_to_taxii_json(timestamp): + """ + Convert a timestamp to TAXII JSON. This is a more general purpose + conversion function supporting a few different input types: strings + (i.e. value is already a TAXII JSON timestamp), numbers (epoch seconds), + and datetime objects. From the TAXII spec: "Unlike the STIX timestamp + type, the TAXII timestamp MUST have microsecond precision." + + :param timestamp: A timestamp as a string, number, or datetime object + :return: A TAXII JSON timestamp string + """ + if isinstance(timestamp, (int, float)): + result = datetime_to_string(float_to_datetime(timestamp)) + elif isinstance(timestamp, str): + result = timestamp # any format verification? + elif isinstance(timestamp, dt.datetime): + result = datetime_to_string(timestamp) + else: + raise TypeError( + "Can't convert {} to a TAXII JSON timestamp string".format( + type(timestamp) + ) + ) + + return result + + +def timestamp_to_datetime(timestamp): + """ + Convert a timestamp to a datetime object. This is a more general purpose + conversion function supporting a few different input types: strings, + numbers (epoch seconds), and datetime objects. + + :param timestamp: A timestamp as a string, number, or datetime object + :return: A timezone-aware datetime object in the UTC timezone + """ + if isinstance(timestamp, (int, float)): + result = float_to_datetime(timestamp) + elif isinstance(timestamp, str): + result = string_to_datetime(timestamp) + elif isinstance(timestamp, dt.datetime): + + # If no timezone, treat as UTC directly + if timestamp.tzinfo is None: + timestamp = timestamp.replace(tzinfo=dt.timezone.utc) + + # If timezone is not equivalent to UTC, convert to UTC (try to write + # this in a way which is agnostic to the actual tzinfo implementation). + elif timestamp.utcoffset() != dt.timezone.utc.utcoffset(None): + timestamp = timestamp.astimezone(dt.timezone.utc) + + result = timestamp + + else: + raise TypeError( + "Can't convert {} to a datetime instance".format( + type(timestamp) + ) + ) + + return result def generate_status( - request_time, status, succeeded, failed, pending, - successes=None, failures=None, pendings=None, + request_time, status, successes=(), failures=(), pendings=() ): """Generate Status Resource as defined in TAXII 2.1 section (4.3.1) `__.""" + succeeded = len(successes) + failed = len(failures) + pending = len(pendings) + status = { "id": str(uuid.uuid4()), "status": status, @@ -220,72 +284,44 @@ def parse_request_parameters(filter_args): return session_args -def find_att(obj): - """ - Used for finding the version attribute of an ambiguous object. Manifests - use the "version" field, but objects will use "modified", or if that's not - available, the "created" field. 
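
For illustration, the timestamp_to_* helpers added above accept strings, epoch
numbers, and datetime objects interchangeably; a minimal sketch, assuming
datetime_to_string_stix emits millisecond-precision timestamps:

    import datetime as dt

    from medallion.common import (
        timestamp_to_datetime, timestamp_to_epoch_seconds,
        timestamp_to_stix_json
    )

    # All three input representations denote the same instant.
    when = dt.datetime(2023, 2, 1, 12, tzinfo=dt.timezone.utc)
    assert timestamp_to_epoch_seconds("2023-02-01T12:00:00.000Z") == 1675252800.0
    assert timestamp_to_epoch_seconds(when) == 1675252800.0
    assert timestamp_to_datetime(1675252800.0) == when

    # ...and back out to the STIX JSON string form.
    assert timestamp_to_stix_json(1675252800.0) == "2023-02-01T12:00:00.000Z"
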
- - Args: - obj (dict): manifest or stix object - - Returns: - string value of the field from the object to use for versioning - - """ - if "version" in obj: - return string_to_datetime(obj["version"]) - elif "modified" in obj: - return string_to_datetime(obj["modified"]) - elif "created" in obj: - return string_to_datetime(obj["created"]) - else: - return string_to_datetime(obj["_date_added"]) - - -def find_version_attribute(obj): - """Depending on the object, modified, created or _date_added is used to store the - object version""" - if "modified" in obj: - return "modified" - elif "created" in obj: - return "created" - elif "_date_added" in obj: - return "_date_added" - - class TaskChecker(object): """Calls a target method every X seconds to perform a task.""" def __init__(self, interval, target_function): self.interval = interval self.target_function = target_function - self.thread = threading.Timer(interval=self.interval, function=self.handle_function) - self.thread.daemon = True + self.lock = threading.Lock() + # One can "cancel" a timer, but that does nothing if the time has + # already expired. In that case, we need this flag to tell it to not + # schedule a new timer. + self.stop_flag = False + + # Create a task checker in an un-started state. + self.__reset_timer(start=False) def handle_function(self): self.target_function() - self.thread = threading.Timer(interval=self.interval, function=self.handle_function) - self.thread.daemon = True - self.thread.start() + self.__reset_timer() + + def __reset_timer(self, start=True): + with self.lock: + if not self.stop_flag: + self.thread = threading.Timer( + interval=self.interval, function=self.handle_function + ) + self.thread.daemon = True + if start: + self.start() def start(self): self.thread.start() - -def get_application_instance_config_values(flask_application_instance, config_group, config_key=None): - if config_group == "taxii": - if flask_application_instance.taxii_config and config_key in flask_application_instance.taxii_config: - return flask_application_instance.taxii_config[config_key] - else: - return flask_application_instance.taxii_config - if config_group == "users": - if flask_application_instance.users_config and config_key in flask_application_instance.users_config: - return flask_application_instance.users_config[config_key] - else: - return flask_application_instance.users_config - if config_group == "backend": - if flask_application_instance.backend_config and config_key in flask_application_instance.backend_config: - return flask_application_instance.backend_config[config_key] - else: - return flask_application_instance.backend_config + def stop(self, timeout=None): + with self.lock: + self.thread.cancel() + self.stop_flag = True + # Implies a timer thread must not call this method! + # It can be important to wait for thread termination: a backend has to + # be careful not to release resources a task checker thread might use, + # before the thread has terminated. 
+ self.thread.join(timeout) diff --git a/medallion/exceptions.py b/medallion/exceptions.py index a62765f0..b6447b28 100644 --- a/medallion/exceptions.py +++ b/medallion/exceptions.py @@ -38,3 +38,8 @@ class BackendError(MedallionError): class MongoBackendError(BackendError): """Cannot connect or obtain access to MongoDB backend""" pass + + +class MemoryBackendError(BackendError): + """Internal error in the memory backend.""" + pass diff --git a/medallion/filters/basic_filter.py b/medallion/filters/basic_filter.py deleted file mode 100644 index 9623d01e..00000000 --- a/medallion/filters/basic_filter.py +++ /dev/null @@ -1,210 +0,0 @@ -import bisect -import operator - -from ..common import determine_spec_version, find_att, string_to_datetime - - -def check_for_dupes(final_match, final_track, res): - for obj in res: - found = 0 - pos = bisect.bisect_left(final_track, obj["id"]) - if not final_match or pos > len(final_track) - 1 or final_track[pos] != obj["id"]: - final_track.insert(pos, obj["id"]) - final_match.insert(pos, obj) - else: - obj_time = find_att(obj) - while pos != len(final_track) and obj["id"] == final_track[pos]: - if find_att(final_match[pos]) == obj_time: - found = 1 - break - else: - pos = pos + 1 - if found == 1: - continue - else: - final_track.insert(pos, obj["id"]) - final_match.insert(pos, obj) - - -def check_version(data, relate): - id_track = [] - res = [] - for obj in data: - pos = bisect.bisect_left(id_track, obj["id"]) - if not res or pos >= len(id_track) or id_track[pos] != obj["id"]: - id_track.insert(pos, obj["id"]) - res.insert(pos, obj) - else: - if relate(find_att(obj), find_att(res[pos])): - res[pos] = obj - return res - - -class BasicFilter(object): - - def __init__(self, filter_args): - self.filter_args = filter_args - self.match_type = self.filter_args.get("match[type]") - if self.match_type: - self.match_type = self.match_type.split(",") - self.match_id = self.filter_args.get("match[id]") - if self.match_id: - self.match_id = self.match_id.split(",") - self.added_after_date = self.filter_args.get("added_after") - self.match_spec_version = self.filter_args.get("match[spec_version]") - if self.match_spec_version: - self.match_spec_version = self.match_spec_version.split(",") - - def sort_and_paginate(self, data, limit, manifest): - temp = None - next_save = {} - headers = {} - new = [] - if len(data) == 0: - return new, next_save, headers - if manifest: - manifest.sort(key=lambda x: x['date_added']) - for man in manifest: - man_time = find_att(man) - for check in data: - check_time = find_att(check) - if check['id'] == man['id'] and check_time == man_time: - if len(headers) == 0: - headers["X-TAXII-Date-Added-First"] = man["date_added"] - new.append(check) - temp = man - if len(new) == limit: - headers["X-TAXII-Date-Added-Last"] = man["date_added"] - break - if limit and limit < len(data): - next_save = new[limit:] - new = new[:limit] - else: - headers["X-TAXII-Date-Added-Last"] = temp["date_added"] - else: - data.sort(key=lambda x: x['date_added']) - if limit and limit < len(data): - next_save = data[limit:] - data = data[:limit] - headers["X-TAXII-Date-Added-First"] = data[0]["date_added"] - headers["X-TAXII-Date-Added-Last"] = data[-1]["date_added"] - new = data - return new, next_save, headers - - @staticmethod - def check_added_after(obj, manifest_info, added_after_date): - added_after_timestamp = string_to_datetime(added_after_date) - # for manifest objects and versions - if manifest_info is None: - if string_to_datetime(obj["date_added"]) > 
added_after_timestamp: - return True - return False - # for other objects with manifests - else: - obj_time = find_att(obj) - for item in manifest_info: - item_time = find_att(item) - if item["id"] == obj["id"] and item_time == obj_time and string_to_datetime(item["date_added"]) > added_after_timestamp: - return True - return False - - @staticmethod - def filter_by_version(data, version): - # final_match is a sorted list of objects - final_match = [] - # final_track is a sorted list of id's - final_track = [] - - # return most recent object versions unless otherwise specified - if version is None: - version = "last" - version_indicators = version.split(",") - - if "all" in version_indicators: - # if "all" is in the list, just return everything - return data - - actual_dates = [string_to_datetime(x) for x in version_indicators if x != "first" and x != "last"] - # if a specific version is given, filter for objects with that value - if actual_dates: - id_track = [] - res = [] - for obj in data: - obj_time = find_att(obj) - if obj_time in actual_dates: - pos = bisect.bisect_left(id_track, obj["id"]) - id_track.insert(pos, obj["id"]) - res.insert(pos, obj) - final_match = res - final_track = id_track - - if "first" in version_indicators: - res = check_version(data, operator.lt) - check_for_dupes(final_match, final_track, res) - - if "last" in version_indicators: - res = check_version(data, operator.gt) - check_for_dupes(final_match, final_track, res) - - return final_match - - @staticmethod - def check_by_spec_version(obj, spec_, data): - if spec_: - if "media_type" in obj: - if any(s == obj["media_type"].split("version=")[1] for s in spec_): - return True - elif any(s == determine_spec_version(obj) for s in spec_): - return True - else: - add = True - if "media_type" in obj: - s1 = obj["media_type"].split("version=")[1] - else: - s1 = determine_spec_version(obj) - for match in data: - if "media_type" in match: - s2 = match["media_type"].split("version=")[1] - else: - s2 = determine_spec_version(match) - if obj["id"] == match["id"] and s2 > s1: - add = False - if add: - return True - return False - - def process_filter(self, data, allowed=(), manifest_info=(), limit=None): - filtered_by_version = [] - final_match = [] - save_next = [] - headers = {} - match_objects = [] - if (self.match_type and "type" in allowed) or (self.match_id and "id" in allowed) \ - or (self.added_after_date) or ("spec_version" in allowed): - for obj in data: - if self.match_type and "type" in allowed: - if not (any(s == obj.get("type") for s in self.match_type)) and not (any(s == obj.get("id").split("--")[0] for s in self.match_type)): - continue - if self.match_id and "id" in allowed: - if not ("id" in obj and any(s == obj["id"] for s in self.match_id)): - continue - - if self.added_after_date: - if not self.check_added_after(obj, manifest_info, self.added_after_date): - continue - - if "spec_version" in allowed: - if not self.check_by_spec_version(obj, self.match_spec_version, data): - continue - match_objects.append(obj) - else: - match_objects = data - if "version" in allowed: - match_version = self.filter_args.get("match[version]") - filtered_by_version = self.filter_by_version(match_objects, match_version) - else: - filtered_by_version = match_objects - - # sort objects by date_added of manifest and paginate as necessary - final_match, save_next, headers = self.sort_and_paginate(filtered_by_version, limit, manifest_info) - return final_match, save_next, headers diff --git a/medallion/filters/common.py 
b/medallion/filters/common.py new file mode 100644 index 00000000..e651cea5 --- /dev/null +++ b/medallion/filters/common.py @@ -0,0 +1,234 @@ +""" +Some informational and convenience APIs for filters. +""" +import collections +import enum + +import medallion.common + +# A type which collects common info about a TAXII filter. +TaxiiFilterInfo = collections.namedtuple( + "TaxiiFilterInfo", [ + # One of the StixType enum values. Should reflect the STIX-defined + # semantics of the property being filtered. I thought it might be + # useful in case someone wanted to make decisions based on the property + # type, beyond doing type coercion. + "stix_type", + + # This must be a function of one argument, which returns a value likely + # to be usable in the context of the aforementioned type. So, no + # one-size-fits-all here, but the defaults chosen in this module should + # hopefully be reasonable. Backend implementations can treat decisions + # in this module as defaults, and override them. + # + # It must at minimum be capable of converting from a string (useful + # for converting values from URL query parameters), and should also be + # idempotent (passing through a value if its type is already correct). + "type_coercer" + ] +) + + +class StixType(enum.Enum): + """ + STIX types, used as the value of the stix_type attribute of the namedtuple + above. + """ + + BOOLEAN = enum.auto() + INTEGER = enum.auto() + STRING = enum.auto() + TIMESTAMP = enum.auto() + + +_TLP_SHORT_NAME_MAP = { + "white": "marking-definition--613f2e26-407d-48c7-9eca-b8e91df99dc9", + "green": "marking-definition--34098fce-860f-48ae-8e50-ebd3cc5e41da", + "amber": "marking-definition--f88d31f6-486f-44da-b317-01333bde0b82", + "red": "marking-definition--5e57c739-391a-4eb3-b6be-7d15ca92d5ed" +} + + +def tlp_short_name_to_id(tlp_short_name): + """ + Utility for the TAXII interop tier 3 "tlp" filter. That filter uses the + short names "white", "green", etc instead of marking definition IDs. This + function resolves a TLP short name as used in that filter, to a marking + definition ID. + + Raises TypeError/ValueError on type or value errors, to act similarly to + type coercers, rather than returning null (I'm imagining this function + could be used that way). Passes through valid TLP marking definition IDs, + for the sake of idempotence. + + :param tlp_short_name: A TLP "short name" + :raises ValueError: if tlp_short_name is not a recognized TLP short name + :raises TypeError: if tlp_short_name is not a string + """ + + if not isinstance(tlp_short_name, str): + raise TypeError( + "TLP marking short name must be a string: " + str(tlp_short_name) + ) + + # For idempotence + if tlp_short_name in _TLP_SHORT_NAME_MAP.values(): + marking_id = tlp_short_name + + else: + marking_id = _TLP_SHORT_NAME_MAP.get(tlp_short_name) + + if not marking_id: + raise ValueError( + "Unrecognized TLP marking short name: " + tlp_short_name + ) + + return marking_id + + +def bool_coerce(value): + """ + A coercer function to bool, which treats "false" as False. That's how + the STIX/TAXII boolean values are defined. In Python, bool("false") is + True, so we require something slightly more complex. + + :param value: The value to coerce to a bool + :return: True or False + """ + + result = bool(value) and value != "false" + + return result + + +# Some default sets of settings for the various STIX types. +# The type coercion functions may not be suitable for everyone, but these are +# some reasonable defaults, hopefully. 
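
A quick sketch of the two coercers defined above (the marking ID is TLP:AMBER
from the map above; the rest is illustrative):

    from medallion.filters.common import bool_coerce, tlp_short_name_to_id

    amber_id = "marking-definition--f88d31f6-486f-44da-b317-01333bde0b82"
    assert tlp_short_name_to_id("amber") == amber_id
    assert tlp_short_name_to_id(amber_id) == amber_id  # idempotent

    # bool("false") is True in Python, hence the dedicated coercer.
    assert bool_coerce("false") is False
    assert bool_coerce("true") is True
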
+TAXII_STRING_FILTER = TaxiiFilterInfo(StixType.STRING, str) +TAXII_INTEGER_FILTER = TaxiiFilterInfo(StixType.INTEGER, int) +TAXII_BOOLEAN_FILTER = TaxiiFilterInfo(StixType.BOOLEAN, bool_coerce) +TAXII_TIMESTAMP_FILTER = TaxiiFilterInfo( + StixType.TIMESTAMP, medallion.common.timestamp_to_datetime +) +# Does not actually do a type coercion, but I think it has a useful default +# effect: convert the short name used in the TAXII interop tlp filter to a +# value to be used in a query (a marking definition ID). +TAXII_TLP_SHORT_NAME_FILTER = TaxiiFilterInfo( + StixType.STRING, tlp_short_name_to_id +) + + +BUILTIN_PROPERTIES = { + "id": TAXII_STRING_FILTER, + "type": TAXII_STRING_FILTER, + + # skipping version, spec_version, added_after, as special cases +} + + +TIER_1_PROPERTIES = { + "account_type": TAXII_STRING_FILTER, + "confidence": TAXII_INTEGER_FILTER, + "context": TAXII_STRING_FILTER, + "data_type": TAXII_STRING_FILTER, + "dst_port": TAXII_INTEGER_FILTER, + "encryption_algorithm": TAXII_STRING_FILTER, + "identity_class": TAXII_STRING_FILTER, + "name": TAXII_STRING_FILTER, + "number": TAXII_INTEGER_FILTER, + "opinion": TAXII_STRING_FILTER, + "pattern": TAXII_STRING_FILTER, + "pattern_type": TAXII_STRING_FILTER, + "primary_motivation": TAXII_STRING_FILTER, + "region": TAXII_STRING_FILTER, + "relationship_type": TAXII_STRING_FILTER, + "resource_level": TAXII_STRING_FILTER, + "result": TAXII_STRING_FILTER, + "revoked": TAXII_BOOLEAN_FILTER, + "src_port": TAXII_INTEGER_FILTER, + "sophistication": TAXII_STRING_FILTER, + "subject": TAXII_STRING_FILTER, + "value": TAXII_STRING_FILTER +} + + +TIER_2_PROPERTIES = { + "aliases": TAXII_STRING_FILTER, + "architecture_execution_envs": TAXII_STRING_FILTER, + "capabilities": TAXII_STRING_FILTER, + "extension_types": TAXII_STRING_FILTER, + "implementation_languages": TAXII_STRING_FILTER, + "indicator_types": TAXII_STRING_FILTER, + "infrastructure_types": TAXII_STRING_FILTER, + "labels": TAXII_STRING_FILTER, + "malware_types": TAXII_STRING_FILTER, + "personal_motivations": TAXII_STRING_FILTER, + "report_types": TAXII_STRING_FILTER, + "roles": TAXII_STRING_FILTER, + "secondary_motivations": TAXII_STRING_FILTER, + "sectors": TAXII_STRING_FILTER, + "threat_actor_types": TAXII_STRING_FILTER, + "tool_types": TAXII_STRING_FILTER +} + + +TIER_3_PROPERTIES = { + "address_family": TAXII_STRING_FILTER, + "external_id": TAXII_STRING_FILTER, + "MD5": TAXII_STRING_FILTER, + "SHA-1": TAXII_STRING_FILTER, + "SHA-256": TAXII_STRING_FILTER, + "SHA-512": TAXII_STRING_FILTER, + "SHA3-256": TAXII_STRING_FILTER, + "SHA3-512": TAXII_STRING_FILTER, + "SSDEEP": TAXII_STRING_FILTER, + "TLSH": TAXII_STRING_FILTER, + "integrity_level": TAXII_STRING_FILTER, + "pe_type": TAXII_STRING_FILTER, + "phase_name": TAXII_STRING_FILTER, + "service_status": TAXII_STRING_FILTER, + "service_type": TAXII_STRING_FILTER, + "socket_type": TAXII_STRING_FILTER, + "source_name": TAXII_STRING_FILTER, + "start_type": TAXII_STRING_FILTER, + "tlp": TAXII_TLP_SHORT_NAME_FILTER +} + + +RELATIONSHIP_PROPERTIES = { + "relationships-all": TAXII_STRING_FILTER +} + + +CALCULATION_PROPERTIES = { + "confidence-gte": TAXII_INTEGER_FILTER, + "confidence-lte": TAXII_INTEGER_FILTER, + "modified-gte": TAXII_TIMESTAMP_FILTER, + "modified-lte": TAXII_TIMESTAMP_FILTER, + "number-gte": TAXII_INTEGER_FILTER, + "number-lte": TAXII_INTEGER_FILTER, + "src_port-gte": TAXII_INTEGER_FILTER, + "src_port-lte": TAXII_INTEGER_FILTER, + "dst_port-gte": TAXII_INTEGER_FILTER, + "dst_port-lte": TAXII_INTEGER_FILTER, + "valid_until-gte": 
TAXII_TIMESTAMP_FILTER, + "valid_from-lte": TAXII_TIMESTAMP_FILTER +} + + +def get_filter_info(filter_name): + """ + Given a match filter name (the part inside square brackets in a + "match[...]" TAXII query parameter), find a TaxiiFilterInfo object for + the filter. The object gives some helpful info about the filter. + + :param filter_name: A match filter name (without surrounding "match[...]") + :return: A TaxiiFilterInfo object, or None if nothing is known about the + filter + """ + return BUILTIN_PROPERTIES.get(filter_name) \ + or TIER_1_PROPERTIES.get(filter_name) \ + or TIER_2_PROPERTIES.get(filter_name) \ + or TIER_3_PROPERTIES.get(filter_name) \ + or CALCULATION_PROPERTIES.get(filter_name) \ + or RELATIONSHIP_PROPERTIES.get(filter_name) diff --git a/medallion/filters/memory_filter.py b/medallion/filters/memory_filter.py new file mode 100644 index 00000000..7f784b59 --- /dev/null +++ b/medallion/filters/memory_filter.py @@ -0,0 +1,834 @@ +import collections +import itertools +import operator + +from ..common import timestamp_to_datetime, timestamp_to_taxii_json +from ..exceptions import ProcessingError +from .common import ( + BUILTIN_PROPERTIES, TAXII_INTEGER_FILTER, TAXII_STRING_FILTER, + TAXII_TIMESTAMP_FILTER, TAXII_TLP_SHORT_NAME_FILTER, TIER_1_PROPERTIES, + TIER_2_PROPERTIES, TIER_3_PROPERTIES +) + + +def _recurse_simple_valued_properties(value): + """ + Recursively search for and generate simple-valued property names and + values. + """ + + if isinstance(value, list): + for sub_value in value: + yield from _recurse_simple_valued_properties(sub_value) + + elif isinstance(value, dict): + for key, sub_value in value.items(): + if isinstance(sub_value, (list, dict)): + yield from _recurse_simple_valued_properties(sub_value) + else: + yield key, sub_value + + +def _simple_valued_properties(obj, include_toplevel=True): + """ + Find simple-valued properties of the given object. I.e. properties which + are neither dicts nor lists. This generates the prop names and values. + (And skip over the __meta info.) + + :param obj: The object to search + :param include_toplevel: Whether to include top level property names and + values in what is generated. + """ + for prop_name, prop_value in obj.items(): + if prop_name != "__meta": + if isinstance(prop_value, (list, dict)): + yield from _recurse_simple_valued_properties(prop_value) + elif include_toplevel: + yield prop_name, prop_value + + +def _ref_properties(value): + """ + Find reference property names and values from the given value. For _refs + properties, each value is generated separately, with the same key. + + :param value: The value to search + """ + if isinstance(value, list): + for sub_value in value: + yield from _ref_properties(sub_value) + + elif isinstance(value, dict): + for key, sub_value in value.items(): + if key.endswith("_ref"): + yield key, sub_value + elif key.endswith("_refs"): + for ref in sub_value: + yield key, ref + elif key != "__meta": + yield from _ref_properties(sub_value) + + +class Matcher: + """ + Abstract base class giving the most basic interface for evaluating an + object against some values given in a query, and producing a true/false + value. + """ + def match(self, obj, match_values): + """ + Perform a match on the given object using the given query values. 
+
+        :param obj: The object to match
+        :param match_values: An iterable of query values derived from a query
+        :return: True if the object matches; False if not
+        """
+        raise NotImplementedError()
+
+
+class SimplePropertyValueMatcher(Matcher):
+    """
+    Abstract base class for matchers which operate by comparing a property
+    value from an object against a set of query values. This might involve
+    coercing both the query and property values to a particular type, to ensure
+    proper comparison semantics. So this class adds support for a type
+    coercer function.
+
+    Subclasses will expect that match values (i.e. values taken from a query)
+    be coerced before passing them to the match() method. This is more
+    efficient than the match() method coercing the same values repeatedly for
+    each object being matched. The coerce_values() method is provided for
+    this.
+    """
+    def __init__(
+        self,
+        *,
+        filter_info
+    ):
+        """
+        Initialize an instance of this matcher.
+
+        :param filter_info: filter info as a TaxiiFilterInfo object
+        """
+        # We only need the type coercer function, for now
+        self.type_coercer = filter_info.type_coercer
+
+    def coerce_values(self, values):
+        """
+        Coerce the given iterable of values using this object's type coercer
+        function. Return the results of coercion as a set. Of course, this
+        requires that the type being coerced to is hashable.
+
+        :param values: Iterable of values to coerce
+        :return: Set of coerced values
+        """
+        return set(
+            self.type_coercer(value)
+            for value in values
+        )
+
+
+class TopLevelPropertyMatcher(SimplePropertyValueMatcher):
+    """
+    A matcher which operates by checking the value of a top-level property on
+    an object. Deeper searches are not supported. This works on list-valued
+    properties as well as plain (non-list, non-object) properties.
+    """
+    def __init__(
+        self,
+        toplevel_prop_name,
+        *,
+        filter_info,
+        default_value=None
+    ):
+        """
+        Initialize an instance of this matcher.
+
+        :param toplevel_prop_name: The top-level property name to look for
+        :param filter_info: filter info as a TaxiiFilterInfo object
+        :param default_value: A default value which will be treated as if it
+            were in effect if an object does not have the given top-level
+            property. If None, the given top-level property will not be
+            treated as having a default value.
+        """
+        super().__init__(filter_info=filter_info)
+
+        self.toplevel_prop_name = toplevel_prop_name
+        self.default_value = default_value
+
+    def match(self, obj, match_values):
+        value = obj.get(self.toplevel_prop_name, self.default_value)
+
+        if value is None:
+            # Object does not have the property of interest and no default is
+            # defined for it, so we just fail the match.
+            result = False
+
+        else:
+
+            if not isinstance(value, list):
+                value = [value]
+
+            try:
+                coerced_values = self.coerce_values(value)
+
+            except ValueError:
+                # Type coercion failure
+                result = False
+
+            else:
+                result = not coerced_values.isdisjoint(match_values)
+
+        return result
+
+
+class SubPropertyMatcher(SimplePropertyValueMatcher):
+    """
+    Matcher which matches on a value of a non-top-level simple-valued property.
+    A simple-valued property is one whose value is not a list or dict. The
+    whole object, excluding top-level properties, is searched for properties
+    of a given name, and their values are checked.
+    """
+    def __init__(
+        self,
+        sub_prop_name,
+        *,
+        filter_info
+    ):
+        super().__init__(filter_info=filter_info)
+
+        self.sub_prop_name = sub_prop_name
+
+    def match(self, obj, match_values):
+        result = False
+
+        # This implementation allows a property name to occur in more than one
+        # place, and continues searching until a match is found or all
+        # properties are checked. Should we optimize and give up searching
+        # after the first occurrence (i.e. assume a given property never occurs
+        # in more than one place in an object)?
+        for prop_name, simple_prop_value \
+                in _simple_valued_properties(obj, include_toplevel=False):
+            if prop_name == self.sub_prop_name:
+                try:
+                    coerced_value = self.type_coercer(simple_prop_value)
+                except ValueError:
+                    # Type coercion failure
+                    result = False
+                else:
+                    result = coerced_value in match_values
+
+                if result:
+                    break
+
+        return result
+
+
+class TLPMatcher(SimplePropertyValueMatcher):
+    """
+    Matcher which checks TLP markings, including object and granular markings.
+    """
+
+    def __init__(self):
+        super().__init__(
+            # hard-code this; can't be anything else!
+            filter_info=TAXII_TLP_SHORT_NAME_FILTER
+        )
+
+    def match(self, obj, match_values):
+        # Dump all markings into the same set; there is no need to distinguish
+        # object from granular, for this purpose.
+        all_marking_refs = set(obj.get("object_marking_refs", []))
+
+        granular_markings = obj.get("granular_markings", [])
+        for granular_marking in granular_markings:
+            marking_ref = granular_marking.get("marking_ref")
+            if marking_ref:
+                all_marking_refs.add(marking_ref)
+
+        result = not all_marking_refs.isdisjoint(match_values)
+
+        return result
+
+
+class RelationshipsAllMatcher(SimplePropertyValueMatcher):
+    """
+    Matches objects based on their embedded references.
+    """
+    def __init__(self):
+        super().__init__(
+            filter_info=TAXII_STRING_FILTER
+        )
+
+    def match(self, obj, match_values):
+
+        result = False
+        for ref_prop_name, ref_prop_value in _ref_properties(obj):
+
+            result = ref_prop_value in match_values
+
+            if result:
+                break
+
+        return result
+
+
+class CalculationMatcher(SimplePropertyValueMatcher):
+    """
+    Matches objects based on an arbitrary boolean-valued function evaluated on
+    a property value and a query value, e.g. the property value being less than
+    at least one of the query values.
+    """
+    def __init__(self, prop_name, op, *, filter_info):
+        super().__init__(filter_info=filter_info)
+
+        self.prop_name = prop_name
+        self.op = op
+
+    def match(self, obj, match_values):
+
+        result = False
+        for prop_name, prop_value in _simple_valued_properties(obj):
+
+            if prop_name == self.prop_name:
+                try:
+                    prop_value = self.type_coercer(prop_value)
+
+                except ValueError:
+                    # Type coercion failure
+                    result = False
+
+                else:
+                    result = any(
+                        self.op(prop_value, match_value)
+                        for match_value in match_values
+                    )
+
+                if result:
+                    break
+
+        return result
+
+
+class AddedAfterMatcher(SimplePropertyValueMatcher):
+    """
+    Matches objects based on date_added metadata.
+    """
+    def __init__(self):
+        super().__init__(filter_info=TAXII_TIMESTAMP_FILTER)
+
+    def match(self, obj, match_values):
+        # In case there are multiple query values. But there shouldn't be.
+        match_value = min(match_values)
+
+        return obj["__meta"].date_added > match_value
+
+
+class SpecVersionMatcher(Matcher):
+    """
+    Matcher which supports the TAXII spec_version match field.
+    """
+    def __init__(self, data):
+        """
+        Initialize this matcher.
This prepares the matcher to operate on the + objects in the given data set, by setting up a data structure to make + it more efficient. + + :param data: A list of objects from the memory backend, which will be + subject to this matcher. + """ + + # Build a map from ID to a list of all objects of the latest spec + # version, with that ID. We treat plain versioning as being able to + # span spec versions, i.e. all objects with the same ID are part of + # the same history, regardless of spec_version. We need to find all + # objects of the latest spec version from each ID. + self.__spec_latest = collections.defaultdict(list) + for obj in data: + + latest_objects = self.__spec_latest[obj["id"]] + + if latest_objects: + obj_spec_version = obj["__meta"].spec_version_tuple + latest_spec_version = latest_objects[0]["__meta"].spec_version_tuple + + if obj_spec_version > latest_spec_version: + latest_objects.clear() + latest_objects.append(obj) + + elif obj_spec_version == latest_spec_version: + latest_objects.append(obj) + + else: + latest_objects.append(obj) + + def latest_objects(self): + yield from itertools.chain.from_iterable(self.__spec_latest.values()) + + def match(self, obj, match_values): + """ + Perform the match. If match_values is None, return a match if obj is + the latest spec version. + + :param obj: The object to match + :param match_values: A list of spec versions (strings), or None + :return: True if obj matches; False if not + """ + + result = False + if match_values: + result = obj["__meta"].spec_version in match_values + + else: + # if match_values is None, we want the latest spec version. + latest_objects = self.__spec_latest[obj["id"]] + + if latest_objects: + # Fearing "obj in latest_objects" might be slow. It might have + # to search all dict entries to determine equality of two + # dicts. But obj and the objects in latest_objects are + # versions from the same family of objects, so it is only + # necessary to check for a matching version. + result = obj["__meta"].version in ( + o["__meta"].version for o in latest_objects + ) + # else: An object we've never seen before?? Eval to false I + # guess? + + return result + + +class VersionMatcher(Matcher): + """ + Matcher which supports the TAXII version match field. + """ + def __init__(self, data): + """ + Initialize this matcher. This prepares the matcher to operate on the + objects in the given data set, by setting up a data structure to make + it more efficient. + + :param data: A list of objects from the memory backend, which will be + subject to this matcher. + """ + + # Map from ID to the earliest and latest versions of objects from that + # family of objects. We treat plain versioning as being able to + # span spec versions, i.e. all objects with the same ID are part of + # the same history, regardless of spec_version. This means the + # earliest and latest versions may not be of the same spec_version. + self.__earliest_latest_versions = {} + for obj in data: + + versions = self.__earliest_latest_versions.get(obj["id"]) + if versions: + earliest, latest = versions + if obj["__meta"].version < earliest["__meta"].version: + versions[0] = obj + if obj["__meta"].version > latest["__meta"].version: + versions[1] = obj + + else: + self.__earliest_latest_versions[obj["id"]] = [obj, obj] + + def earliest_objects(self): + """ + Generate the earliest objects from this matcher's internal data + structure; this enables some optimizations. 
+        """
+        for earliest, _ in self.__earliest_latest_versions.values():
+            yield earliest
+
+    def latest_objects(self):
+        """
+        Generate the latest objects from this matcher's internal data
+        structure; this enables some optimizations.
+        """
+        for _, latest in self.__earliest_latest_versions.values():
+            yield latest
+
+    def earliest_latest_objects(self):
+        """
+        Generate all of the earliest and latest objects from this matcher's
+        internal data structure; this enables some optimizations.
+        """
+        for earliest, latest in self.__earliest_latest_versions.values():
+            yield earliest
+
+            if earliest is not latest:
+                # In case the earliest and latest are the same, don't cause
+                # duplication of objects!
+                yield latest
+
+    def match(self, obj, match_values):
+        """
+        Perform the match. If match_values is None, return a match if obj is
+        the latest version.
+
+        :param obj: The object to match
+        :param match_values: A list of versions, each of which can be "first",
+            "last", "all", or a datetime object; or None
+        :return: True if obj matches; False if not
+        """
+
+        versions = self.__earliest_latest_versions.get(obj["id"])
+
+        result = False
+        if versions:
+            earliest, latest = versions
+
+            if match_values:
+                for match_value in match_values:
+                    if match_value == "all":
+                        result = True
+                    elif match_value == "first":
+                        result = obj is earliest
+                    elif match_value == "last":
+                        result = obj is latest
+                    else:
+                        # match_value is a datetime object
+                        result = obj["__meta"].version == match_value
+
+                    if result:
+                        break
+
+            else:
+                # match_values is None; match only the latest object.
+                result = obj is latest
+
+        # else: an object we've never seen before?? Eval to false I guess?
+
+        return result
+
+
+# These are defined by the TAXII spec itself.
+_BUILTIN_MATCHERS = {
+    match_type: TopLevelPropertyMatcher(match_type, filter_info=filter_info)
+    for match_type, filter_info in BUILTIN_PROPERTIES.items()
+}
+
+
+# Tier 1 defined as "simple top-level properties".
+_INTEROP_TIER_1_MATCHERS = {
+    match_type: TopLevelPropertyMatcher(
+        match_type,
+        filter_info=filter_info,
+        # "revoked" is the one case where we have a default to consider
+        default_value=False if match_type == "revoked" else None
+    )
+    for match_type, filter_info in TIER_1_PROPERTIES.items()
+}
+
+
+# Tier 2 defined as "array elements (lists) defined as top-level properties".
+_INTEROP_TIER_2_MATCHERS = {
+    match_type: TopLevelPropertyMatcher(match_type, filter_info=filter_info)
+    for match_type, filter_info in TIER_2_PROPERTIES.items()
+}
+
+
+# Tier 3 defined as "properties defined within nested structures".
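
To illustrate how the pre-instantiated matchers above are meant to be driven
(the sample object is hypothetical): query values are coerced once via
coerce_values(), then tested against each object.

    matcher = TLPMatcher()
    match_values = matcher.coerce_values(["amber"])  # short name -> marking ID

    obj = {
        "type": "indicator",
        "id": "indicator--00000000-0000-4000-8000-000000000000",
        "object_marking_refs": [
            "marking-definition--f88d31f6-486f-44da-b317-01333bde0b82"
        ],
    }
    assert matcher.match(obj, match_values)

    # Tier 1 filters check a top-level property; "revoked" falls back to its
    # default of False when the property is absent.
    revoked = _INTEROP_TIER_1_MATCHERS["revoked"]
    assert revoked.match(obj, revoked.coerce_values(["false"]))
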
+_INTEROP_TIER_3_MATCHERS = { + match_type: + TLPMatcher() if match_type == "tlp" # special matcher for tlp + else SubPropertyMatcher(match_type, filter_info=filter_info) + for match_type, filter_info in TIER_3_PROPERTIES.items() +} + + +_INTEROP_RELATIONSHIPS_MATCHERS = { + "relationships-all": RelationshipsAllMatcher() +} + + +_INTEROP_CALCULATION_MATCHERS = { + "confidence-gte": CalculationMatcher( + "confidence", operator.ge, filter_info=TAXII_INTEGER_FILTER + ), + "confidence-lte": CalculationMatcher( + "confidence", operator.le, filter_info=TAXII_INTEGER_FILTER + ), + "modified-gte": CalculationMatcher( + "modified", operator.ge, filter_info=TAXII_TIMESTAMP_FILTER + ), + "modified-lte": CalculationMatcher( + "modified", operator.le, filter_info=TAXII_TIMESTAMP_FILTER + ), + "number-gte": CalculationMatcher( + "number", operator.ge, filter_info=TAXII_INTEGER_FILTER + ), + "number-lte": CalculationMatcher( + "number", operator.le, filter_info=TAXII_INTEGER_FILTER + ), + "src_port-gte": CalculationMatcher( + "src_port", operator.ge, filter_info=TAXII_INTEGER_FILTER + ), + "src_port-lte": CalculationMatcher( + "src_port", operator.le, filter_info=TAXII_INTEGER_FILTER + ), + "dst_port-gte": CalculationMatcher( + "dst_port", operator.ge, filter_info=TAXII_INTEGER_FILTER + ), + "dst_port-lte": CalculationMatcher( + "dst_port", operator.le, filter_info=TAXII_INTEGER_FILTER + ), + "valid_until-gte": CalculationMatcher( + "valid_until", operator.ge, filter_info=TAXII_TIMESTAMP_FILTER + ), + "valid_from-lte": CalculationMatcher( + "valid_from", operator.le, filter_info=TAXII_TIMESTAMP_FILTER + ) +} + + +# Special case filter query param which does not use the match[...] syntax. +_ADDED_AFTER_MATCHER = AddedAfterMatcher() + + +def _speed_tier(filter_name): + """ + As an optimization, filters can be sorted such that faster matchers run + first. If a fast matcher rejects an object, it prevents slower matchers + from needing to run, which speeds up the filtering process. This function + is usable as a sort key function on filter names, to sort by speed. It + returns an integer "speed tier" which is just a simple integer performance + rating, where smaller is faster. + + :param filter_name: A filter name + :return: A speed tier as an integer + """ + + if filter_name.startswith("match[") and filter_name.endswith("]"): + filter_name = filter_name[6:-1] + + # Simple matchers on fixed properties should be quick + if filter_name in _BUILTIN_MATCHERS \ + or filter_name in _INTEROP_TIER_1_MATCHERS \ + or filter_name == "added_after": + speed_tier = 1 + + # Similarly quick to tier 1, but these need to search through list + # valued properties, so a bit slower + elif filter_name in _INTEROP_TIER_2_MATCHERS: + speed_tier = 2 + + # These need to search whole objects, which can be slow + elif filter_name in _INTEROP_TIER_3_MATCHERS \ + or filter_name in _INTEROP_RELATIONSHIPS_MATCHERS \ + or filter_name in _INTEROP_CALCULATION_MATCHERS: + speed_tier = 3 + + else: + speed_tier = 4 + + return speed_tier + + +def _get_property_matcher(filter_arg, interop): + """ + Get a pre-instantiated property matcher for the given filter, if one + exists. Most filters are like this; match[version] and match[spec_version] + are notable exceptions, since their behavior must depend on a larger + context than just the object being filtered. This means those matchers + can't be pre-instantiated, i.e. the same matcher instance can't be used for + all datasets. + + :param filter_arg: The value of a filter query parameter, e.g. 
"match[foo]" + :param interop: Whether to recognize interop filters. If True, additional + types of matchers may be returned. + :return: A matcher object, or None if one could not be found for the given + query parameter. + """ + matcher = None + + if filter_arg == "added_after": + matcher = _ADDED_AFTER_MATCHER + + elif filter_arg.startswith("match[") and filter_arg.endswith("]"): + filter_name = filter_arg[6:-1] + matcher = _BUILTIN_MATCHERS.get(filter_name) + + if not matcher and interop: + matcher = _INTEROP_TIER_1_MATCHERS.get(filter_name) \ + or _INTEROP_TIER_2_MATCHERS.get(filter_name) \ + or _INTEROP_TIER_3_MATCHERS.get(filter_name) \ + or _INTEROP_RELATIONSHIPS_MATCHERS.get(filter_name) \ + or _INTEROP_CALCULATION_MATCHERS.get(filter_name) + + return matcher + + +def _do_version_filter(objects, version_match_values): + """ + Performs match[version] filtering. + + :param objects: The objects to filter + :param version_match_values: The value of the match[version] query + parameter, or None. If None, treat as "last". + :return: A list of matching objects + """ + if version_match_values: + version_match_values = version_match_values.split(",") + else: + version_match_values = ["last"] + + # Do nothing if "all" is included as a match value. VersionMatcher does + # handle it correctly, but as an optimization we should just skip the + # filtering altogether if we can. + if "all" in version_match_values: + matched_objects = objects + + else: + + # Must coerce datetime strings to objects; also convert to a set, + # which makes subsequent code simpler. + for idx, value in enumerate(version_match_values): + if value not in ("first", "last", "all"): + try: + version_match_values[idx] = timestamp_to_datetime(value) + except ValueError: + raise ProcessingError( + "Invalid query value for match[version]: " + value, + 400 + ) + + version_match_values = set(version_match_values) + + version_matcher = VersionMatcher(objects) + + # We can do some more optimizations: since construction of + # VersionMatcher above sets up a data structure where the earliest and + # latest versions of all objects are readily available, if the + # match values include only "first"/"last", we can use that directly + # and avoid looping through all the objects again. + if version_match_values == {"first"}: + matched_objects = list(version_matcher.earliest_objects()) + + elif version_match_values == {"last"}: + matched_objects = list(version_matcher.latest_objects()) + + elif version_match_values == {"first", "last"}: + matched_objects = list(version_matcher.earliest_latest_objects()) + + else: + matched_objects = [ + obj for obj in objects + if version_matcher.match(obj, version_match_values) + ] + + return matched_objects + + +def _do_spec_version_filter(objects, spec_version_match_values): + """ + Performs match[spec_version] filtering. + + :param objects: The objects to filter + :param spec_version_match_values: The value of the match[spec_version] + query parameter, or None. If None, retain only the latest spec + versions of objects. + :return: A list of matching objects + """ + if spec_version_match_values: + spec_version_match_values = spec_version_match_values.split(",") + + spec_version_matcher = SpecVersionMatcher(objects) + + if spec_version_match_values: + matched_objects = [ + obj for obj in objects + if spec_version_matcher.match(obj, spec_version_match_values) + ] + + else: + # match[spec_version] not given. We must retain the latest spec + # versions of objects. 
+ # + # As an optimization, take advantage of the data structure which + # SpecVersionMatcher has already created, to get the latest versions + # of everything. + matched_objects = list(spec_version_matcher.latest_objects()) + + return matched_objects + + +class MemoryFilter(object): + + def __init__(self, filter_args, interop=False): + self.filter_args = filter_args + self.interop = interop + + # Optimization: order filter application such that faster filters + # run first. + self.filter_order = sorted(self.filter_args, key=_speed_tier) + + def sort_and_paginate(self, data, limit): + data.sort(key=lambda x: x["__meta"].date_added) + + if limit is None: + new = data + next_save = [] + else: + new = data[:limit] + next_save = data[limit:] + + headers = {} + if new: + headers["X-TAXII-Date-Added-First"] = timestamp_to_taxii_json( + new[0]["__meta"].date_added + ) + headers["X-TAXII-Date-Added-Last"] = timestamp_to_taxii_json( + new[-1]["__meta"].date_added + ) + + return new, next_save, headers + + def process_filter(self, data, limit=None): + + # Collect the match objects and relevant information we need to do + # the filtering. This weeds out filter args we don't recognize, and + # which aren't simple filters we can handle in a uniform way. We can + # handle the bulk of them uniformly like this. + prop_matchers = [] + for filter_key in self.filter_order: + matcher = _get_property_matcher(filter_key, self.interop) + + if matcher: + filter_values = set(self.filter_args[filter_key].split(",")) + + if isinstance(matcher, SimplePropertyValueMatcher): + try: + filter_values = matcher.coerce_values(filter_values) + except ValueError: + # Type coercion failure. + raise ProcessingError( + "Invalid query value(s) for " + filter_key, 400 + ) + + prop_matchers.append((matcher, filter_values)) + + matched_objects = [] + for obj in data: + for matcher, match_values in prop_matchers: + if not matcher.match(obj, match_values): + break + else: + matched_objects.append(obj) + + # match[version] and match[spec_version] need more specialized handling + # due to their requirement to handle "first" and "last" type values. + # Those evaluations can't be done solely based on an individual object. + matched_objects = _do_version_filter( + matched_objects, self.filter_args.get("match[version]") + ) + + matched_objects = _do_spec_version_filter( + matched_objects, self.filter_args.get("match[spec_version]") + ) + + # sort objects by date_added and paginate as necessary + final_match, save_next, headers = self.sort_and_paginate( + matched_objects, limit + ) + + return final_match, save_next, headers diff --git a/medallion/filters/mongodb_filter.py b/medallion/filters/mongodb_filter.py index f08d133c..776dd558 100644 --- a/medallion/filters/mongodb_filter.py +++ b/medallion/filters/mongodb_filter.py @@ -1,106 +1,723 @@ from bson.son import SON from pymongo import ASCENDING -from ..common import datetime_to_float, string_to_datetime -from .basic_filter import BasicFilter +import medallion.common +import medallion.filters.common +from ..exceptions import ProcessingError -class MongoDBFilter(BasicFilter): +# These are basically "blueprints" for how to construct mongo queries. They +# are primarily intended for the more complicated interop match filters, e.g. +# tier 3. Tier 3 filters require looking in various places inside nested +# structures within objects. The following describes where to look, within +# which types of STIX objects. 
+# +# The top level keys are the values in square brackets in "match[...]" style +# TAXII query parameters. The next level keys are STIX types, which determine +# which type of objects will be examined for that query. The STIX type keys +# map to a list of mongo query paths specific to the type. These are used +# directly in mongo queries, so they must follow mongo query syntax rules. +# +# Where None is used for a STIX type (second level key), it means the mapped +# paths should be searched in all object types. This is appropriate for very +# general facilities which exist across all STIX object types. +_MONGO_MATCH_SPECS = { + "address_family": { + "network-traffic": ["extensions.socket-ext.address_family"] + }, + "external_id": { + None: ["external_references.external_id"] + }, + "integrity_level": { + "process": ["extensions.windows-process-ext.integrity_level"] + }, + "pe_type": { + "file": ["extensions.windows-pebinary-ext.pe_type"] + }, + "phase_name": { + "attack-pattern": ["kill_chain_phases.phase_name"], + "indicator": ["kill_chain_phases.phase_name"], + "infrastructure": ["kill_chain_phases.phase_name"], + "malware": ["kill_chain_phases.phase_name"], + "tool": ["kill_chain_phases.phase_name"] + }, + "service_status": { + "process": ["extensions.windows-service-ext.service_status"] + }, + "service_type": { + "process": ["extensions.windows-service-ext.service_type"] + }, + "socket_type": { + "network-traffic": ["extensions.socket-ext.socket_type"] + }, + "source_name": { + None: ["external_references.source_name"] + }, + "start_type": { + "process": ["extensions.windows-service-ext.start_type"] + }, + "tlp": { + None: [ + "object_marking_refs", + "granular_markings.marking_ref" + ] + }, + # Relationships interop filter, for locating embedded relationships + "relationships-all": { + None: [ + "created_by_ref", + "granular_markings.marking_ref", + "object_marking_refs" + ], + "directory": [ + "contains_refs" + ], + "domain-name": [ + "resolves_to_refs" + ], + "email-addr": [ + "belongs_to_ref" + ], + "email-message": [ + "bcc_refs", + "body_multipart.body_raw_ref", + "cc_refs", + "from_ref", + "raw_email_ref", + "sender_ref", + "to_refs" + ], + "file": [ + "contains_refs", + "extensions.archive-ext.contains_refs", + "content_ref", + "parent_directory_ref" + ], + "grouping": [ + "object_refs" + ], + "ipv4-addr": [ + "belongs_to_refs", + "resolves_to_refs" + ], + "ipv6-addr": [ + "belongs_to_refs", + "resolves_to_refs" + ], + "language-content": [ + "object_ref" + ], + "malware": [ + "operating_system_refs", + "sample_refs" + ], + "malware-analysis": [ + "analysis_sco_refs", + "host_vm_ref", + "installed_software_refs", + "operating_system_ref", + "sample_ref" + ], + "network-traffic": [ + "dst_payload_ref", + "dst_ref", + "encapsulated_by_ref", + "encapsulates_refs", + "extensions.http-request-ext.message_body_data_ref", + "src_payload_ref", + "src_ref" + ], + "note": [ + "object_refs" + ], + "observed-data": [ + "object_refs" + ], + "opinion": [ + "object_refs" + ], + "process": [ + "child_refs", + "creator_user_ref", + "image_ref", + "opened_connection_refs", + "parent_ref", + "extensions.windows-service-ext.service_dll_refs" + ], + "relationship": [ + "source_ref", + "target_ref" + ], + "report": [ + "object_refs" + ], + "sighting": [ + "observed_data_refs", + "sighting_of_ref", + "where_sighted_refs" + ], + "windows-registry-key": [ + "creator_user_ref" + ] + } +} - def __init__(self, filter_args, basic_filter, allowed, record=None): - super(MongoDBFilter, 
self).__init__(filter_args) - self.basic_filter = basic_filter - self.full_query = self._query_parameters(allowed) - self.record = record - def _query_parameters(self, allowed): - parameters = self.basic_filter - if self.filter_args: - match_type = self.filter_args.get("match[type]") - if match_type and "type" in allowed: - types_ = match_type.split(",") - if len(types_) == 1: - parameters["type"] = {"$eq": types_[0]} +# Make the hashes entries to the above mapping programmatically... too verbose +# to write it all out directly! +for hash_type in ( + "MD5", + "SHA-1", + "SHA-256", + "SHA-512", + "SHA3-256", + "SHA3-512", + "SSDEEP", + "TLSH" +): + _MONGO_MATCH_SPECS[hash_type] = { + None: ["external_references.hashes." + hash_type], + "artifact": ["hashes." + hash_type], + "file": [ + "hashes." + hash_type, + "extensions.ntfs-ext.alternate_data_streams.hashes." + hash_type, + "extensions.windows-pebinary-ext.file_header_hashes." + hash_type, + "extensions.windows-pebinary-ext.optional_header.hashes." + hash_type, + "extensions.windows-pebinary-ext.sections.hashes." + hash_type, + ], + "x509-certificate": ["hashes." + hash_type] + } + + +def _coerce_filter_args(filter_args): + """ + Split query parameter values on commas, and coerce them to python types + appropriate to the semantics of the TAXII filter and this mongo backend + implementation. + + Unrecognized parameters are split on commas but their values are otherwise + not changed. + + :param filter_args: TAXII HTTP query parameters, as a mapping from string + to string. + :return: A mapping from string to list of values of other types. + :raises ProcessingError: If coercion of any parameter value fails + """ + + coerced_filter_args = {} + + for arg_name, arg_value in filter_args.items(): + + coerced_values = [] + split_values = arg_value.split(",") + # use this when iterating over split_values; the catch clause uses it + # to reference a particular split value which failed coercion. + split_value = None + + try: + + if arg_name.startswith("match[") and arg_name.endswith("]"): + match_filter_name = arg_name[6:-1] + filter_info = medallion.filters.common.get_filter_info( + match_filter_name + ) + + if filter_info: + for split_value in split_values: + coerced_values.append( + filter_info.type_coercer(split_value) + ) + + elif match_filter_name == "version": + # Special case match[...] filter: version values have a mix + # of formats; can't treat all the same way. + for split_value in split_values: + if split_value in ("first", "last", "all"): + coerced_value = split_value + else: + coerced_value = medallion.common.timestamp_to_epoch_seconds( + split_value + ) + coerced_values.append(coerced_value) + else: - parameters["type"] = {"$in": types_} - match_id = self.filter_args.get("match[id]") - if match_id and "id" in allowed: - ids_ = match_id.split(",") - if len(ids_) == 1: - parameters["id"] = {"$eq": ids_[0]} + # Unrecognized match[...] filter; use values as-is + coerced_values = split_values + + # special non match[...] filter case which still needs coercion + elif arg_name == "added_after": + for split_value in split_values: + coerced_values.append( + medallion.common.timestamp_to_epoch_seconds(split_value) + ) + + else: + # Unrecognized non-match[...] filter; use values as-is + coerced_values = split_values + + except ValueError as e: + # Catch type coercion errors. 
TypeErrors shouldn't happen here I + # think, since the coercer functions must support conversion from + # strings, and we are converting from strings here (since the + # values are coming from a URL). So if they do, that's a server + # error (500). + raise ProcessingError(( + "Invalid query value for filter '{}': {}" + ).format( + arg_name, split_value + ), 400 + ) from e + + coerced_filter_args[arg_name] = coerced_values + + return coerced_filter_args + + +def _mongo_query_from_match_spec(match_spec, coerced_filter_values): + """ + Given some query parameter values and a spec from the above set of specs, + create an actual mongo query. + + :param match_spec: A match spec from _MONGO_MATCH_SPECS (the value of a + top-level key) + :param coerced_filter_values: List of parameter values, already coerced to + proper types + :return: A mongo query, as a dict + """ + + # Empty match spec is malformed + assert len(match_spec) > 0 + + # Top level is an "or", e.g. over STIX types, or property checks not + # specific to STIX types. + top_or = [] + + for stix_type, query_paths in match_spec.items(): + + # If no paths, this is a malformed spec + assert len(query_paths) > 0 + + # path_or contains tests for all places in an object type where a + # particular property is known to be. + if len(coerced_filter_values) == 1: + path_or = [ + {query_path: coerced_filter_values[0]} + for query_path in query_paths + ] + else: + path_or = [ + {query_path: {"$in": coerced_filter_values}} + for query_path in query_paths + ] + + if stix_type: + # We have a type; combine our above path "or" into an "and" + # with a STIX type check. + type_and = { + "type": stix_type + } + + # optimize away a length-one "or" + if len(path_or) == 1: + type_and.update(path_or[0]) + else: + type_and["$or"] = path_or + + top_or.append(type_and) + + else: + # No type check required, thus no "and" is required. This would + # result in an "or" inside another "or", which can be optimized: + # merge the child into the parent. + top_or.extend(path_or) + + # Another optimization: don't need an "$or" with only one disjunct in it. + if len(top_or) == 1: + query = top_or[0] + else: + query = {"$or": top_or} + + return query + + +def _mongo_query_from_filter(filter_name, coerced_filter_values, interop): + """ + Create a mongo query corresponding to the given TAXII query parameter name + and corresponding coerced values. match[version] and match[spec_version] + are not handled here. They and any unrecognized filters will be ignored. + + :param filter_name: A TAXII query parameter name + :param coerced_filter_values: A list of coerced parameter values + :param interop: Whether to recognize interop filters. If True, many more + types of TAXII filters are recognized. + :return: A mongo query as a dict, or None if the function doesn't recognize + or handle the given query + """ + + query = None + + if filter_name.startswith("match[") and filter_name.endswith("]"): + + filter_name = filter_name[6:-1] + match_spec = _MONGO_MATCH_SPECS.get(filter_name) + + if interop: + + if match_spec: + # Complex case: construct a query from the spec. This should + # cover all tier 3 filters at least. 
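+                # For example (hypothetical values), a match[tlp] query with
+                # a single marking-definition id produces a query like:
+                #
+                #   {"$or": [
+                #       {"object_marking_refs": "marking-definition--..."},
+                #       {"granular_markings.marking_ref":
+                #           "marking-definition--..."}
+                #   ]}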
+ query = _mongo_query_from_match_spec( + match_spec, coerced_filter_values + ) + + elif filter_name in medallion.filters.common.BUILTIN_PROPERTIES \ + or filter_name in medallion.filters.common.TIER_1_PROPERTIES \ + or filter_name in medallion.filters.common.TIER_2_PROPERTIES: + + # Can treat tier 1 and 2 filters and some standard filters all + # the same way + if len(coerced_filter_values) == 1: + query = { + filter_name: coerced_filter_values[0] + } else: - parameters["id"] = {"$in": ids_} - match_spec_version = self.filter_args.get("match[spec_version]") - if match_spec_version and "spec_version" in allowed: - spec_versions = match_spec_version.split(",") - media_fmt = "application/stix+json;version={}" - if len(spec_versions) == 1: - parameters["_manifest.media_type"] = { - "$eq": media_fmt.format(spec_versions[0]) + query = { + filter_name: {"$in": coerced_filter_values} } + + elif filter_name in medallion.filters.common.CALCULATION_PROPERTIES: + + filter_name, op = filter_name.split("-") + + # $gte and $lte are supported mongo operators! + op = "$" + op + + # Weird, but in case there was more than one value. + if op == "$gte": + value = min(coerced_filter_values) else: - parameters["_manifest.media_type"] = { - "$in": [media_fmt.format(x) for x in spec_versions] + # op == "$lte", the only other thing it could be, + # as of this writing. + value = max(coerced_filter_values) + + query = { + filter_name: { + op: value } - added_after_date = self.filter_args.get("added_after") - if added_after_date: - added_after_timestamp = datetime_to_float(string_to_datetime(added_after_date)) - parameters["_manifest.date_added"] = { - "$gt": added_after_timestamp, } - return parameters - def process_filter(self, data, allowed, manifest_info): - pipeline = [ - {"$match": {"$and": [self.full_query]}}, + elif filter_name in medallion.filters.common.BUILTIN_PROPERTIES: + # interop disabled; consider spec builtin filters only. + # This is a copy-paste of the builtin/interop tier 1/2 filter code + # above. Redundant, but maybe the overall if/then/else logic is + # simpler this way? + if len(coerced_filter_values) == 1: + query = { + filter_name: coerced_filter_values[0] + } + else: + query = { + filter_name: {"$in": coerced_filter_values} + } + + # else: a match[...] filter we don't recognize. Ignore it. + + elif filter_name == "added_after": + + # Just in case there are multiple added_after values... but there + # shouldn't be. + min_added_after = min(coerced_filter_values) + + query = { + "_manifest.date_added": { + "$gt": min_added_after + } + } + + # else: a non match[...] filter we don't recognize. Ignore it. + + return query + + +def _make_mongo_query(coerced_filter_args, interop): + """ + Make a mongo query for TAXII query parameters which can be handled with one + single query (no additional pipeline stages required). The version query + is not handled here since its "first"/"last" query values require a + different kind of treatment. spec_version with specific version values + could be handled, but is not, to keep mongo backend behavior the same as + the memory backend (it is handled in a later stage). Latest spec_version + (i.e. the implicit behavior when no spec_version filter is given) can't be + handled here. + + :param coerced_filter_args: A mapping from query parameter names to + coerced values. + :param interop: Whether to recognize TAXII interop filters when + constructing the query. 
+ :return: A mongo query, as a dict + """ + + sub_queries = [] + for arg_name, arg_values in coerced_filter_args.items(): + sub_query = _mongo_query_from_filter(arg_name, arg_values, interop) + + if sub_query: + sub_queries.append(sub_query) + + if sub_queries: + if len(sub_queries) == 1: + query = sub_queries[0] + else: + query = { + "$and": sub_queries + } + else: + # We recognized... nothing! + query = {} + + return query + + +def _make_version_pipeline_stages(versions): + """ + Create a list of pipeline stages which performs the requested version + filtering. + + :param versions: Iterable of coerced version values. These can include + floats and the strings "first", "last", "all". + :return: A mongo pipeline as a list; will be an empty list if "all" is + a query value + """ + + pipeline = [] + + # If "all" is included, no filtering is necessary at all. + if "all" not in versions: + + need_first = "first" in versions + need_last = "last" in versions + + # If "first" or "last" is included, we need to add temp window fields. + # Track what fields we add, so we can remove them again. + fields_to_remove = [] + if need_first or need_last: + + window_output = {} + + if need_first: + window_output["_min_version"] = { + "$min": "$_manifest.version" + } + fields_to_remove.append("_min_version") + + if need_last: + window_output["_max_version"] = { + "$max": "$_manifest.version" + } + fields_to_remove.append("_max_version") + + window_stage = { + "$setWindowFields": { + "partitionBy": "$id", + "output": window_output + } + } + + pipeline.append(window_stage) + + # Build a "$match" stage which filters documents based on the query + # parameters and fields present. + version_checks = [] + + explicit_versions = [ + ver for ver in versions if not isinstance(ver, str) ] - # when no filter is provided only latest is considered. - match_spec_version = self.filter_args.get("match[spec_version]") - if not match_spec_version and "spec_version" in allowed: - latest_pipeline = list(pipeline) - latest_pipeline.append({"$sort": {"_manifest.media_type": ASCENDING}}) - latest_pipeline.append({"$group": SON([("_id", "$id"), ("media_type", SON([("$last", "$_manifest.media_type")]))])}) + if explicit_versions: + if len(explicit_versions) == 1: + version_checks.append({ + "_manifest.version": explicit_versions[0] + }) + else: + version_checks.append({ + "_manifest.version": { + "$in": explicit_versions + } + }) + + if need_first: + version_checks.append({ + "$expr": { + "$eq": [ + "$_manifest.version", + "$_min_version" + ] + } + }) + + if need_last: + version_checks.append({ + "$expr": { + "$eq": [ + "$_manifest.version", + "$_max_version" + ] + } + }) + + if len(version_checks) == 1: + version_match = { + "$match": version_checks[0] + } + else: + version_match = { + "$match": { + "$or": version_checks + } + } + + pipeline.append(version_match) + + # remove the extra window fields + if fields_to_remove: + pipeline.append({ + "$unset": fields_to_remove + }) + + return pipeline + - query = [ - {"id": x["_id"], "_manifest.media_type": x["media_type"]} - for x in list(data.aggregate(latest_pipeline)) +def _make_spec_version_pipeline_stages(spec_versions=None): + """ + Create a list of pipeline stages which performs the requested spec_version + filtering. + + :param spec_versions: Sequence of spec version values, or None. Sequences + must be of spec versions as strings, e.g. "2.0", "2.1", etc. If None, + construct the pipeline to include only the latest spec versions. 
+ :return: A mongo pipeline as a list + """ + + if spec_versions: + # Match specific spec version(s) + if len(spec_versions) == 1: + match = { + "_manifest.media_type": "application/stix+json;version=" + + spec_versions[0] + } + + else: + media_types = [ + "application/stix+json;version=" + spec_version + for spec_version in spec_versions ] - if query: - pipeline.append({"$match": {"$or": query}}) - - # create version filter - if "version" in allowed: - match_version = self.filter_args.get("match[version]") - if not match_version: - match_version = "last" - if "all" not in match_version: - actual_dates = [datetime_to_float(string_to_datetime(x)) for x in match_version.split(",") if (x != "first" and x != "last")] - - latest_pipeline = list(pipeline) - latest_pipeline.append({"$sort": {"_manifest.version": ASCENDING}}) - latest_pipeline.append({"$group": SON([("_id", "$id"), ("versions", SON([("$push", "$_manifest.version")]))])}) - - # The documents are sorted in ASCENDING order. - version_selector = [] - if "last" in match_version: - version_selector.append({"$arrayElemAt": ["$versions", -1]}) - if "first" in match_version: - version_selector.append({"$arrayElemAt": ["$versions", 0]}) - for d in actual_dates: - version_selector.append({"$arrayElemAt": ["$versions", {"$indexOfArray": ["$versions", d]}]}) - latest_pipeline.append({"$addFields": {"versions": version_selector}}) - if actual_dates: - latest_pipeline.append({"$match": {"versions": {"$in": actual_dates}}}) - - query = [ - {"id": x["_id"], "_manifest.version": {"$in": x["versions"]}} - for x in list(data.aggregate(latest_pipeline)) - ] - if query: - pipeline.append({"$match": {"$or": query}}) - - pipeline.append({"$sort": SON([("_manifest.date_added", ASCENDING), ("created", ASCENDING), ("modified", ASCENDING)])}) + match = { + "_manifest.media_type": { + "$in": media_types + } + } + + pipeline = [ + { + "$match": match + } + ] + + else: + # Match latest spec version + pipeline = [ + { + "$setWindowFields": { + "partitionBy": "$id", + "output": { + "_max_spec_version": { + # This is a string comparison-based maximum. It + # will fail if/when the STIX version reaches 2.10 + # (since as strings, "2.10" < "2.2"). We will need + # to do some redesigning at that point... + "$max": "$_manifest.media_type" + } + } + } + }, + { + "$match": { + "$expr": { + "$eq": [ + "$_manifest.media_type", + "$_max_spec_version" + ] + } + } + }, + { + "$unset": "_max_spec_version" + } + ] + + return pipeline + + +def _make_base_mongo_pipeline(basic_filter, filter_args, interop): + """ + Construct the base Mongo aggregation pipeline, which performs the given + filtering. + + :param basic_filter: Extra filters to be merged into the initial $match + stage of the pipeline. + :param filter_args: TAXII filters, as a mapping taken from the query + parameters of an HTTP request + :param interop: Whether to recognize and apply TAXII interop filters + :return: A Mongo aggregation pipeline, as a list of stages + """ + + coerced_filter_args = _coerce_filter_args(filter_args) + + base_match = _make_mongo_query(coerced_filter_args, interop) + # this merger results in an implicit "and" between basic_filter and + # base_match. 
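+    # For example (hypothetical values): a base_match of
+    # {"type": "indicator"} updated with a basic_filter of
+    # {"_collection_id": "..."} yields
+    # {"type": "indicator", "_collection_id": "..."}, i.e. documents must
+    # satisfy both.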
+ base_match.update(basic_filter) + + pipeline = [ + {"$match": base_match} + ] + + version_filters = coerced_filter_args.get("match[version]") + if not version_filters: + version_filters = ["last"] + + pipeline.extend( + _make_version_pipeline_stages(version_filters) + ) + + pipeline.extend( + _make_spec_version_pipeline_stages( + coerced_filter_args.get("match[spec_version]") + ) + ) + + pipeline.append({ + "$sort": SON([ + ("_manifest.date_added", ASCENDING), + ("created", ASCENDING), + ("modified", ASCENDING) + ]) + }) + + return pipeline + + +class MongoDBFilter: + + def __init__(self, filter_args, basic_filter, record=None, interop=False): + self.record = record + self.base_pipeline = _make_base_mongo_pipeline( + basic_filter, filter_args, interop + ) + + def process_filter(self, data, manifest_info): + + pipeline = self.base_pipeline.copy() if manifest_info == "manifests": # Project the final results diff --git a/medallion/scripts/run.py b/medallion/scripts/run.py index 4fd493b7..707afce2 100644 --- a/medallion/scripts/run.py +++ b/medallion/scripts/run.py @@ -4,12 +4,7 @@ import os import textwrap -from medallion import ( - __version__, connect_to_backend, register_blueprints, set_config -) -from medallion.common import ( - APPLICATION_INSTANCE, get_application_instance_config_values -) +from medallion import __version__, create_app import medallion.config log = logging.getLogger("medallion") @@ -127,16 +122,10 @@ def main(): medallion_args.conf_dir if not medallion_args.no_conf_dir else None, ) - set_config(APPLICATION_INSTANCE, "users", configuration) - set_config(APPLICATION_INSTANCE, "taxii", configuration) - set_config(APPLICATION_INSTANCE, "backend", configuration) - - APPLICATION_INSTANCE.medallion_backend = connect_to_backend(get_application_instance_config_values(APPLICATION_INSTANCE, "backend")) - if (not APPLICATION_INSTANCE.blueprints): - register_blueprints(APPLICATION_INSTANCE) + app = create_app(configuration) if not medallion_args.conf_check: - APPLICATION_INSTANCE.run( + app.run( host=medallion_args.host, port=medallion_args.port, debug=medallion_args.debug_mode, diff --git a/medallion/test/base_test.py b/medallion/test/base_test.py deleted file mode 100644 index c9aaf486..00000000 --- a/medallion/test/base_test.py +++ /dev/null @@ -1,135 +0,0 @@ -import base64 -import os - -from medallion import connect_to_backend, register_blueprints, set_config -from medallion.common import ( - APPLICATION_INSTANCE, get_application_instance_config_values -) - - -class TaxiiTest(): - type = None - DATA_FILE = os.path.join( - os.path.dirname(__file__), "data", "default_data.json", - ) - TEST_OBJECT = { - "objects": [ - { - "type": "course-of-action", - "spec_version": "2.1", - "id": "course-of-action--68794cd5-28db-429d-ab1e-1256704ef906", - "created": "2017-01-27T13:49:53.935Z", - "modified": "2017-01-27T13:49:53.935Z", - "name": "Test object" - } - ] - } - - no_config = {} - - config_no_taxii = { - "backend": { - "module_class": "MemoryBackend", - "filename": DATA_FILE, - }, - "users": { - "admin": "Password0", - }, - } - - config_no_auth = { - "backend": { - "module_class": "MemoryBackend", - "filename": DATA_FILE, - }, - "taxii": { - "max_page_size": 20, - }, - } - - config_no_backend = { - "users": { - "admin": "Password0", - }, - "taxii": { - "max_page_size": 20, - }, - } - - memory_config = { - "backend": { - "module_class": "MemoryBackend", - "filename": DATA_FILE, - }, - "users": { - "admin": "Password0", - }, - "taxii": { - "max_page_size": 20, - }, - } - - 
mongodb_config = { - "backend": { - "module_class": "MongoBackend", - "uri": "mongodb://127.0.0.1:27017/", - "filename": DATA_FILE, - "clear_db": True - }, - "users": { - "root": "example", - }, - "taxii": { - "max_page_size": 20, - }, - } - - def setUp(self, start_threads=True): - self.__name__ = self.type - self.app = APPLICATION_INSTANCE - self.app_context = APPLICATION_INSTANCE.app_context() - self.app_context.push() - self.app.testing = True - if not self.app.blueprints: - register_blueprints(self.app) - if self.type == "mongo": - self.configuration = self.mongodb_config - elif self.type == "memory": - self.configuration = self.memory_config - elif self.type == "memory_no_config": - self.configuration = self.no_config - elif self.type == "no_taxii": - self.configuration = self.config_no_taxii - elif self.type == "no_auth": - self.configuration = self.config_no_auth - elif self.type == "no_backend": - self.configuration = self.config_no_backend - else: - raise RuntimeError("Unknown backend!") - set_config(self.app, "backend", self.configuration) - set_config(self.app, "users", self.configuration) - set_config(self.app, "taxii", self.configuration) - if not start_threads: - self.app.backend_config["run_cleanup_threads"] = False - APPLICATION_INSTANCE.medallion_backend = connect_to_backend(get_application_instance_config_values(APPLICATION_INSTANCE, - "backend"), - clear_db=True) - self.client = APPLICATION_INSTANCE.test_client() - if self.type == "memory_no_config" or self.type == "no_auth": - encoded_auth = "Basic " + \ - base64.b64encode(b"user:pass").decode("ascii") - elif self.type == "mongo": - encoded_auth = "Basic " + \ - base64.b64encode(b"root:example").decode("ascii") - else: - encoded_auth = "Basic " + \ - base64.b64encode(b"admin:Password0").decode("ascii") - self.headers = {"Accept": "application/taxii+json;version=2.1", "Authorization": encoded_auth} - self.post_headers = { - "Content-Type": "application/taxii+json;version=2.1", - "Accept": "application/taxii+json;version=2.1", - "Authorization": encoded_auth - } - - def tearDown(self): - self.app_context.pop() diff --git a/medallion/test/conftest.py b/medallion/test/conftest.py index 33f2a228..69d7bfcb 100644 --- a/medallion/test/conftest.py +++ b/medallion/test/conftest.py @@ -3,10 +3,6 @@ import pytest -def pytest_addoption(parser): - parser.addoption("--backends", action="store", default="memory,mongo") - - # This fixture is cheap so we just do it for every function it's requested by # to ensure that functions which aren't opted-in themselves or by their module # remain unaffected diff --git a/medallion/test/data/default_data.json b/medallion/test/data/default_data.json index 926c044c..3a2449fd 100644 --- a/medallion/test/data/default_data.json +++ b/medallion/test/data/default_data.json @@ -19,8 +19,8 @@ ], "max_content_length": 9765625 }, - "status": [], - "collections": [] + "status": {}, + "collections": {} }, "api2": { "information": { @@ -31,8 +31,8 @@ ], "max_content_length": 9765625 }, - "status": [], - "collections": [] + "status": {}, + "collections": {} }, "trustgroup1": { "information": { @@ -43,8 +43,8 @@ ], "max_content_length": 9765625 }, - "status": [ - { + "status": { + "2d086da7-4bdc-4f91-900e-d77486753710": { "id": "2d086da7-4bdc-4f91-900e-d77486753710", "status": "pending", "request_timestamp": "2016-11-02T12:34:34.123456Z", @@ -77,21 +77,21 @@ } ] }, - { + "2d086da7-4bdc-4f91-900e-f4566be4b780": { "id": "2d086da7-4bdc-4f91-900e-f4566be4b780", - "status": "pending", - "request_timestamp": 
"2016-11-02T12:34:34.123456Z", + "status": "pending", + "request_timestamp": "2016-11-02T12:34:34.123456Z", "total_objects": 0, - "success_count": 0, - "successes": [], - "failure_count": 0, - "failures": [], - "pending_count": 0, - "pendings": [] + "success_count": 0, + "successes": [], + "failure_count": 0, + "failures": [], + "pending_count": 0, + "pendings": [] } - ], - "collections": [ - { + }, + "collections": { + "472c94ae-3113-4e3e-a4dd-a9f4ac7471d4": { "id": "472c94ae-3113-4e3e-a4dd-a9f4ac7471d4", "title": "This data collection is for testing querying across collections", "can_read": false, @@ -99,10 +99,9 @@ "media_types": [ "application/stix+json;version=2.1" ], - "objects": [], - "manifest": [] + "objects": [] }, - { + "365fed99-08fa-fdcd-a1b3-fb247eb41d01": { "id": "365fed99-08fa-fdcd-a1b3-fb247eb41d01", "title": "This data collection is for testing adding objects", "can_read": true, @@ -110,10 +109,9 @@ "media_types": [ "application/stix+json;version=2.1" ], - "objects": [], - "manifest": [] + "objects": [] }, - { + "91a7b528-80eb-42ed-a74d-c6fbd5a26116": { "id": "91a7b528-80eb-42ed-a74d-c6fbd5a26116", "title": "High Value Indicator Collection", "description": "This data collection is for collecting high value IOCs", @@ -124,6 +122,29 @@ "application/stix+json;version=2.1" ], "objects": [ + { + "type": "malware-analysis", + "spec_version": "2.1", + "id": "malware-analysis--084a658c-a7ef-4581-a21d-1f600908741b", + "created_by_ref": "identity--eae683c1-d472-4708-bd63-f9b1a1f016b1", + "created": "2021-04-16T09:49:24.378932Z", + "modified": "2021-12-11T07:17:44.542582Z", + "product": "option", + "version": "moment", + "submitted": "2022-03-26T15:06:01.434493Z", + "analysis_started": "2022-10-04T07:07:55.365672Z", + "analysis_ended": "2023-06-14T07:12:00.962419Z", + "result": "unknown", + "lang": "en", + "confidence": 16, + "object_marking_refs": [ + "marking-definition--3e914a0d-957f-40b2-8c35-b119040574fe" + ], + "__meta": { + "date_added": "2022-06-16T13:49:53.935000Z", + "media_type": "application/stix+json;version=2.1" + } + }, { "created": "2014-05-08T09:00:00.000Z", "modified": "2014-05-08T09:00:00.000Z", @@ -132,7 +153,11 @@ "source_ref": "indicator--cd981c25-8042-4166-8945-51178443bdac", "spec_version": "2.1", "target_ref": "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", - "type": "relationship" + "type": "relationship", + "__meta": { + "date_added": "2014-05-08T09:00:00.000000Z", + "media_type": "application/stix+json;version=2.1" + } }, { "created": "2014-05-08T09:00:00.000Z", @@ -146,7 +171,14 @@ "pattern_type": "stix", "spec_version": "2.1", "type": "indicator", - "valid_from": "2014-05-08T09:00:00.000000Z" + "valid_from": "2014-05-08T09:00:00.000000Z", + "object_marking_refs": [ + "marking-definition--34098fce-860f-48ae-8e50-ebd3cc5e41da" + ], + "__meta": { + "date_added": "2016-11-01T03:04:05.000000Z", + "media_type": "application/stix+json;version=2.1" + } }, { "created": "2016-11-03T12:30:59.000Z", @@ -161,7 +193,11 @@ "pattern_type": "stix", "spec_version": "2.1", "type": "indicator", - "valid_from": "2017-01-27T13:49:53.935382Z" + "valid_from": "2017-01-27T13:49:53.935382Z", + "__meta": { + "date_added": "2016-11-03T12:30:59.001000Z", + "media_type": "application/stix+json;version=2.1" + } }, { "created": "2016-11-03T12:30:59.000Z", @@ -176,7 +212,11 @@ "pattern_type": "stix", "spec_version": "2.1", "type": "indicator", - "valid_from": "2017-01-27T13:49:53.935382Z" + "valid_from": "2017-01-27T13:49:53.935382Z", + "__meta": { + "date_added": 
"2016-12-27T13:49:59.000000Z", + "media_type": "application/stix+json;version=2.1" + } }, { "created": "2016-11-03T12:30:59.000Z", @@ -191,7 +231,11 @@ "pattern_type": "stix", "spec_version": "2.1", "type": "indicator", - "valid_from": "2016-11-03T12:30:59.000Z" + "valid_from": "2016-11-03T12:30:59.000Z", + "__meta": { + "date_added": "2017-12-31T13:49:53.935000Z", + "media_type": "application/stix+json;version=2.1" + } }, { "created": "2017-01-20T00:00:00.000Z", @@ -202,7 +246,11 @@ "id": "marking-definition--34098fce-860f-48ae-8e50-ebd3cc5e41da", "name": "TLP:GREEN", "spec_version": "2.1", - "type": "marking-definition" + "type": "marking-definition", + "__meta": { + "date_added": "2017-01-20T00:00:00.000000Z", + "media_type": "application/stix+json;version=2.1" + } }, { "created": "2017-01-27T13:49:53.997Z", @@ -214,7 +262,11 @@ ], "modified": "2018-02-23T18:30:00.000Z", "name": "Poison Ivy", - "type": "malware" + "type": "malware", + "__meta": { + "date_added": "2017-01-27T13:49:59.997000Z", + "media_type": "application/stix+json;version=2.0" + } }, { "created": "2017-01-27T13:49:53.997Z", @@ -227,61 +279,15 @@ "modified": "2017-01-27T13:49:53.997Z", "name": "Poison Ivy", "spec_version": "2.1", - "type": "malware" - } - ], - "manifest": [ - { - "date_added": "2014-05-08T09:00:00.000000Z", - "id": "relationship--2f9a9aa9-108a-4333-83e2-4fb25add0463", - "media_type": "application/stix+json;version=2.1", - "version": "2014-05-08T09:00:00.000Z" - }, - { - "date_added": "2016-11-01T03:04:05.000000Z", - "id": "indicator--cd981c25-8042-4166-8945-51178443bdac", - "media_type": "application/stix+json;version=2.1", - "version": "2014-05-08T09:00:00.000Z" - }, - { - "date_added": "2016-11-03T12:30:59.001000Z", - "id": "indicator--6770298f-0fd8-471a-ab8c-1c658a46574e", - "media_type": "application/stix+json;version=2.1", - "version": "2016-11-03T12:30:59.000Z" - }, - { - "date_added": "2016-12-27T13:49:59.000000Z", - "id": "indicator--6770298f-0fd8-471a-ab8c-1c658a46574e", - "media_type": "application/stix+json;version=2.1", - "version": "2016-12-25T12:30:59.444Z" - }, - { - "date_added": "2017-01-20T00:00:00.000000Z", - "id": "marking-definition--34098fce-860f-48ae-8e50-ebd3cc5e41da", - "media_type": "application/stix+json;version=2.1", - "version": "2017-01-20T00:00:00.000Z" - }, - { - "date_added": "2017-01-27T13:49:59.997000Z", - "id": "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", - "media_type": "application/stix+json;version=2.1", - "version": "2017-01-27T13:49:53.997Z" - }, - { - "date_added": "2017-01-27T13:49:59.997000Z", - "id": "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", - "media_type": "application/stix+json;version=2.0", - "version": "2018-02-23T18:30:00.000Z" - }, - { - "date_added": "2017-12-31T13:49:53.935000Z", - "id": "indicator--6770298f-0fd8-471a-ab8c-1c658a46574e", - "media_type": "application/stix+json;version=2.1", - "version": "2017-01-27T13:49:53.935Z" + "type": "malware", + "__meta": { + "date_added": "2017-01-27T13:49:59.997000Z", + "media_type": "application/stix+json;version=2.1" + } } ] }, - { + "52892447-4d7e-4f70-b94d-d7f22742ff63": { "id": "52892447-4d7e-4f70-b94d-d7f22742ff63", "title": "Indicators from the past 24-hours", "description": "This data collection is for collecting current IOCs", @@ -290,10 +296,9 @@ "media_types": [ "application/stix+json;version=2.1" ], - "objects": [], - "manifest": [] + "objects": [] }, - { + "64993447-4d7e-4f70-b94d-d7f33742ee63": { "id": "64993447-4d7e-4f70-b94d-d7f33742ee63", "title": "Secret Indicators", "description": 
"Non accessible", @@ -302,9 +307,8 @@ "media_types": [ "application/stix+json;version=2.1" ], - "objects": [], - "manifest": [] + "objects": [] } - ] + } } } diff --git a/medallion/test/test_backends.py b/medallion/test/test_backends.py index 53535472..5a12e864 100644 --- a/medallion/test/test_backends.py +++ b/medallion/test/test_backends.py @@ -1,65 +1,139 @@ import copy -import datetime import json +import os import tempfile +import pymongo import pytest -from medallion import common, exceptions, test +from medallion import common, create_app, exceptions, test from medallion.backends.base import SECONDS_IN_24_HOURS +import medallion.filters.common from medallion.views import MEDIA_TYPE_TAXII_V21 -from .base_test import TaxiiTest +DATA_FILE = os.path.join( + os.path.dirname(__file__), "data", "default_data.json", +) -class MemoryTestServer(TaxiiTest): - type = "memory" +GET_HEADERS = { + "Accept": "application/taxii+json;version=2.1" +} - def count(self, documents): - return len(documents) +POST_HEADERS = { + "Content-Type": "application/taxii+json;version=2.1", + "Accept": "application/taxii+json;version=2.1" +} -class MongoTestServer(TaxiiTest): - type = "mongo" - def count(self, documents): - return documents.count_documents({}) +TEST_OBJECT = { + "objects": [ + { + "type": "course-of-action", + "spec_version": "2.1", + "id": "course-of-action--68794cd5-28db-429d-ab1e-1256704ef906", + "created": "2017-01-27T13:49:53.935Z", + "modified": "2017-01-27T13:49:53.935Z", + "name": "Test object" + } + ] +} TestServers = ["memory", "mongo"] -@pytest.fixture(scope="module", params=TestServers) -def backend(request): - if request.param in request.config.getoption("backends"): - if request.param == "memory": - test_server = MemoryTestServer() - if request.param == "mongo": - test_server = MongoTestServer() - test_server.setUp() - yield test_server - test_server.tearDown() +@pytest.fixture(scope="module") +def mongo_client(): + # lazy-connect, in case we're only running memory backend tests anyways + mongo_client = pymongo.MongoClient(connect=False) + yield mongo_client + mongo_client.close() + + +@pytest.fixture() +def backup_filter_settings(): + # Back up filter settings. A given backend can override the global default + # settings, which is okay since in normal operation only one backend is + # active. It's kinda problematic in unit tests though, where we create + # lots of backends, and we don't want the overrides applied in one to + # affect any others. + # + # So far, we only wholesale override a filter info object, so shallow + # copies here are sufficient. 
+ filters_common = medallion.filters.common + + backup_builtin_filters = filters_common.BUILTIN_PROPERTIES.copy() + backup_tier1_filters = filters_common.TIER_1_PROPERTIES.copy() + backup_tier2_filters = filters_common.TIER_2_PROPERTIES.copy() + backup_tier3_filters = filters_common.TIER_3_PROPERTIES.copy() + backup_relationsip_filters = filters_common.RELATIONSHIP_PROPERTIES.copy() + backup_calculation_filters = filters_common.CALCULATION_PROPERTIES.copy() + + yield + + filters_common.BUILTIN_PROPERTIES = backup_builtin_filters + filters_common.TIER_1_PROPERTIES = backup_tier1_filters + filters_common.TIER_2_PROPERTIES = backup_tier2_filters + filters_common.TIER_3_PROPERTIES = backup_tier3_filters + filters_common.RELATIONSHIP_PROPERTIES = backup_relationsip_filters + filters_common.CALCULATION_PROPERTIES = backup_calculation_filters + + +def _set_backend(configuration, mongo_client, request): + + if request.param == "memory": + configuration["backend"]["module_class"] = "MemoryBackend" else: - yield pytest.skip("skipped") + configuration["backend"].update( + module_class="MongoBackend", + clear_db=True, + mongo_client=mongo_client + ) + return configuration + + +@pytest.fixture(params=TestServers) +def flask_app(mongo_client, backup_filter_settings, request): + configuration = { + "backend": { + "filename": DATA_FILE, + "interop_requirements": True, + }, + "users": { + "admin": "Password0" + }, + "taxii": { + "max_page_size": 20 + } + } -@pytest.fixture(scope="module", params=TestServers) -def backend_without_threads(request): if request.param in request.config.getoption("backends"): - if request.param == "memory": - test_server = MemoryTestServer() - if request.param == "mongo": - test_server = MongoTestServer() - test_server.setUp(False) - yield test_server - test_server.tearDown() + _set_backend(configuration, mongo_client, request) + + app = create_app(configuration) + + yield app + + # Important for releasing backend resources + app.medallion_backend.close() + else: - yield pytest.skip("skipped") + pytest.skip() + + +@pytest.fixture +def test_client(flask_app): + return flask_app.test_client() # start with basic get requests for each endpoint -def test_server_discovery(backend): - r = backend.client.get(test.DISCOVERY_EP, headers=backend.headers) +def test_server_discovery(test_client): + r = test_client.get( + test.DISCOVERY_EP, headers=GET_HEADERS, auth=("admin", "Password0") + ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 @@ -67,8 +141,10 @@ def test_server_discovery(backend): assert server_info["api_roots"][0] == "http://localhost:5000/api1/" -def test_get_api_root_information(backend): - r = backend.client.get(test.API_ROOT_EP, headers=backend.headers) +def test_get_api_root_information(test_client): + r = test_client.get( + test.API_ROOT_EP, headers=GET_HEADERS, auth=("admin", "Password0") + ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 @@ -76,11 +152,12 @@ def test_get_api_root_information(backend): assert api_root_metadata["title"] == "Malware Research Group" -def test_get_status(backend): - r = backend.client.get( +def test_get_status(test_client): + r = test_client.get( test.API_ROOT_EP + "status/2d086da7-4bdc-4f91-900e-d77486753710", - headers=backend.headers, + headers=GET_HEADERS, follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -91,8 +168,10 @@ def test_get_status(backend): assert "pendings" in status_data -def test_get_collections(backend): - r = 
backend.client.get(test.COLLECTIONS_EP, headers=backend.headers) +def test_get_collections(test_client): + r = test_client.get( + test.COLLECTIONS_EP, headers=GET_HEADERS, auth=("admin", "Password0") + ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 @@ -108,61 +187,71 @@ def test_get_collections(backend): assert "365fed99-08fa-fdcd-a1b3-fb247eb41d01" in collection_ids -def test_get_objects(backend): +def test_get_objects(test_client): - r = backend.client.get( + r = test_client.get( test.GET_OBJECTS_EP, - headers=backend.headers, + headers=GET_HEADERS, + auth=("admin", "Password0") ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 objs = r.json assert objs['more'] is False - assert len(objs['objects']) == 5 + assert len(objs['objects']) == 6 # testing date-added headers - assert r.headers['X-TAXII-Date-Added-First'] == "2014-05-08T09:00:00.000000Z" - assert r.headers['X-TAXII-Date-Added-Last'] == "2017-12-31T13:49:53.935000Z" + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-First']) == common.timestamp_to_datetime("2014-05-08T09:00:00.000000Z") + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-Last']) == common.timestamp_to_datetime("2022-06-16T13:49:53.935000Z") # testing ordering of returned objects by date_added correct_order = ['relationship--2f9a9aa9-108a-4333-83e2-4fb25add0463', 'indicator--cd981c25-8042-4166-8945-51178443bdac', 'marking-definition--34098fce-860f-48ae-8e50-ebd3cc5e41da', 'malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec', - 'indicator--6770298f-0fd8-471a-ab8c-1c658a46574e'] + 'indicator--6770298f-0fd8-471a-ab8c-1c658a46574e', + "malware-analysis--084a658c-a7ef-4581-a21d-1f600908741b"] + + returned_order = [ + obj["id"] for obj in objs["objects"] + ] - for x in range(0, len(correct_order)): - assert objs['objects'][x]['id'] == correct_order[x] + assert returned_order == correct_order -def test_get_object(backend): +def test_get_object(test_client): - r = backend.client.get( + r = test_client.get( test.GET_OBJECTS_EP + "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec/", - headers=backend.headers, + headers=GET_HEADERS, + auth=("admin", "Password0") ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 objs = r.json assert len(objs["objects"]) == 1 - assert objs["objects"][0]["id"] == "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec" + assert all( + obj["id"] == "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec" + for obj in objs["objects"] + ) # testing date-added headers - assert r.headers['X-TAXII-Date-Added-First'] == "2017-01-27T13:49:59.997000Z" - assert r.headers['X-TAXII-Date-Added-Last'] == "2017-01-27T13:49:59.997000Z" + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-First']) == common.timestamp_to_datetime("2017-01-27T13:49:59.997000Z") + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-Last']) == common.timestamp_to_datetime("2017-01-27T13:49:59.997000Z") -def test_add_and_delete_object(backend): +def test_add_and_delete_object(test_client): # ------------- BEGIN: add object section ------------- # - object_id = backend.TEST_OBJECT["objects"][0]["id"] + object_id = TEST_OBJECT["objects"][0]["id"] - r_post = backend.client.post( + r_post = test_client.post( test.ADD_OBJECTS_EP, - data=json.dumps(copy.deepcopy(backend.TEST_OBJECT)), - headers=backend.post_headers, + data=json.dumps(copy.deepcopy(TEST_OBJECT)), + headers=POST_HEADERS, + auth=("admin", "Password0") ) status_response = r_post.json assert r_post.status_code == 
202 @@ -171,9 +260,10 @@ def test_add_and_delete_object(backend): # ------------- END: add object section ------------- # # ------------- BEGIN: get object section ------------- # - r_get = backend.client.get( + r_get = test_client.get( test.ADD_OBJECTS_EP, - headers=backend.headers, + headers=GET_HEADERS, + auth=("admin", "Password0") ) assert r_get.status_code == 200 assert r_get.content_type == MEDIA_TYPE_TAXII_V21 @@ -183,9 +273,10 @@ def test_add_and_delete_object(backend): # ------------- END: get object section ------------- # # ------------- BEGIN: get object w/ filter section --- # - r_get = backend.client.get( + r_get = test_client.get( test.ADD_OBJECTS_EP + "?match[id]=" + object_id, - headers=backend.headers, + headers=GET_HEADERS, + auth=("admin", "Password0") ) assert r_get.status_code == 200 assert r_get.content_type == MEDIA_TYPE_TAXII_V21 @@ -195,9 +286,10 @@ def test_add_and_delete_object(backend): # ------------- END: get object w/ filter section --- # # ------------- BEGIN: get status section ------------- # - r_get = backend.client.get( + r_get = test_client.get( test.API_ROOT_EP + "status/%s/" % status_response["id"], - headers=backend.headers, + headers=GET_HEADERS, + auth=("admin", "Password0") ) assert r_get.status_code == 200 assert r_get.content_type == MEDIA_TYPE_TAXII_V21 @@ -207,9 +299,10 @@ def test_add_and_delete_object(backend): # ------------- END: get status section ------------- # # ------------- BEGIN: get manifest section ------------- # - r_get = backend.client.get( + r_get = test_client.get( test.ADD_MANIFESTS_EP + "?match[id]=" + object_id, - headers=backend.headers, + headers=GET_HEADERS, + auth=("admin", "Password0") ) assert r_get.status_code == 200 assert r_get.content_type == MEDIA_TYPE_TAXII_V21 @@ -219,61 +312,66 @@ def test_add_and_delete_object(backend): # ------------- END: get manifest section ----------- # - r = backend.client.delete( + r = test_client.delete( test.ADD_OBJECTS_EP + object_id, - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 - r = backend.client.get( + r = test_client.get( test.ADD_OBJECTS_EP + object_id, - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 404 assert r.content_type == MEDIA_TYPE_TAXII_V21 # test getting the deleted object's manifest - r = backend.client.get( + r = test_client.get( test.ADD_MANIFESTS_EP + object_id, - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 404 # for whatever reason, content_type is not normal? 
doesn't really matter # assert r.content_type == MEDIA_TYPE_TAXII_V21 -def test_get_object_manifests(backend): +def test_get_object_manifests(test_client): - r = backend.client.get( + r = test_client.get( test.GET_MANIFESTS_EP, - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 manifests = r.json - assert len(manifests["objects"]) == 5 + assert len(manifests["objects"]) == 6 # testing the date-added headers - assert r.headers['X-TAXII-Date-Added-First'] == "2014-05-08T09:00:00.000000Z" - assert r.headers['X-TAXII-Date-Added-Last'] == "2017-12-31T13:49:53.935000Z" + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-First']) == common.timestamp_to_datetime("2014-05-08T09:00:00.000000Z") + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-Last']) == common.timestamp_to_datetime("2022-06-16T13:49:53.935000Z") # checking ordered by date_added for x in range(1, len(manifests["objects"])): - assert manifests["objects"][x - 1]["date_added"] < manifests["objects"][x]["date_added"] + assert common.timestamp_to_datetime(manifests["objects"][x - 1]["date_added"]) <= common.timestamp_to_datetime(manifests["objects"][x]["date_added"]) -def test_get_version(backend): - r = backend.client.get( +def test_get_version(test_client): + r = test_client.get( test.GET_OBJECTS_EP + "relationship--2f9a9aa9-108a-4333-83e2-4fb25add0463/versions", - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 @@ -281,48 +379,56 @@ def test_get_version(backend): assert len(vers["versions"]) == 1 # testing the date-added headers - assert r.headers['X-TAXII-Date-Added-First'] == "2014-05-08T09:00:00.000000Z" - assert r.headers['X-TAXII-Date-Added-Last'] == "2014-05-08T09:00:00.000000Z" + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-First']) == common.timestamp_to_datetime("2014-05-08T09:00:00.000000Z") + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-Last']) == common.timestamp_to_datetime("2014-05-08T09:00:00.000000Z") # test each filter type with each applicable endpoint -def test_get_objects_added_after(backend): - r = backend.client.get( +def test_get_objects_added_after(test_client): + r = test_client.get( test.GET_OBJECTS_EP + "?added_after=2016-11-03T12:30:59Z", - headers=backend.headers, + headers=GET_HEADERS, + auth=("admin", "Password0") ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 objs = r.json assert objs['more'] is False - assert len(objs['objects']) == 3 + assert len(objs['objects']) == 4 -def test_get_objects_limit(backend): - r = backend.client.get( - test.GET_OBJECTS_EP + "?limit=3", - headers=backend.headers, +def test_get_objects_limit(test_client): + r = test_client.get( + test.GET_OBJECTS_EP + "?limit=4", + headers=GET_HEADERS, + auth=("admin", "Password0") ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 objs = r.json assert objs['more'] is True - assert len(objs['objects']) == 3 - assert r.headers['X-TAXII-Date-Added-First'] == '2014-05-08T09:00:00.000000Z' - assert r.headers['X-TAXII-Date-Added-Last'] == '2017-01-20T00:00:00.000000Z' + assert len(objs['objects']) == 4 + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-First']) == 
common.timestamp_to_datetime('2014-05-08T09:00:00.000000Z') + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-Last']) == common.timestamp_to_datetime('2017-01-27T13:49:59.997000Z') correct_order = ['relationship--2f9a9aa9-108a-4333-83e2-4fb25add0463', 'indicator--cd981c25-8042-4166-8945-51178443bdac', - 'marking-definition--34098fce-860f-48ae-8e50-ebd3cc5e41da'] + 'marking-definition--34098fce-860f-48ae-8e50-ebd3cc5e41da', + 'malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec'] - for x in range(0, len(correct_order)): - assert objs["objects"][x]["id"] == correct_order[x] + returned_order = [ + obj["id"] + for obj in objs["objects"] + ] - r = backend.client.get( + assert returned_order == correct_order + + r = test_client.get( test.GET_OBJECTS_EP + "?limit=3&next=" + r.json["next"], - headers=backend.headers, + headers=GET_HEADERS, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -331,20 +437,25 @@ def test_get_objects_limit(backend): assert objs['more'] is False assert len(objs['objects']) == 2 - assert r.headers['X-TAXII-Date-Added-First'] == '2017-01-27T13:49:59.997000Z' - assert r.headers['X-TAXII-Date-Added-Last'] == '2017-12-31T13:49:53.935000Z' + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-First']) == common.timestamp_to_datetime('2017-12-31T13:49:53.935000Z') + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-Last']) == common.timestamp_to_datetime('2022-06-16T13:49:53.935000Z') - correct_order = ['malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec', - 'indicator--6770298f-0fd8-471a-ab8c-1c658a46574e'] + correct_order = ['indicator--6770298f-0fd8-471a-ab8c-1c658a46574e', + 'malware-analysis--084a658c-a7ef-4581-a21d-1f600908741b'] - for x in range(0, len(correct_order)): - assert objs["objects"][x]["id"] == correct_order[x] + returned_order = [ + obj["id"] + for obj in objs["objects"] + ] + assert returned_order == correct_order -def test_get_objects_id(backend): - r = backend.client.get( + +def test_get_objects_id(test_client): + r = test_client.get( test.GET_OBJECTS_EP + "?match[id]=malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", - headers=backend.headers, + headers=GET_HEADERS, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -354,10 +465,11 @@ def test_get_objects_id(backend): assert len(objs['objects']) == 1 -def test_get_objects_type(backend): - r = backend.client.get( +def test_get_objects_type(test_client): + r = test_client.get( test.GET_OBJECTS_EP + "?match[type]=indicator", - headers=backend.headers, + headers=GET_HEADERS, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -368,10 +480,11 @@ def test_get_objects_type(backend): assert all("indicator" == obj["type"] for obj in objs["objects"]) -def get_objects_by_version(backend, filter): - r = backend.client.get( +def get_objects_by_version(test_client, filter): + r = test_client.get( test.GET_OBJECTS_EP + filter, - headers=backend.headers, + headers=GET_HEADERS, + auth=("admin", "Password0") ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 @@ -380,41 +493,62 @@ def get_objects_by_version(backend, filter): return objs -def test_objects_version_match_specific_date(backend): - objs = get_objects_by_version(backend, "?match[version]=2016-12-25T12:30:59.444Z") +def test_objects_version_match_specific_date(test_client): + objs = get_objects_by_version(test_client, "?match[version]=2016-12-25T12:30:59.444Z") assert len(objs['objects']) == 1 assert objs["objects"][0]["id"] == 
"indicator--6770298f-0fd8-471a-ab8c-1c658a46574e" -def test_objects_version_match_first(backend): - objs = get_objects_by_version(backend, "?match[version]=first") - for obj in objs["objects"]: - if obj["id"] == "indicator--6770298f-0fd8-471a-ab8c-1c658a46574e": - assert obj["modified"] == "2016-11-03T12:30:59.000Z" - if obj["id"] == "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec": - assert obj["modified"] == "2017-01-27T13:49:53.997Z" +def test_objects_version_match_first(test_client): + objs = get_objects_by_version(test_client, "?match[version]=first") + + returned_id_version = [ + (obj["id"], common.timestamp_to_datetime(obj.get("modified") or obj["created"])) + for obj in objs["objects"] + ] + + correct_id_version = [ + ("relationship--2f9a9aa9-108a-4333-83e2-4fb25add0463", common.timestamp_to_datetime("2014-05-08T09:00:00.000Z")), + ("indicator--cd981c25-8042-4166-8945-51178443bdac", common.timestamp_to_datetime("2014-05-08T09:00:00.000Z")), + ("indicator--6770298f-0fd8-471a-ab8c-1c658a46574e", common.timestamp_to_datetime("2016-11-03T12:30:59.000Z")), + ("marking-definition--34098fce-860f-48ae-8e50-ebd3cc5e41da", common.timestamp_to_datetime("2017-01-20T00:00:00.000Z")), + ("malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", common.timestamp_to_datetime("2017-01-27T13:49:53.997Z")), + ("malware-analysis--084a658c-a7ef-4581-a21d-1f600908741b", common.timestamp_to_datetime("2021-12-11T07:17:44.542582Z")) + ] + + assert returned_id_version == correct_id_version + +def test_objects_version_match_last(test_client): + objs = get_objects_by_version(test_client, "?match[version]=last") -def test_objects_version_match_last(backend): - objs = get_objects_by_version(backend, "?match[version]=last") - for obj in objs["objects"]: - if obj["id"] == "indicator--6770298f-0fd8-471a-ab8c-1c658a46574e": - assert obj["modified"] == "2017-01-27T13:49:53.935Z" - # Because the spec_version default filter comes before the version filter, the 2.0 version gets filtered out automatically - # If you put a spec_version=2.0,2.1 here, then the correct version would be here - # if obj["id"] == "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec": - # assert obj["modified"] == "2018-02-23T18:30:00.000Z" + returned_id_version = [ + (obj["id"], common.timestamp_to_datetime(obj.get("modified") or obj["created"])) + for obj in objs["objects"] + ] + correct_id_version = [ + ("relationship--2f9a9aa9-108a-4333-83e2-4fb25add0463", common.timestamp_to_datetime("2014-05-08T09:00:00.000Z")), + ("indicator--cd981c25-8042-4166-8945-51178443bdac", common.timestamp_to_datetime("2014-05-08T09:00:00.000Z")), + ("marking-definition--34098fce-860f-48ae-8e50-ebd3cc5e41da", common.timestamp_to_datetime("2017-01-20T00:00:00.000Z")), + ("malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", common.timestamp_to_datetime("2018-02-23T18:30:00.000Z")), + ("indicator--6770298f-0fd8-471a-ab8c-1c658a46574e", common.timestamp_to_datetime("2017-01-27T13:49:53.935Z")), + ("malware-analysis--084a658c-a7ef-4581-a21d-1f600908741b", common.timestamp_to_datetime("2021-12-11T07:17:44.542582Z")) + ] -def test_objects_version_match_all(backend): - objs = get_objects_by_version(backend, "?match[version]=all") - assert len(objs['objects']) == 7 + assert returned_id_version == correct_id_version -def get_objects_spec_version(backend, filter, num_objects): - r = backend.client.get( +def test_objects_version_match_all(test_client): + objs = get_objects_by_version(test_client, "?match[version]=all") + assert len(objs['objects']) == 8 + + +def 
get_objects_spec_version(test_client, filter, num_objects): + r = test_client.get( test.GET_OBJECTS_EP + filter, - headers=backend.headers, + headers=GET_HEADERS, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -425,30 +559,36 @@ def get_objects_spec_version(backend, filter, num_objects): return objs -def test_get_objects_spec_version_20(backend): - objs = get_objects_spec_version(backend, "?match[spec_version]=2.0", 1) +def test_get_objects_spec_version_20(test_client): + objs = get_objects_spec_version(test_client, "?match[spec_version]=2.0", 1) assert all("spec_version" not in obj for obj in objs['objects']) -def test_get_objects_spec_version_21_20(backend): - get_objects_spec_version(backend, "?match[spec_version]=2.0,2.1", 5) +def test_get_objects_spec_version_21_20(test_client): + get_objects_spec_version(test_client, "?match[spec_version]=2.0,2.1", 6) -def test_get_objects_spec_version_21(backend): - objs = get_objects_spec_version(backend, "?match[spec_version]=2.1", 5) +def test_get_objects_spec_version_21(test_client): + objs = get_objects_spec_version(test_client, "?match[spec_version]=2.1", 5) assert all(obj['spec_version'] == "2.1" for obj in objs['objects']) -def test_get_objects_spec_version_default(backend): - objs = get_objects_spec_version(backend, "", 5) - assert all(obj['spec_version'] == "2.1" for obj in objs['objects']) +def test_get_objects_spec_version_default(test_client): + get_objects_spec_version(test_client, "", 6) + # Removed spec_version check on results; they are a mix of 2.0 and 2.1 + # (it had checked that all spec_versions are 2.1). This is because version + # filtering (which in this case retains only the latest versions) occurs + # before spec_version filtering in this implementation, which causes a + # latest-version 2.0 object to be retained and the earlier 2.1 version to + # be filtered out. 
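+    #
+    # A concrete illustration from the test data (the per-version dates can
+    # be seen in the spec_version tests further below): the object
+    # malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec has a 2.1 version
+    # modified 2017-01-27T13:49:53.997Z and a later 2.0 version modified
+    # 2018-02-23T18:30:00.000Z, so latest-version filtering keeps only the
+    # 2.0 object and the default spec_version filter never sees its 2.1
+    # version.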
-def get_object_added_after(backend, filter): - r = backend.client.get( +def get_object_added_after(test_client, filter): + r = test_client.get( test.GET_OBJECTS_EP + "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec" + filter, - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -456,23 +596,24 @@ def get_object_added_after(backend, filter): return r.json -def test_get_object_added_after_case1(backend): - objs = get_object_added_after(backend, "?added_after=2018-01-27T13:49:59.997000Z") +def test_get_object_added_after_case1(test_client): + objs = get_object_added_after(test_client, "?added_after=2018-01-27T13:49:59.997000Z") assert 'more' not in objs assert 'objects' not in objs -def test_get_object_added_after_case2(backend): - objs = get_object_added_after(backend, "?added_after=2017-01-27T13:49:59Z") +def test_get_object_added_after_case2(test_client): + objs = get_object_added_after(test_client, "?added_after=2017-01-27T13:49:59Z") assert objs['more'] is False assert len(objs['objects']) == 1 -def test_get_object_limit(backend): - r = backend.client.get( +def test_get_object_limit(test_client): + r = test_client.get( test.GET_OBJECTS_EP + "indicator--6770298f-0fd8-471a-ab8c-1c658a46574e?limit=1", - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -483,10 +624,11 @@ def test_get_object_limit(backend): assert r.headers['X-TAXII-Date-Added-First'] == '2017-12-31T13:49:53.935000Z' assert r.headers['X-TAXII-Date-Added-Last'] == '2017-12-31T13:49:53.935000Z' - r = backend.client.get( + r = test_client.get( test.GET_OBJECTS_EP + "indicator--6770298f-0fd8-471a-ab8c-1c658a46574e?match[version]=all&limit=2", - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -500,10 +642,11 @@ def test_get_object_limit(backend): assert objs['objects'][0]['modified'] == '2016-11-03T12:30:59.000Z' assert objs['objects'][1]['modified'] == '2016-12-25T12:30:59.444Z' - r = backend.client.get( + r = test_client.get( test.GET_OBJECTS_EP + "indicator--6770298f-0fd8-471a-ab8c-1c658a46574e?match[version]=all&limit=2&next=" + objs['next'], - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -511,19 +654,20 @@ def test_get_object_limit(backend): objs = r.json assert objs['more'] is False assert len(objs['objects']) == 1 - assert r.headers['X-TAXII-Date-Added-First'] == '2017-12-31T13:49:53.935000Z' - assert r.headers['X-TAXII-Date-Added-Last'] == '2017-12-31T13:49:53.935000Z' + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-First']) == common.timestamp_to_datetime('2017-12-31T13:49:53.935000Z') + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-Last']) == common.timestamp_to_datetime('2017-12-31T13:49:53.935000Z') @pytest.mark.parametrize("filter, modified", [("?match[version]=2016-12-25T12:30:59.444Z", "2016-12-25T12:30:59.444Z"), ("?match[version]=first", "2016-11-03T12:30:59.000Z"), ("?match[version]=last", "2017-01-27T13:49:53.935Z")]) -def test_get_object_version_single(backend, filter, modified): +def test_get_object_version_single(test_client, filter, modified): objstr = "indicator--6770298f-0fd8-471a-ab8c-1c658a46574e" - r = backend.client.get( + r 
= test_client.get( test.GET_OBJECTS_EP + objstr + filter, - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -532,15 +676,16 @@ def test_get_object_version_single(backend, filter, modified): assert objs['more'] is False assert len(objs['objects']) == 1 assert objs["objects"][0]["id"] == objstr - assert objs["objects"][0]["modified"] == modified + assert common.timestamp_to_datetime(objs["objects"][0]["modified"]) == common.timestamp_to_datetime(modified) -def test_get_object_version_match_all(backend): +def test_get_object_version_match_all(test_client): - r = backend.client.get( + r = test_client.get( test.GET_OBJECTS_EP + "indicator--6770298f-0fd8-471a-ab8c-1c658a46574e?match[version]=all", - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -550,64 +695,72 @@ def test_get_object_version_match_all(backend): assert len(objs['objects']) == 3 -def get_object_spec_version(backend, filter, matching): - r = backend.client.get( +def get_object_spec_version(test_client, filter, matching, num_expected): + r = test_client.get( test.GET_OBJECTS_EP + filter + matching, - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 objs = r.json - assert objs['more'] is False - assert len(objs['objects']) == 1 + assert objs.get('more', False) is False + assert len(objs.get('objects', [])) == num_expected return objs -def test_get_object_spec_version_20(backend): - objs = get_object_spec_version(backend, "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", "?match[spec_version]=2.0") +def test_get_object_spec_version_20(test_client): + objs = get_object_spec_version(test_client, "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", "?match[spec_version]=2.0", 1) assert all('spec_version' not in obj for obj in objs['objects']) -def test_get_object_spec_version_21(backend): - objs = get_object_spec_version(backend, "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", "?match[spec_version]=2.1") - assert all(obj['spec_version'] == "2.1" for obj in objs['objects']) +def test_get_object_spec_version_21(test_client): + objs = get_object_spec_version(test_client, "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", "?match[spec_version]=2.1", 0) + assert all(obj['spec_version'] == "2.1" for obj in objs.get('objects', [])) -def test_get_object_spec_version_2021(backend): - # though this is getting objects with every spec_version, the version filter gets only the latest object. 
- objs = get_object_spec_version(backend, "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", "?match[spec_version]=2.0,2.1") - for obj in objs['objects']: - if obj['id'] == "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec": - assert obj['modified'] == "2018-02-23T18:30:00.000Z" +def test_get_object_spec_version_2021(test_client): + objs = get_object_spec_version(test_client, "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", "?match[spec_version]=2.0,2.1", 1) + returned_id_version = [ + (o["id"], o.get("modified") or o["created"]) + for o in objs["objects"] + ] -def test_get_object_spec_version_default(backend): - objs = get_object_spec_version(backend, "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", "") - assert all(obj['spec_version'] == "2.1" for obj in objs['objects']) + correct_id_version = [ + ("malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", "2018-02-23T18:30:00.000Z") + ] + + assert returned_id_version == correct_id_version -def test_get_manifest_added_after(backend): - r = backend.client.get( +def test_get_object_spec_version_default(test_client): + get_object_spec_version(test_client, "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", "", 1) + + +def test_get_manifest_added_after(test_client): + r = test_client.get( test.GET_MANIFESTS_EP + "?added_after=2017-01-20T00:00:00.000Z", - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 objs = r.json assert objs['more'] is False - # only 2 because one is v2.0 - assert len(objs['objects']) == 2 + assert len(objs['objects']) == 3 -def test_get_manifest_limit(backend): - r = backend.client.get( +def test_get_manifest_limit(test_client): + r = test_client.get( test.GET_MANIFESTS_EP + "?limit=2", - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -615,13 +768,14 @@ def test_get_manifest_limit(backend): objs = r.json assert objs['more'] is True assert len(objs['objects']) == 2 - assert r.headers['X-TAXII-Date-Added-First'] == objs['objects'][0]['date_added'] - assert r.headers['X-TAXII-Date-Added-Last'] == objs['objects'][-1]['date_added'] + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-First']) == common.timestamp_to_datetime(objs['objects'][0]['date_added']) + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-Last']) == common.timestamp_to_datetime(objs['objects'][-1]['date_added']) - r = backend.client.get( + r = test_client.get( test.GET_MANIFESTS_EP + "?limit=2&next=" + objs['next'], - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -629,30 +783,32 @@ def test_get_manifest_limit(backend): objs = r.json assert objs['more'] is True assert len(objs['objects']) == 2 - assert r.headers['X-TAXII-Date-Added-First'] == objs['objects'][0]['date_added'] - assert r.headers['X-TAXII-Date-Added-Last'] == objs['objects'][-1]['date_added'] + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-First']) == common.timestamp_to_datetime(objs['objects'][0]['date_added']) + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-Last']) == common.timestamp_to_datetime(objs['objects'][-1]['date_added']) - r = backend.client.get( - test.GET_MANIFESTS_EP + "?limit=2&next=" + objs['next'], - headers=backend.headers, - follow_redirects=True 
+ r = test_client.get( + test.GET_MANIFESTS_EP + "?limit=3&next=" + objs['next'], + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 objs = r.json assert objs['more'] is False - assert len(objs['objects']) == 1 - assert r.headers['X-TAXII-Date-Added-First'] == objs['objects'][0]['date_added'] - assert r.headers['X-TAXII-Date-Added-Last'] == objs['objects'][-1]['date_added'] + assert len(objs['objects']) == 2 + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-First']) == common.timestamp_to_datetime(objs['objects'][0]['date_added']) + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-Last']) == common.timestamp_to_datetime(objs['objects'][-1]['date_added']) -def test_get_manifest_id(backend): +def test_get_manifest_id(test_client): object_id = "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec" - r = backend.client.get( + r = test_client.get( test.GET_MANIFESTS_EP + "?match[id]=" + object_id, - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -660,14 +816,18 @@ def test_get_manifest_id(backend): objs = r.json assert objs['more'] is False assert len(objs['objects']) == 1 - assert objs['objects'][0]['id'] == object_id + assert all( + obj["id"] == object_id + for obj in objs["objects"] + ) -def test_get_manifest_type(backend): - r = backend.client.get( +def test_get_manifest_type(test_client): + r = test_client.get( test.GET_MANIFESTS_EP + "?match[type]=indicator", - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -678,12 +838,13 @@ def test_get_manifest_type(backend): assert all('indicator' in obj['id'] for obj in objs['objects']) -def get_manifest_version(backend, filter): +def get_manifest_version(test_client, filter): - r = backend.client.get( + r = test_client.get( test.GET_MANIFESTS_EP + filter, - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 @@ -692,41 +853,42 @@ def get_manifest_version(backend, filter): return objs -def test_get_manifest_version_specific(backend): +def test_get_manifest_version_specific(test_client): object_id = "indicator--6770298f-0fd8-471a-ab8c-1c658a46574e" - objs = get_manifest_version(backend, "?match[version]=2016-12-25T12:30:59.444Z") + objs = get_manifest_version(test_client, "?match[version]=2016-12-25T12:30:59.444Z") assert len(objs['objects']) == 1 assert objs["objects"][0]["id"] == object_id - assert objs["objects"][0]["version"] == "2016-12-25T12:30:59.444Z" + assert common.timestamp_to_datetime(objs["objects"][0]["version"]) == common.timestamp_to_datetime("2016-12-25T12:30:59.444Z") -def test_get_manifest_version_first(backend): +def test_get_manifest_version_first(test_client): object_id = "indicator--6770298f-0fd8-471a-ab8c-1c658a46574e" - objs = get_manifest_version(backend, "?match[version]=first") - assert len(objs['objects']) == 5 + objs = get_manifest_version(test_client, "?match[version]=first") + assert len(objs['objects']) == 6 for obj in objs['objects']: if obj['id'] == object_id: - assert obj['version'] == "2016-11-03T12:30:59.000Z" + assert common.timestamp_to_datetime(obj['version']) == 
common.timestamp_to_datetime("2016-11-03T12:30:59.000Z") -def test_get_manifest_version_last(backend): +def test_get_manifest_version_last(test_client): object_id = "indicator--6770298f-0fd8-471a-ab8c-1c658a46574e" - objs = get_manifest_version(backend, "?match[version]=last") - assert len(objs['objects']) == 5 + objs = get_manifest_version(test_client, "?match[version]=last") + assert len(objs['objects']) == 6 for obj in objs['objects']: if obj['id'] == object_id: - assert obj['version'] == "2017-01-27T13:49:53.935Z" + assert common.timestamp_to_datetime(obj['version']) == common.timestamp_to_datetime("2017-01-27T13:49:53.935Z") -def test_get_manifest_version_all(backend): - objs = get_manifest_version(backend, "?match[version]=all") - assert len(objs['objects']) == 7 +def test_get_manifest_version_all(test_client): + objs = get_manifest_version(test_client, "?match[version]=all") + assert len(objs['objects']) == 8 -def get_manifest_spec_version(backend, filter): - r = backend.client.get( +def get_manifest_spec_version(test_client, filter): + r = test_client.get( test.GET_MANIFESTS_EP + filter, - headers=backend.headers, + headers=GET_HEADERS, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -736,39 +898,66 @@ def get_manifest_spec_version(backend, filter): return objs -def test_manifest_spec_version_20(backend): - objs = get_manifest_spec_version(backend, "?match[spec_version]=2.0") +def test_manifest_spec_version_20(test_client): + objs = get_manifest_spec_version(test_client, "?match[spec_version]=2.0") assert len(objs['objects']) == 1 assert all(obj['media_type'] == "application/stix+json;version=2.0" for obj in objs['objects']) -def test_manifest_spec_version_21(backend): - objs = get_manifest_spec_version(backend, "?match[spec_version]=2.1") +def test_manifest_spec_version_21(test_client): + objs = get_manifest_spec_version(test_client, "?match[spec_version]=2.1") assert len(objs['objects']) == 5 assert all(obj['media_type'] == "application/stix+json;version=2.1" for obj in objs['objects']) -def test_manifest_spec_version_2021(backend): - objs = get_manifest_spec_version(backend, "?match[spec_version]=2.0,2.1") +def test_manifest_spec_version_2021(test_client): + objs = get_manifest_spec_version(test_client, "?match[spec_version]=2.0,2.1") # though the spec_version filter is getting all objects, the automatic filtering by version only gets the latest objects - assert len(objs['objects']) == 5 - for obj in objs['objects']: - if obj['id'] == "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec": - assert obj['version'] == "2018-02-23T18:30:00.000Z" + returned = [ + (man["id"], man["media_type"]) + for man in objs["objects"] + ] -def test_manifest_spec_version_default(backend): - objs = get_manifest_spec_version(backend, "") + expected = [ + ("relationship--2f9a9aa9-108a-4333-83e2-4fb25add0463", "application/stix+json;version=2.1"), + ("indicator--cd981c25-8042-4166-8945-51178443bdac", "application/stix+json;version=2.1"), + ("marking-definition--34098fce-860f-48ae-8e50-ebd3cc5e41da", "application/stix+json;version=2.1"), + ("malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", "application/stix+json;version=2.0"), + ("indicator--6770298f-0fd8-471a-ab8c-1c658a46574e", "application/stix+json;version=2.1"), + ("malware-analysis--084a658c-a7ef-4581-a21d-1f600908741b", "application/stix+json;version=2.1") + ] + + assert returned == expected + + +def test_manifest_spec_version_default(test_client): + objs = get_manifest_spec_version(test_client, "") # testing default value - assert 
len(objs['objects']) == 5 - assert all(obj['media_type'] == "application/stix+json;version=2.1" for obj in objs['objects']) + returned = [ + (man["id"], man["media_type"]) + for man in objs["objects"] + ] + + expected = [ + ("relationship--2f9a9aa9-108a-4333-83e2-4fb25add0463", "application/stix+json;version=2.1"), + ("indicator--cd981c25-8042-4166-8945-51178443bdac", "application/stix+json;version=2.1"), + ("marking-definition--34098fce-860f-48ae-8e50-ebd3cc5e41da", "application/stix+json;version=2.1"), + ("malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", "application/stix+json;version=2.0"), + ("indicator--6770298f-0fd8-471a-ab8c-1c658a46574e", "application/stix+json;version=2.1"), + ("malware-analysis--084a658c-a7ef-4581-a21d-1f600908741b", "application/stix+json;version=2.1") + ] -def test_get_version_added_after(backend): - r = backend.client.get( + assert returned == expected + + +def test_get_version_added_after(test_client): + r = test_client.get( test.GET_OBJECTS_EP + "relationship--2f9a9aa9-108a-4333-83e2-4fb25add0463/versions?added_after=2014-05-08T09:00:00Z", - headers=backend.headers, + headers=GET_HEADERS, follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -776,10 +965,11 @@ def test_get_version_added_after(backend): objs = r.json assert objs == {} - r = backend.client.get( + r = test_client.get( test.GET_OBJECTS_EP + "relationship--2f9a9aa9-108a-4333-83e2-4fb25add0463/versions?added_after=2014-05-08T08:00:00Z", - headers=backend.headers, + headers=GET_HEADERS, follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -789,12 +979,13 @@ def test_get_version_added_after(backend): assert len(objs["versions"]) == 1 -def test_get_version_limit(backend): +def test_get_version_limit(test_client): - r = backend.client.get( + r = test_client.get( test.GET_OBJECTS_EP + "indicator--6770298f-0fd8-471a-ab8c-1c658a46574e/versions?limit=1", - headers=backend.headers, + headers=GET_HEADERS, follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -802,14 +993,15 @@ def test_get_version_limit(backend): objs = r.json assert objs["more"] is True assert len(objs["versions"]) == 1 - assert objs["versions"][0] == '2016-11-03T12:30:59.000Z' - assert r.headers['X-TAXII-Date-Added-First'] == '2016-11-03T12:30:59.001000Z' - assert r.headers['X-TAXII-Date-Added-Last'] == '2016-11-03T12:30:59.001000Z' + assert common.timestamp_to_datetime(objs["versions"][0]) == common.timestamp_to_datetime('2016-11-03T12:30:59.000Z') + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-First']) == common.timestamp_to_datetime('2016-11-03T12:30:59.001000Z') + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-Last']) == common.timestamp_to_datetime('2016-11-03T12:30:59.001000Z') - r = backend.client.get( + r = test_client.get( test.GET_OBJECTS_EP + "indicator--6770298f-0fd8-471a-ab8c-1c658a46574e/versions?limit=1&next=" + objs["next"], - headers=backend.headers, + headers=GET_HEADERS, follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -817,14 +1009,15 @@ def test_get_version_limit(backend): objs = r.json assert objs["more"] is True assert len(objs["versions"]) == 1 - assert objs["versions"][0] == '2016-12-25T12:30:59.444Z' - assert r.headers['X-TAXII-Date-Added-First'] == '2016-12-27T13:49:59.000000Z' - assert r.headers['X-TAXII-Date-Added-Last'] == '2016-12-27T13:49:59.000000Z' + assert common.timestamp_to_datetime(objs["versions"][0]) == 
common.timestamp_to_datetime('2016-12-25T12:30:59.444Z') + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-First']) == common.timestamp_to_datetime('2016-12-27T13:49:59.000000Z') + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-Last']) == common.timestamp_to_datetime('2016-12-27T13:49:59.000000Z') - r = backend.client.get( + r = test_client.get( test.GET_OBJECTS_EP + "indicator--6770298f-0fd8-471a-ab8c-1c658a46574e/versions?limit=1&next=" + objs["next"], - headers=backend.headers, + headers=GET_HEADERS, follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -832,16 +1025,17 @@ def test_get_version_limit(backend): objs = r.json assert objs["more"] is False assert len(objs["versions"]) == 1 - assert objs["versions"][0] == '2017-01-27T13:49:53.935Z' - assert r.headers['X-TAXII-Date-Added-First'] == '2017-12-31T13:49:53.935000Z' - assert r.headers['X-TAXII-Date-Added-Last'] == '2017-12-31T13:49:53.935000Z' + assert common.timestamp_to_datetime(objs["versions"][0]) == common.timestamp_to_datetime('2017-01-27T13:49:53.935Z') + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-First']) == common.timestamp_to_datetime('2017-12-31T13:49:53.935000Z') + assert common.timestamp_to_datetime(r.headers['X-TAXII-Date-Added-Last']) == common.timestamp_to_datetime('2017-12-31T13:49:53.935000Z') -def get_version_spec_version(backend, filter): - r = backend.client.get( +def get_version_spec_version(test_client, filter): + r = test_client.get( test.GET_OBJECTS_EP + filter, - headers=backend.headers, + headers=GET_HEADERS, follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 @@ -850,33 +1044,42 @@ def get_version_spec_version(backend, filter): return objs -def test_get_version_spec_version_20(backend): - objs = get_version_spec_version(backend, "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec/versions?match[spec_version]=2.0") +def test_get_version_spec_version_20(test_client): + objs = get_version_spec_version(test_client, "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec/versions?match[spec_version]=2.0") assert len(objs["versions"]) == 1 - assert objs["versions"][0] == "2018-02-23T18:30:00.000Z" + assert common.timestamp_to_datetime(objs["versions"][0]) == common.timestamp_to_datetime("2018-02-23T18:30:00.000Z") -def test_get_version_spec_version_21(backend): - objs = get_version_spec_version(backend, "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec/versions?match[spec_version]=2.1") +def test_get_version_spec_version_21(test_client): + objs = get_version_spec_version(test_client, "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec/versions?match[spec_version]=2.1") assert len(objs["versions"]) == 1 - assert objs["versions"][0] == "2017-01-27T13:49:53.997Z" + assert common.timestamp_to_datetime(objs["versions"][0]) == common.timestamp_to_datetime("2017-01-27T13:49:53.997Z") -def test_get_version_spec_version_2021(backend): - objs = get_version_spec_version(backend, "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec/versions?match[spec_version]=2.0,2.1") +def test_get_version_spec_version_2021(test_client): + objs = get_version_spec_version(test_client, "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec/versions?match[spec_version]=2.0,2.1") assert len(objs["versions"]) == 2 -def test_get_version_spec_version_default(backend): - objs = get_version_spec_version(backend, "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec/versions") +def 
test_get_version_spec_version_default(test_client): + objs = get_version_spec_version(test_client, "malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec/versions") # testing default value for spec_version - assert len(objs["versions"]) == 1 - assert objs["versions"][0] == "2017-01-27T13:49:53.997Z" + returned = [ + common.timestamp_to_datetime(ver) + for ver in objs["versions"] + ] -def test_delete_objects_version(backend): + correct = [ + common.timestamp_to_datetime("2017-01-27T13:49:53.997Z") + ] + + assert returned == correct + + +def test_delete_objects_version(test_client): add_objects = {"objects": []} - coa_object = copy.deepcopy(backend.TEST_OBJECT["objects"][0]) + coa_object = copy.deepcopy(TEST_OBJECT["objects"][0]) object_id = coa_object["id"] coa_object["created"] = "2014-01-27T13:49:53.935Z" @@ -890,20 +1093,22 @@ def test_delete_objects_version(backend): coa_object["modified"] = "2019-01-27T13:49:53.935Z" add_objects["objects"].append(copy.deepcopy(coa_object)) - r_post = backend.client.post( + r_post = test_client.post( test.ADD_OBJECTS_EP, data=json.dumps(add_objects), - headers=backend.post_headers, + headers=POST_HEADERS, + auth=("admin", "Password0") ) assert r_post.status_code == 202 assert r_post.content_type == MEDIA_TYPE_TAXII_V21 status_response = r_post.json assert status_response["success_count"] == 5 # Simple check to assert objects got successfully added to backend - r = backend.client.get( + r = test_client.get( test.ADD_OBJECTS_EP + object_id + "/versions", - headers=backend.headers, + headers=GET_HEADERS, follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -912,18 +1117,20 @@ def test_delete_objects_version(backend): assert objs["more"] is False assert len(objs["versions"]) == 5 - r = backend.client.delete( + r = test_client.delete( test.ADD_OBJECTS_EP + object_id + "?match[version]=2018-01-27T13:49:53.935Z", - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 - r = backend.client.get( + r = test_client.get( test.ADD_OBJECTS_EP + object_id + "/versions", - headers=backend.headers, + headers=GET_HEADERS, follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -933,18 +1140,20 @@ def test_delete_objects_version(backend): assert len(objs["versions"]) == 4 assert "2018-01-27T13:49:53.935Z" not in objs["versions"] - r = backend.client.delete( + r = test_client.delete( test.ADD_OBJECTS_EP + object_id + "?match[version]=first", - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 - r = backend.client.get( + r = test_client.get( test.ADD_OBJECTS_EP + object_id + "/versions", - headers=backend.headers, + headers=GET_HEADERS, follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -954,18 +1163,20 @@ def test_delete_objects_version(backend): assert len(objs["versions"]) == 3 assert "2015-01-27T13:49:53.935Z" not in objs["versions"] - r = backend.client.delete( + r = test_client.delete( test.ADD_OBJECTS_EP + object_id + "?match[version]=last", - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 - r = backend.client.get( + r = 
test_client.get( test.ADD_OBJECTS_EP + object_id + "/versions", - headers=backend.headers, + headers=GET_HEADERS, follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -975,52 +1186,57 @@ def test_delete_objects_version(backend): assert len(objs["versions"]) == 2 assert "2019-01-27T13:49:53.935Z" not in objs["versions"] - r = backend.client.delete( + r = test_client.delete( test.ADD_OBJECTS_EP + object_id + "?match[version]=all", - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 - r = backend.client.get( + r = test_client.get( test.ADD_OBJECTS_EP + object_id + "/versions", - headers=backend.headers, + headers=GET_HEADERS, follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 404 assert r.content_type == MEDIA_TYPE_TAXII_V21 -def test_delete_objects_spec_version(backend): - new_objects = copy.deepcopy(backend.TEST_OBJECT) +def test_delete_objects_spec_version(test_client): + new_objects = copy.deepcopy(TEST_OBJECT) obj = copy.deepcopy(new_objects["objects"][0]) obj["modified"] = "2019-01-27T13:49:53.935Z" obj["spec_version"] = "2.0" new_objects["objects"].append(copy.deepcopy(obj)) object_id = obj["id"] - r_post = backend.client.post( + r_post = test_client.post( test.ADD_OBJECTS_EP, data=json.dumps(new_objects), - headers=backend.post_headers, + headers=POST_HEADERS, + auth=("admin", "Password0") ) assert r_post.status_code == 202 assert r_post.content_type == MEDIA_TYPE_TAXII_V21 - r = backend.client.delete( + r = test_client.delete( test.ADD_OBJECTS_EP + object_id + "?match[spec_version]=2.0", - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 - r = backend.client.get( + r = test_client.get( test.ADD_OBJECTS_EP + object_id + "/versions", - headers=backend.headers, + headers=GET_HEADERS, follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -1030,25 +1246,27 @@ def test_delete_objects_spec_version(backend): assert len(objs["versions"]) == 1 assert "2019-01-27T13:49:53.935Z" not in objs["versions"] - r = backend.client.delete( + r = test_client.delete( test.ADD_OBJECTS_EP + object_id + "?match[spec_version]=2.1", - headers=backend.headers, - follow_redirects=True + headers=GET_HEADERS, + follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 assert r.content_type == MEDIA_TYPE_TAXII_V21 - r = backend.client.get( + r = test_client.get( test.ADD_OBJECTS_EP + object_id + "/versions", - headers=backend.headers, + headers=GET_HEADERS, follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 404 assert r.content_type == MEDIA_TYPE_TAXII_V21 -def test_SCO_versioning(backend): +def test_SCO_versioning(test_client): SCO = { "objects": [ @@ -1064,18 +1282,20 @@ def test_SCO_versioning(backend): } object_id = SCO["objects"][0]["id"] - r_post = backend.client.post( + r_post = test_client.post( test.ADD_OBJECTS_EP, data=json.dumps(copy.deepcopy(SCO)), - headers=backend.post_headers, + headers=POST_HEADERS, + auth=("admin", "Password0") ) assert r_post.status_code == 202 assert r_post.content_type == MEDIA_TYPE_TAXII_V21 - r = backend.client.get( + r = test_client.get( test.ADD_OBJECTS_EP + object_id + "?match[version]=all", - headers=backend.headers, + 
headers=GET_HEADERS, follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -1083,10 +1303,11 @@ def test_SCO_versioning(backend): objs = r.json assert len(objs["objects"]) == 1 - r = backend.client.get( + r = test_client.get( test.ADD_OBJECTS_EP + object_id + "?match[version]=first", - headers=backend.headers, + headers=GET_HEADERS, follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -1094,10 +1315,11 @@ def test_SCO_versioning(backend): objs = r.json assert len(objs["objects"]) == 1 - r = backend.client.get( + r = test_client.get( test.ADD_OBJECTS_EP + object_id + "?match[version]=last", - headers=backend.headers, + headers=GET_HEADERS, follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -1105,10 +1327,11 @@ def test_SCO_versioning(backend): objs = r.json assert len(objs["objects"]) == 1 - r = backend.client.get( + r = test_client.get( test.ADD_OBJECTS_EP + object_id + "?added_after=2017-01-27T13:49:53.935Z", - headers=backend.headers, + headers=GET_HEADERS, follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -1116,10 +1339,11 @@ def test_SCO_versioning(backend): objs = r.json assert len(objs["objects"]) == 1 - r = backend.client.get( + r = test_client.get( test.ADD_OBJECTS_EP + object_id + "?added_after=" + common.datetime_to_string_stix(common.get_timestamp()), - headers=backend.headers, + headers=GET_HEADERS, follow_redirects=True, + auth=("admin", "Password0") ) assert r.status_code == 200 @@ -1131,91 +1355,117 @@ def test_SCO_versioning(backend): # test non-200 responses -def test_get_api_root_information_not_existent(backend): - r = backend.client.get("/trustgroup2/", headers=backend.headers) +def test_get_api_root_information_not_existent(test_client): + r = test_client.get( + "/trustgroup2/", headers=GET_HEADERS, auth=("admin", "Password0") + ) assert r.status_code == 404 -def test_get_collection_not_existent(backend): +def test_get_collection_not_existent(test_client): - r = backend.client.get( + r = test_client.get( test.NON_EXISTENT_COLLECTION_EP, - headers=backend.headers, + headers=GET_HEADERS, + auth=("admin", "Password0") ) assert r.status_code == 404 -def test_get_collections_401(backend): - r = backend.client.get(test.COLLECTIONS_EP) +def test_get_collections_401(test_client): + r = test_client.get(test.COLLECTIONS_EP, headers=GET_HEADERS) assert r.status_code == 401 -def test_get_collections_404(backend): +def test_get_collections_404(test_client): # note that the api root "carbon1" is nonexistent - r = backend.client.get("/carbon1/collections/", headers=backend.headers) + r = test_client.get( + "/carbon1/collections/", + headers=GET_HEADERS, + auth=("admin", "Password0") + ) assert r.status_code == 404 -def test_get_collection_404(backend): +def test_get_collection_404(test_client): # note that api root "carbon1" is nonexistent - r = backend.client.get("/carbon1/collections/12345678-1234-1234-1234-123456789012/", headers=backend.headers) + r = test_client.get( + "/carbon1/collections/12345678-1234-1234-1234-123456789012/", + headers=GET_HEADERS, + auth=("admin", "Password0") + ) assert r.status_code == 404 -def test_get_status_401(backend): +def test_get_status_401(test_client): # non existent object ID but shouldn't matter as the request should never pass login auth - r = backend.client.get(test.API_ROOT_EP + "status/2223/") + r = test_client.get( + test.API_ROOT_EP + "status/2223/", + headers=GET_HEADERS + ) assert r.status_code == 401 
-def test_get_status_404(backend): - r = backend.client.get(test.API_ROOT_EP + "status/22101993/", headers=backend.headers) +def test_get_status_404(test_client): + r = test_client.get( + test.API_ROOT_EP + "status/22101993/", + headers=GET_HEADERS, + auth=("admin", "Password0") + ) assert r.status_code == 404 -def test_get_object_manifest_401(backend): +def test_get_object_manifest_401(test_client): # non existent object ID but shouldnt matter as the request should never pass login - r = backend.client.get(test.COLLECTIONS_EP + "24042009/manifest/") + r = test_client.get(test.COLLECTIONS_EP + "24042009/manifest/") assert r.status_code == 401 -def test_get_object_manifest_403(backend): - r = backend.client.get( +def test_get_object_manifest_403(test_client): + r = test_client.get( test.FORBIDDEN_COLLECTION_EP + "manifest/", - headers=backend.headers, + headers=GET_HEADERS, + auth=("admin", "Password0") ) assert r.status_code == 403 -def test_get_object_manifest_404(backend): +def test_get_object_manifest_404(test_client): # note that collection ID doesnt exist - r = backend.client.get(test.COLLECTIONS_EP + "24042009/manifest/", headers=backend.headers) + r = test_client.get( + test.COLLECTIONS_EP + "24042009/manifest/", + headers=GET_HEADERS, + auth=("admin", "Password0") + ) assert r.status_code == 404 -def test_get_object_401(backend): - r = backend.client.get( +def test_get_object_401(test_client): + r = test_client.get( test.GET_OBJECTS_EP + "malware--fdd60b30-b67c-11e3-b0b9-f01faf20d111/", + headers=GET_HEADERS ) assert r.status_code == 401 -def test_get_object_403(backend): +def test_get_object_403(test_client): """note that the 403 code is still being generated at the Collection resource level (i.e. we dont have access rights to the collection specified, not just the object) """ - r = backend.client.get( + r = test_client.get( test.FORBIDDEN_COLLECTION_EP + "objects/indicator--b81f86b9-975b-bb0b-775e-810c5bd45b4f/", - headers=backend.headers, + headers=GET_HEADERS, + auth=("admin", "Password0") ) assert r.status_code == 403 -def test_get_object_404(backend): +def test_get_object_404(test_client): # TAXII spec allows for a 404 or empty bundle if object is not found - r = backend.client.get( + r = test_client.get( test.GET_OBJECTS_EP + "malware--cee60c30-a68c-11e3-b0c1-a01aac20d000/", - headers=backend.headers, + headers=GET_HEADERS, + auth=("admin", "Password0") ) objs = r.json @@ -1225,69 +1475,72 @@ def test_get_object_404(backend): assert r.status_code == 404 -def test_get_or_add_objects_401(backend): +def test_get_or_add_objects_401(test_client): # note that no credentials are supplied with requests # get_objects() - r = backend.client.get(test.ADD_OBJECTS_EP) + r = test_client.get(test.ADD_OBJECTS_EP, headers=GET_HEADERS) assert r.status_code == 401 # add_objects() - bad_headers = copy.deepcopy(backend.post_headers) - bad_headers.pop("Authorization") - r_post = backend.client.post( + r_post = test_client.post( test.ADD_OBJECTS_EP, - data=json.dumps(copy.deepcopy(backend.TEST_OBJECT)), - headers=bad_headers, + data=json.dumps(copy.deepcopy(TEST_OBJECT)), + headers=POST_HEADERS, ) assert r_post.status_code == 401 -def get_or_add_objects_403(backend): +def test_get_or_add_objects_403(test_client): """note that the 403 code is still being generated at the Collection resource level (i.e. 
we dont have access rights to the collection specified here, not just the object)
    """
    # get_objects()
-    r = backend.client.get(
+    r = test_client.get(
         test.FORBIDDEN_COLLECTION_EP + "objects/",
-        headers=backend.headers,
+        headers=GET_HEADERS,
+        auth=("admin", "Password0")
     )

     assert r.status_code == 403

     # add_objects
-    r_post = backend.client.post(
+    r_post = test_client.post(
         test.FORBIDDEN_COLLECTION_EP + "objects/",
-        data=json.dumps(copy.deepcopy(backend.TEST_OBJECT)),
-        headers=backend.post_headers,
+        data=json.dumps(copy.deepcopy(TEST_OBJECT)),
+        headers=POST_HEADERS,
+        auth=("admin", "Password0")
     )

     assert r_post.status_code == 403


-def test_get_or_add_objects_404(backend):
+def test_get_or_add_objects_404(test_client):
     # get_objects()
-    r = backend.client.get(
+    r = test_client.get(
         test.NON_EXISTENT_COLLECTION_EP + "objects/",
-        headers=backend.headers,
+        headers=GET_HEADERS,
+        auth=("admin", "Password0")
     )

     assert r.status_code == 404

     # add_objects
-    r_post = backend.client.post(
+    r_post = test_client.post(
         test.NON_EXISTENT_COLLECTION_EP + "objects/",
-        data=json.dumps(copy.deepcopy(backend.TEST_OBJECT)),
-        headers=backend.post_headers,
+        data=json.dumps(copy.deepcopy(TEST_OBJECT)),
+        headers=POST_HEADERS,
+        auth=("admin", "Password0")
     )

     assert r_post.status_code == 404


-def test_get_or_add_objects_422(backend):
+def test_get_or_add_objects_422(test_client):
     """only applies to adding objects as would arise if user content is malformed"""
-    r_post = backend.client.post(
+    r_post = test_client.post(
         test.ADD_OBJECTS_EP,
-        data=json.dumps(copy.deepcopy(backend.TEST_OBJECT["objects"][0])),
-        headers=backend.post_headers,
+        data=json.dumps(copy.deepcopy(TEST_OBJECT["objects"][0])),
+        headers=POST_HEADERS,
+        auth=("admin", "Password0")
     )

     assert r_post.status_code == 422
@@ -1295,19 +1548,22 @@ def test_get_or_add_objects_422(backend):
     error_data = r_post.json
     assert error_data["title"] == "ProcessingError"
     assert error_data["http_status"] == '422'
-    assert "While processing supplied content, an error occurred" in error_data["description"]


-def test_object_pagination_bad_limit_value_400(backend):
-    r = backend.client.get(test.GET_OBJECTS_EP + "?limit=-20",
-                           headers=backend.headers)
+def test_object_pagination_bad_limit_value_400(test_client):
+    r = test_client.get(
+        test.GET_OBJECTS_EP + "?limit=-20",
+        headers=GET_HEADERS,
+        auth=("admin", "Password0")
+    )
     assert r.status_code == 400


-def test_object_pagination_changing_params_400(backend):
-    r = backend.client.get(
+def test_object_pagination_changing_params_400(test_client):
+    r = test_client.get(
         test.GET_OBJECTS_EP + "?match[version]=all&limit=2",
-        headers=backend.headers
+        headers=GET_HEADERS,
+        auth=("admin", "Password0")
     )
     assert r.status_code == 200
     assert r.content_type == MEDIA_TYPE_TAXII_V21
@@ -1315,9 +1571,10 @@ def test_object_pagination_changing_params_400(backend):
     assert len(objs["objects"]) == 2
     assert objs["more"]

-    r = backend.client.get(
+    r = test_client.get(
         test.GET_OBJECTS_EP + "?match[version]=all&limit=2&next=" + objs["next"],
-        headers=backend.headers
+        headers=GET_HEADERS,
+        auth=("admin", "Password0")
     )
     assert r.status_code == 200
     assert r.content_type == MEDIA_TYPE_TAXII_V21
@@ -1325,9 +1582,10 @@ def test_object_pagination_changing_params_400(backend):
     assert len(objs["objects"]) == 2
     assert objs["more"]

-    r = backend.client.get(
+    r = test_client.get(
         test.GET_OBJECTS_EP + "?match[version]=first&limit=2&next=" + objs["next"],
-        headers=backend.headers
+        headers=GET_HEADERS,
+        auth=("admin", "Password0")
     )
     assert r.status_code == 400
     assert r.content_type == MEDIA_TYPE_TAXII_V21
@@ -1335,76 +1593,71 @@ def test_object_pagination_changing_params_400(backend):
     assert objs["title"] == "ProcessingError"


-# test other config values
-# this may warrant some cleanup and organization later
-class TestTAXIIWithNoConfig(TaxiiTest):
-    type = "memory_no_config"
-
-
 def test_no_config():
     with pytest.raises(exceptions.InitializationError) as e:
-        server = TestTAXIIWithNoConfig()
-        server.setUp()
+        create_app({})
     assert str(e.value) == "You did not give backend information in your config."
-    server.tearDown()


-class TestTAXIIWithNoTAXIISection(TaxiiTest):
-    type = "no_taxii"
-
-
-@pytest.fixture(scope="module")
-def no_taxii_section():
-    server = TestTAXIIWithNoTAXIISection()
-    server.setUp()
-    yield server
-    server.tearDown()
-
-
-def test_default_taxii_no_taxii_section(no_taxii_section):
-    assert no_taxii_section.app.taxii_config['max_page_size'] == 100
+def test_default_taxii_no_taxii_section():
+    configuration = {
+        "backend": {
+            "module_class": "MemoryBackend",
+            "filename": DATA_FILE,
+            "interop_requirements": True,
+        },
+        "users": {
+            "admin": "Password0"
+        }
+    }

-class TestTAXIIWithNoAuthSection(TaxiiTest):
-    type = "no_auth"
+    app = create_app(configuration)
+    assert app.taxii_config['max_page_size'] == 100

-@pytest.fixture(scope="module")
-def no_auth_section():
-    server = TestTAXIIWithNoAuthSection()
-    server.setUp()
-    yield server
-    server.tearDown()
+    app.medallion_backend.close()

-def test_default_userpass_no_auth_section(no_auth_section):
-    assert no_auth_section.app.users_config.get("user") == "pass"
-
+def test_default_userpass_no_auth_section():
+    configuration = {
+        "backend": {
+            "module_class": "MemoryBackend",
+            "filename": DATA_FILE,
+            "interop_requirements": True,
+        },
+        "taxii": {
+            "max_page_size": 20,
+        },
+    }

-class TestTAXIIWithNoBackendSection(TaxiiTest):
-    type = "no_backend"

+    app = create_app(configuration)
+    assert app.users_config.get("user") == "pass"

-@pytest.fixture(scope="module")
-def no_backend_section():
-    server = TestTAXIIWithNoBackendSection()
-    server.setUp()
-    yield server
-    server.tearDown()
+    app.medallion_backend.close()


 def test_default_backend_no_backend_section():
     with pytest.raises(exceptions.InitializationError) as e:
-        server = TestTAXIIWithNoBackendSection()
-        server.setUp()
-    assert str(e.value) == "You did not give backend information in your config."
-        server.tearDown()
+        configuration = {
+            "users": {
+                "admin": "Password0",
+            },
+            "taxii": {
+                "max_page_size": 20,
+            },
+        }
+
+        create_app(configuration)
+
+    assert str(e.value) == "You did not give backend information in your config."
# test collections with different can_read and can_write values # test if program will accept duplicate objects being posted -def test_object_already_present(backend): +def test_object_already_present(test_client): object_copy = { "created": "2014-05-08T09:00:00.000Z", "modified": "2014-05-08T09:00:00.000Z", @@ -1421,18 +1674,20 @@ def test_object_already_present(backend): add_objects["objects"].append(object_copy) # add object to test against - r_post = backend.client.post( + r_post = test_client.post( test.ADD_OBJECTS_EP, data=json.dumps(add_objects), - headers=backend.post_headers, + headers=POST_HEADERS, + auth=("admin", "Password0") ) add_objects["objects"].append(object_copy2) # try to add a duplicate, with and without the modified key (both should fail) - r_post = backend.client.post( + r_post = test_client.post( test.ADD_OBJECTS_EP, data=json.dumps(add_objects), - headers=backend.post_headers, + headers=POST_HEADERS, + auth=("admin", "Password0") ) status_data = r_post.json assert r_post.status_code == 202 @@ -1442,36 +1697,110 @@ def test_object_already_present(backend): assert "successes" in status_data -def test_save_to_file(backend): - if backend.type != "memory": +def test_save_to_file(flask_app): + if flask_app.backend_config["module_class"] != "MemoryBackend": pytest.skip() - with tempfile.NamedTemporaryFile(mode='w') as tmpfile: - backend.app.medallion_backend.save_data_to_file(tmpfile.name) + with tempfile.NamedTemporaryFile(mode='w+') as tmpfile: + flask_app.medallion_backend.save_data_to_file(tmpfile) tmpfile.flush() - with open(tmpfile.name) as f: - data = json.load(f) - assert data['trustgroup1']['collections'][0]['id'] == "472c94ae-3113-4e3e-a4dd-a9f4ac7471d4" - assert data['trustgroup1']['collections'][1]['id'] == "365fed99-08fa-fdcd-a1b3-fb247eb41d01" - assert data['trustgroup1']['collections'][2]['id'] == "91a7b528-80eb-42ed-a74d-c6fbd5a26116" - assert data['trustgroup1']['collections'][3]['id'] == "52892447-4d7e-4f70-b94d-d7f22742ff63" - - -def test_status_cleanup(backend_without_threads): - backend_app = backend_without_threads.app.medallion_backend - # add a status with the current time, which should not be deleted. 
- new_status = common.generate_status(common.datetime_to_string(datetime.datetime.now()), "pending", 0, 0, 0) - backend_app._add_status('trustgroup1', new_status) - statuses = backend_app._get_api_root_statuses('trustgroup1') - assert backend_without_threads.count(statuses) == 3 + tmpfile.seek(0) + data = json.load(tmpfile) + assert "472c94ae-3113-4e3e-a4dd-a9f4ac7471d4" in data['trustgroup1']['collections'] + assert "365fed99-08fa-fdcd-a1b3-fb247eb41d01" in data['trustgroup1']['collections'] + assert "91a7b528-80eb-42ed-a74d-c6fbd5a26116" in data['trustgroup1']['collections'] + assert "52892447-4d7e-4f70-b94d-d7f22742ff63" in data['trustgroup1']['collections'] + + +@pytest.fixture(params=TestServers) +def flask_app_without_threads(mongo_client, backup_filter_settings, request): + configuration = { + "backend": { + "filename": DATA_FILE, + "run_cleanup_threads": False, + "interop_requirements": True + }, + "users": { + "root": "example", + }, + "taxii": { + "max_page_size": 20 + }, + } + + if request.param in request.config.getoption("backends"): + _set_backend(configuration, mongo_client, request) + + app = create_app(configuration) + + yield app + + # Important for releasing backend resources + app.medallion_backend.close() + + else: + pytest.skip() + + +def test_status_cleanup(flask_app_without_threads): + test_client = flask_app_without_threads.test_client() + + # add a status with the current time (by doing something which generates + # one), which should not be deleted. + new_status_resp = test_client.post( + test.ADD_OBJECTS_EP, + json=TEST_OBJECT, + headers=POST_HEADERS, + auth=("root", "example") + ) + + assert new_status_resp.status_code == 202 + + expected_status_ids = [ + new_status_resp.json["id"], + "2d086da7-4bdc-4f91-900e-d77486753710", + "2d086da7-4bdc-4f91-900e-f4566be4b780" + ] + + resps = [ + test_client.get( + test.API_ROOT_EP + "status/" + status_id + "/", + headers=GET_HEADERS, + auth=("root", "example") + ) + for status_id in expected_status_ids + ] + + resp_status_codes = [ + resp.status_code for resp in resps + ] + + assert resp_status_codes == [200, 200, 200] + + backend_app = flask_app_without_threads.medallion_backend backend_app.status_retention = SECONDS_IN_24_HOURS backend_app._pop_old_statuses() - assert backend_without_threads.count(statuses) == 1 + + resps = [ + test_client.get( + test.API_ROOT_EP + "status/" + status_id + "/", + headers=GET_HEADERS, + auth=("root", "example") + ) + for status_id in expected_status_ids + ] + + resp_status_codes = [ + resp.status_code for resp in resps + ] + + assert resp_status_codes == [200, 404, 404] -def test_get_objects_match_type_version(backend): - r = backend.client.get( +def test_get_objects_match_type_version(test_client): + r = test_client.get( test.GET_OBJECTS_EP + "?match[type]=indicator&match[version]=2017-01-27T13:49:53.935Z", - headers=backend.headers, + headers=GET_HEADERS, + auth=("admin", "Password0") ) obj = r.json @@ -1482,7 +1811,7 @@ def test_get_objects_match_type_version(backend): assert obj['objects'][0]['id'] == 'indicator--6770298f-0fd8-471a-ab8c-1c658a46574e' -def test_get_objects_match_type_spec_version(backend): +def test_get_objects_match_type_spec_version(test_client): object_id = "indicator--68794cd5-28db-429d-ab1e-1256704ef906" newobj = { "objects": [ @@ -1497,14 +1826,16 @@ def test_get_objects_match_type_spec_version(backend): ] } - backend.client.post( + test_client.post( test.GET_OBJECTS_EP, data=json.dumps(copy.deepcopy(newobj)), - headers=backend.post_headers + headers=POST_HEADERS, 
+ auth=("admin", "Password0") ) - r = backend.client.get( + r = test_client.get( test.GET_OBJECTS_EP + "?match[type]=indicator&match[spec_version]=2.1", - headers=backend.headers, + headers=GET_HEADERS, + auth=("admin", "Password0") ) obj = r.json @@ -1518,9 +1849,10 @@ def test_get_objects_match_type_spec_version(backend): assert obj['objects'][1]['id'] == "indicator--6770298f-0fd8-471a-ab8c-1c658a46574e" assert obj['objects'][1]['spec_version'] == "2.1" - r = backend.client.get( + r = test_client.get( test.GET_OBJECTS_EP + "?match[type]=indicator&match[spec_version]=2.0", - headers=backend.headers, + headers=GET_HEADERS, + auth=("admin", "Password0") ) obj = r.json @@ -1530,3 +1862,132 @@ def test_get_objects_match_type_spec_version(backend): assert obj['objects'][0]['type'] == "indicator" assert obj['objects'][0]['id'] == object_id assert obj['objects'][0]['spec_version'] == "2.0" + + +def test_interop_tier1_filters(test_client): + r = test_client.get( + test.GET_OBJECTS_EP + "?match[pattern_type]=stix", + headers=GET_HEADERS, + auth=("admin", "Password0") + ) + + assert r.status_code == 200 + assert r.content_type == MEDIA_TYPE_TAXII_V21 + objs = r.json + assert objs['more'] is False + assert len(objs['objects']) == 2 + assert all("indicator" == obj["type"] for obj in objs["objects"]) + + +def test_interop_tier2_filters(test_client): + r = test_client.get( + test.GET_OBJECTS_EP + "?match[malware_types]=remote-access-trojan", + headers=GET_HEADERS, + auth=("admin", "Password0") + ) + + assert r.status_code == 200 + assert r.content_type == MEDIA_TYPE_TAXII_V21 + objs = r.json + assert objs['more'] is False + + obj_ids = [obj["id"] for obj in objs["objects"]] + assert obj_ids == ["malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec"] + + +def test_interop_tier3_filters(test_client): + r = test_client.get( + test.GET_OBJECTS_EP + "?match[tlp]=green", + headers=GET_HEADERS, + auth=("admin", "Password0") + ) + + assert r.status_code == 200 + assert r.content_type == MEDIA_TYPE_TAXII_V21 + objs = r.json + assert objs['more'] is False + + obj_ids = [obj["id"] for obj in objs["objects"]] + assert obj_ids == ["indicator--cd981c25-8042-4166-8945-51178443bdac"] + + +def test_interop_relationship_filters(test_client): + r = test_client.get( + test.GET_OBJECTS_EP + "?match[relationships-all]=malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", + headers=GET_HEADERS, + auth=("admin", "Password0") + ) + + assert r.status_code == 200 + assert r.content_type == MEDIA_TYPE_TAXII_V21 + objs = r.json + assert objs['more'] is False + + obj_ids = [obj["id"] for obj in objs["objects"]] + assert obj_ids == ["relationship--2f9a9aa9-108a-4333-83e2-4fb25add0463"] + + +def test_interop_calculation_filters(test_client): + r = test_client.get( + test.GET_OBJECTS_EP + "?match[confidence-gte]=10", + headers=GET_HEADERS, + auth=("admin", "Password0") + ) + + assert r.status_code == 200 + assert r.content_type == MEDIA_TYPE_TAXII_V21 + objs = r.json + assert objs['more'] is False + + obj_ids = [obj["id"] for obj in objs["objects"]] + assert obj_ids == ["malware-analysis--084a658c-a7ef-4581-a21d-1f600908741b"] + + +@pytest.fixture(params=TestServers) +def test_client_no_interop(mongo_client, backup_filter_settings, request): + configuration = { + "backend": { + "filename": DATA_FILE, + "interop_requirements": False + }, + "users": { + "admin": "Password0", + }, + "taxii": { + "max_page_size": 20 + }, + } + + if request.param in request.config.getoption("backends"): + _set_backend(configuration, mongo_client, request) + flask_app = 
create_app(configuration) + + yield flask_app.test_client() + + flask_app.medallion_backend.close() + + else: + pytest.skip() + + +@pytest.mark.parametrize("query", [ + "match[pattern_type]=stix", + "match[malware_types]=remote-access-trojan", + "match[tlp]=green", + "match[relationships-all]=malware--c0931cc6-c75e-47e5-9036-78fabc95d4ec", + "match[confidence-gte]=10" +]) +def test_interop_filters_interop_disabled(test_client_no_interop, query): + # With interop filters disabled, all of these queries should return the + # same thing. + r = test_client_no_interop.get( + test.GET_OBJECTS_EP + "?" + query, + headers=GET_HEADERS, + auth=("admin", "Password0") + ) + + assert r.status_code == 200 + assert r.content_type == MEDIA_TYPE_TAXII_V21 + objs = r.json + assert objs['more'] is False + assert len(objs['objects']) == 6 diff --git a/medallion/test/test_memory_backend.py b/medallion/test/test_memory_backend.py new file mode 100644 index 00000000..574c31f3 --- /dev/null +++ b/medallion/test/test_memory_backend.py @@ -0,0 +1,784 @@ +import json +import operator +import tempfile + +import pytest + +from medallion import exceptions +import medallion.backends.memory_backend +import medallion.common +import medallion.filters.common +import medallion.filters.memory_filter + + +def test_memory_backend_malformed_datafile(): + + content = { + "/discovery": {}, + "apiroot": { + "collections": { + "00000000-0000-0000-0000-000000000000": { + "objects": [ + { + "id": "foo", + "__meta": {} + } + ] + } + } + } + } + + with tempfile.TemporaryFile(mode="w+", encoding="utf-8") as fp: + json.dump(content, fp) + + # Exercise the error about missing date_added in object meta + fp.seek(0) + with pytest.raises(exceptions.MemoryBackendError): + medallion.backends.memory_backend.MemoryBackend( + filename=fp + ) + + # Add date_added; try now to exercise the error about missing + # media_type. 
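+        # (The "tomorrow" value below is deliberately arbitrary: only the
+        # presence of the date_added key should matter here, since the error
+        # being exercised is the still-missing media_type.)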
+ content["apiroot"]["collections"]["00000000-0000-0000-0000-000000000000"]["objects"][0]["__meta"]["date_added"] = "tomorrow" + fp.seek(0) + json.dump(content, fp) + fp.truncate() + + fp.seek(0) + with pytest.raises(exceptions.MemoryBackendError): + medallion.backends.memory_backend.MemoryBackend( + filename=fp + ) + + +def test_meta_repr(): + meta = medallion.backends.memory_backend.Meta( + "1952-06-14T11:28:21.123456Z", + "application/stix+json;version=2.1", + "2006-11-28T07:01:52.654321Z" + ) + + assert repr(meta) == 'Meta("1952-06-14T11:28:21.123456Z", "application/stix+json;version=2.1", "2006-11-28T07:01:52.654321Z")' + + +def test_toplevel_property_matcher(): + matcher = medallion.filters.memory_filter.TopLevelPropertyMatcher( + "type", filter_info=medallion.filters.common.TAXII_STRING_FILTER + ) + + obj = { + "type": "foo" + } + + assert matcher.match(obj, {"foo"}) + assert matcher.match(obj, {"foo", "bar"}) + assert matcher.match(obj, ["foo", "bar"]) + assert not matcher.match(obj, {"bar"}) + + del obj["type"] + + assert not matcher.match(obj, {"foo"}) + + +def test_toplevel_property_matcher_list(): + matcher = medallion.filters.memory_filter.TopLevelPropertyMatcher( + "names", filter_info=medallion.filters.common.TAXII_STRING_FILTER + ) + + obj = { + "names": ["alice", "bob"] + } + + assert matcher.match(obj, {"alice"}) + assert matcher.match(obj, {"alice", "carol"}) + assert not matcher.match(obj, {"carol"}) + + +def test_toplevel_property_matcher_coerce(): + matcher = medallion.filters.memory_filter.TopLevelPropertyMatcher( + "confidence", filter_info=medallion.filters.common.TAXII_INTEGER_FILTER + ) + + obj = { + "confidence": "01" + } + + match_values = matcher.coerce_values(["04"]) + assert match_values == {4} + + assert matcher.match(obj, {1}) + assert matcher.match(obj, {1, 2}) + assert not matcher.match(obj, {2}) + + +def test_toplevel_property_matcher_type_mismatch(): + matcher = medallion.filters.memory_filter.TopLevelPropertyMatcher( + "confidence", filter_info=medallion.filters.common.TAXII_INTEGER_FILTER + ) + + obj = { + "confidence": "foo" + } + + assert not matcher.match(obj, {1}) + + obj = { + "confidence": ["foo", "bar"] + } + + assert not matcher.match(obj, {1}) + + +def test_sub_property_matcher(): + matcher = medallion.filters.memory_filter.SubPropertyMatcher( + "foo", filter_info=medallion.filters.common.TAXII_INTEGER_FILTER + ) + + assert not matcher.match({ + "foo": "04" + }, {4}) + assert matcher.match({ + "bar": { + "foo": "04" + } + }, {4, 8}) + assert matcher.match({ + "bar": { + "baz": { + "foo": "04" + } + } + }, {4, 8}) + assert matcher.match({ + "bar": { + "foo": "99", + "baz": { + "foo": "04" + } + } + }, {4, 8}) + assert not matcher.match({ + "bar": { + "foo": "04" + } + }, {2}) + assert not matcher.match({ + "bar": { + "foo": ["04"] + } + }, {4}) + assert matcher.match({ + "bar": [ + { + "foo": "04" + }, + { + "foo": "99" + } + ] + }, {5, 4}) + assert not matcher.match({ + "__meta": { + "foo": 4 + } + }, {4}) + assert not matcher.match({ + "bar": { + "foo": "not_an_int" + } + }, {1}) + + +def test_tlp_matcher(): + + matcher = medallion.filters.memory_filter.TLPMatcher() + + white = medallion.filters.common.tlp_short_name_to_id("white") + green = medallion.filters.common.tlp_short_name_to_id("green") + amber = medallion.filters.common.tlp_short_name_to_id("amber") + red = medallion.filters.common.tlp_short_name_to_id("red") + + obj = { + "object_marking_refs": [green], + "granular_markings": [ + { + "marking_ref": red, + "selectors": [] + } + ] 
+ } + + assert matcher.match(obj, {green}) + assert matcher.match(obj, {red}) + assert matcher.match(obj, {red, white}) + assert not matcher.match(obj, {amber}) + assert not matcher.match(obj, {"foo"}) + assert not matcher.match(obj, {amber, "foo"}) + + +def test_ref_property_matcher(): + + matcher = medallion.filters.memory_filter.RelationshipsAllMatcher() + + obj = { + "prop": [ + { + "foo_ref": "a--1" + }, + { + "foo_ref": "b--2" + } + ] + } + + assert matcher.match(obj, {"a--1"}) + assert matcher.match(obj, {"b--2"}) + assert matcher.match(obj, {"a--1", "c--3"}) + assert not matcher.match(obj, {"c--3"}) + assert not matcher.match(obj, {"c--3", "d--4"}) + + +def test_refs_property_matcher(): + + matcher = medallion.filters.memory_filter.RelationshipsAllMatcher() + + obj = { + "prop": [ + { + "foo_refs": ["a--1", "b--2"] + }, + { + "foo_refs": ["c--3", "d--4"] + } + ] + } + + assert matcher.match(obj, {"a--1"}) + assert matcher.match(obj, {"b--2"}) + assert matcher.match(obj, {"c--3"}) + assert matcher.match(obj, {"d--4", "f--6"}) + assert not matcher.match(obj, {"f--6", "g--7"}) + + +def test_calculation_matcher(): + + matcher = medallion.filters.memory_filter.CalculationMatcher( + "foo", operator.gt, + filter_info=medallion.filters.common.TAXII_INTEGER_FILTER + ) + + obj = { + "foo": "05", + "bar": { + "foo": "007" + }, + "__meta": { + "foo": 8 + } + } + + assert matcher.match(obj, {3}) + assert matcher.match(obj, {3, 6}) + assert matcher.match(obj, {6}) + assert not matcher.match(obj, {7}) + + obj = { + "foo": "not_an_int" + } + + assert not matcher.match(obj, {3}) + + +def test_added_after_matcher(): + matcher = medallion.filters.memory_filter.AddedAfterMatcher() + + obj = { + "someprop": "somevalue", + "__meta": { + "date_added": "1991-02-09T08:28:23.474Z", + "media_type": "application/stix+json;version=2.1" + } + } + + medallion.backends.memory_backend._metafy_object(obj) + + assert matcher.match(obj, { + medallion.common.string_to_datetime("1981-08-01T05:14:03.489Z") + }) + assert not matcher.match(obj, { + medallion.common.string_to_datetime("1992-02-01T20:05:17.485Z") + }) + + +def test_spec_version_matcher(): + # Three spec versions of the same object (as identified by ID), + # and one different object. 
+ objs = [ + { + "id": "id--1", + "modified": "1976-03-02T11:21:56.624Z", + "__meta": { + "date_added": "1987-05-27T16:37:09.111Z", + "media_type": "application/stix+json;version=2.0" + } + }, + { + "id": "id--1", + "modified": "1979-08-09T14:57:29.634Z", + "__meta": { + "date_added": "1995-09-15T17:57:40.692Z", + "media_type": "application/stix+json;version=2.2" + } + }, + { + "id": "id--1", + "modified": "1989-11-15T16:13:20.523Z", + "__meta": { + "date_added": "1996-01-17T14:09:36.932Z", + "media_type": "application/stix+json;version=2.10" + } + }, + { + "id": "id--2", + "modified": "1999-07-14T22:52:47.345Z", + "__meta": { + "date_added": "2000-03-24T20:40:05.295Z", + "media_type": "application/stix+json;version=4.2" + } + } + ] + + for obj in objs: + medallion.backends.memory_backend._metafy_object(obj) + + spec_matcher = medallion.filters.memory_filter.SpecVersionMatcher(objs) + + assert spec_matcher.match(objs[0], ["2.0"]) + assert spec_matcher.match(objs[0], ["2.0", "88.88"]) + assert not spec_matcher.match(objs[0], ["2.2", "12.34"]) + assert spec_matcher.match(objs[3], ["4.2"]) + + assert spec_matcher.match(objs[2], None) + assert spec_matcher.match(objs[3], None) + assert not spec_matcher.match(objs[1], None) + + +def test_version_matcher(): + # Three versions of the same object (as identified by ID), + # and one different object. + objs = [ + { + "id": "id--1", + "modified": "1977-01-16T06:59:55.589Z", + "__meta": { + "date_added": "1996-01-17T14:09:36.932Z", + "media_type": "application/stix+json;version=2.0" + } + }, + { + "id": "id--1", + "modified": "1991-05-31T06:22:51.473Z", + "__meta": { + "date_added": "1995-09-15T17:57:40.692Z", + "media_type": "application/stix+json;version=2.2" + } + }, + { + "id": "id--1", + "modified": "1996-08-06T03:08:59.121Z", + "__meta": { + "date_added": "1987-05-27T16:37:09.111Z", + "media_type": "application/stix+json;version=2.10" + } + }, + { + "id": "id--2", + "modified": "1999-07-14T22:52:47.345Z", + "__meta": { + "date_added": "2000-03-24T20:40:05.295Z", + "media_type": "application/stix+json;version=2.2" + } + } + ] + + for obj in objs: + medallion.backends.memory_backend._metafy_object(obj) + + version_matcher = medallion.filters.memory_filter.VersionMatcher(objs) + + assert version_matcher.match( + objs[0], [ + medallion.common.string_to_datetime("1977-01-16T06:59:55.589Z") + ] + ) + assert version_matcher.match( + objs[0], [ + medallion.common.string_to_datetime("1977-01-16T06:59:55.589Z"), + medallion.common.string_to_datetime("1975-06-14T10:13:53.619Z") + ] + ) + assert not version_matcher.match( + objs[0], [ + medallion.common.string_to_datetime("1999-02-10T14:01:40.234Z"), + medallion.common.string_to_datetime("1975-06-14T10:13:53.619Z") + ] + ) + assert version_matcher.match( + objs[3], [ + medallion.common.string_to_datetime("1999-07-14T22:52:47.345Z"), + ] + ) + + assert version_matcher.match(objs[0], ["first"]) + assert version_matcher.match(objs[0], ["first", "last"]) + assert not version_matcher.match(objs[1], ["first", "last"]) + assert version_matcher.match(objs[2], ["first", "last"]) + assert version_matcher.match(objs[0], ["all"]) + assert not version_matcher.match(objs[0], ["last"]) + assert version_matcher.match(objs[2], ["last"]) + + assert version_matcher.match(objs[2], None) + assert version_matcher.match(objs[3], None) + assert not version_matcher.match(objs[1], None) + + +def test_interop_tier1_filter(): + data = [ + { + "id": "aaaaa--f513d13a-383d-49e7-88c2-da80941a86e9", + "created": "1994-02-17T16:10:59.672Z", 
+ "spec_version": "2.1", + "confidence": 10, + "__meta": { + "media_type": "application/stix+json;version=2.1", + "date_added": "1992-02-23T23:33:31.342Z" + } + }, + { + "id": "bbbbb--be960f96-4a5f-4943-926e-5d0c9c8c7a10", + "created": "1978-01-02T06:57:16.129Z", + "__meta": { + "media_type": "application/stix+json;version=2.1", + "date_added": "1993-05-27T23:00:44.323Z" + } + } + ] + + for obj in data: + medallion.backends.memory_backend._metafy_object(obj) + + filter = medallion.filters.memory_filter.MemoryFilter( + { + "match[confidence]": "10" + }, + interop=True + ) + + results, _, _ = filter.process_filter(data) + + assert results == [data[0]] + + filter = medallion.filters.memory_filter.MemoryFilter( + { + "match[confidence]": "10,20,30" + }, + interop=True + ) + + results, _, _ = filter.process_filter(data) + + assert results == [data[0]] + + filter = medallion.filters.memory_filter.MemoryFilter( + { + "match[confidence]": "20" + }, + interop=True + ) + + results, _, _ = filter.process_filter(data) + + assert not results + + filter = medallion.filters.memory_filter.MemoryFilter( + { + "match[confidence]": "foo" + }, + interop=True + ) + with pytest.raises(exceptions.ProcessingError) as e: + filter.process_filter(data) + + assert e.value.status == 400 + + filter = medallion.filters.memory_filter.MemoryFilter( + { + "match[confidence]": "10" + }, + interop=False + ) + + results, _, _ = filter.process_filter(data) + + assert all(obj in results for obj in data) + assert len(results) == len(data) + + +def test_interop_tier2_filter(): + data = [ + { + "id": "aaaaa--f513d13a-383d-49e7-88c2-da80941a86e9", + "created": "1994-02-17T16:10:59.672Z", + "spec_version": "2.1", + "labels": ["A", "B", "C"], + "__meta": { + "media_type": "application/stix+json;version=2.1", + "date_added": "1992-02-23T23:33:31.342Z" + } + }, + { + "id": "bbbbb--be960f96-4a5f-4943-926e-5d0c9c8c7a10", + "created": "1978-01-02T06:57:16.129Z", + "labels": ["B", "C", "D"], + "__meta": { + "media_type": "application/stix+json;version=2.0", + "date_added": "1993-05-27T23:00:44.323Z" + } + }, + { + "id": "ccccc--13374fca-c972-4503-a287-7eaeac21a004", + "created": "1990-12-06T18:10:47.496Z", + "__meta": { + "media_type": "application/stix+json;version=2.0", + "date_added": "1993-05-27T23:00:44.323Z" + } + } + ] + + for obj in data: + medallion.backends.memory_backend._metafy_object(obj) + + filter = medallion.filters.memory_filter.MemoryFilter( + { + "match[labels]": "B" + }, + interop=True + ) + + results, _, _ = filter.process_filter(data) + + assert len(results) == 2 + assert data[0] in results + assert data[1] in results + + filter = medallion.filters.memory_filter.MemoryFilter( + { + "match[labels]": "X,B,foo" + }, + interop=True + ) + + results, _, _ = filter.process_filter(data) + + assert len(results) == 2 + assert data[0] in results + assert data[1] in results + + filter = medallion.filters.memory_filter.MemoryFilter( + { + "match[labels]": "Z" + }, + interop=True + ) + + results, _, _ = filter.process_filter(data) + + assert not results + + filter = medallion.filters.memory_filter.MemoryFilter( + { + "match[labels]": "B" + }, + interop=False + ) + + results, _, _ = filter.process_filter(data) + + assert all(obj in results for obj in data) + assert len(results) == len(data) + + +def test_interop_tier3_filter(): + data = [ + { + "id": "aaaaa--d6b0ab07-8fbe-4503-8943-97417c601cdc", + "foo": { + "address_family": "A" + }, + "__meta": { + "media_type": "application/stix+json;version=2.1", + "date_added": 
"1980-09-17T07:18:19.141Z" + } + }, + { + "id": "bbbbb--436af759-54fa-4cb4-8395-b0286216e8b6", + "foo": [ + { + "address_family": "A" + } + ], + "__meta": { + "media_type": "application/stix+json;version=2.1", + "date_added": "1979-06-23T07:03:24.893Z" + } + }, + { + "id": "ccccc--38fb2092-23b5-471f-a7c9-5715b995ad85", + "address_family": "A", + "__meta": { + "media_type": "application/stix+json;version=2.1", + "date_added": "1997-12-12T11:29:41.196Z" + } + }, + { + "id": "ddddd--70f0802e-a73f-4b62-829d-6bc3dd0e39a7", + "foo": { + "address_family": "B" + }, + "__meta": { + "media_type": "application/stix+json;version=2.1", + "date_added": "1991-03-25T09:40:56.942Z" + } + } + ] + + for obj in data: + medallion.backends.memory_backend._metafy_object(obj) + + filter = medallion.filters.memory_filter.MemoryFilter( + { + "match[address_family]": "A,X" + }, + interop=True + ) + + results, _, _ = filter.process_filter(data) + assert data[0] in results + assert data[1] in results + assert data[2] not in results + assert data[3] not in results + + filter = medallion.filters.memory_filter.MemoryFilter( + { + "match[address_family]": "A,X" + }, + interop=False + ) + + results, _, _ = filter.process_filter(data) + + assert all(obj in results for obj in data) + assert len(results) == len(data) + + +def test_filter_order(): + + filters = sorted( + [ + "match[address_family]", + "match[version]", + "match[aliases]", + "match[number]" + ], + key=medallion.filters.memory_filter._speed_tier + ) + + correct_order = [ + "match[number]", + "match[aliases]", + "match[address_family]", + "match[version]" + ] + + assert filters == correct_order + + +def test_revoked_default(): + obj = { + "id": "foo--f127ea5c-4e08-47ee-9e29-c4ef0883b394", + "type": "foo", + "created": "1988-12-05T21:21:50.423Z" + # "revoked" defaults to false + } + + matcher = medallion.filters.memory_filter.TopLevelPropertyMatcher( + "revoked", + default_value=False, + filter_info=medallion.filters.common.TAXII_BOOLEAN_FILTER + ) + + assert matcher.match(obj, {False}) + assert matcher.match(obj, {False, True}) + assert not matcher.match(obj, {True}) + + obj_revoked_false = { + "id": "foo--f127ea5c-4e08-47ee-9e29-c4ef0883b394", + "type": "foo", + "created": "1988-12-05T21:21:50.423Z", + "revoked": "false" # will coerce to False + } + + assert matcher.match(obj_revoked_false, {False}) + assert matcher.match(obj_revoked_false, {False, True}) + assert not matcher.match(obj_revoked_false, {True}) + + obj_revoked_true = { + "id": "foo--f127ea5c-4e08-47ee-9e29-c4ef0883b394", + "type": "foo", + "created": "1988-12-05T21:21:50.423Z", + "revoked": "true" # will coerce to True + } + + assert not matcher.match(obj_revoked_true, {False}) + assert matcher.match(obj_revoked_true, {False, True}) + assert matcher.match(obj_revoked_true, {True}) + + +def test_toplevel_property_matcher_list_default(): + obj = { + "id": "foo--f127ea5c-4e08-47ee-9e29-c4ef0883b394", + "type": "foo", + "created": "1988-12-05T21:21:50.423Z" + } + + # defaulted property is "defaulted" + matcher = medallion.filters.memory_filter.TopLevelPropertyMatcher( + "defaulted", + filter_info=medallion.filters.common.TAXII_INTEGER_FILTER, + default_value=[1, "2", 3] + ) + + assert matcher.match( + obj, {2, 4, 6} + ) + + assert not matcher.match( + obj, {6, 7, 8} + ) + + obj_with_defaulted = { + "id": "foo--f127ea5c-4e08-47ee-9e29-c4ef0883b394", + "type": "foo", + "created": "1988-12-05T21:21:50.423Z", + "defaulted": [4, "5", 6] + } + + assert matcher.match( + obj_with_defaulted, {5, 10, 15} + ) + + 
assert not matcher.match( + obj_with_defaulted, {1, 2, 3} + ) diff --git a/medallion/test/test_startup.py b/medallion/test/test_startup.py index 33de7458..b5cc63ac 100644 --- a/medallion/test/test_startup.py +++ b/medallion/test/test_startup.py @@ -1,5 +1,6 @@ import logging from unittest import mock +import unittest.mock import pytest import pytest_subtests # noqa: F401 @@ -147,12 +148,13 @@ class ExpectedException(BaseException): assert "not allowed with argument" in msg -@mock.patch("medallion.APPLICATION_INSTANCE.run") -def test_confcheck(mock_app, subtests): +def test_confcheck(subtests): class ExpectedException(BaseException): pass + mock_app = unittest.mock.MagicMock() + """ Confirm that the --conf-check option works as expected. """ @@ -161,11 +163,13 @@ class ExpectedException(BaseException): "medallion.scripts.run.log" ) as mock_logger, mock.patch( "sys.argv", ["ARGV0", "-c", "medallion/test/data/config.json"] + ), mock.patch( + "medallion.scripts.run.create_app", return_value=mock_app ): medallion.scripts.run.main() # default `--log-level` value mock_logger.setLevel.assert_called_once_with("WARN") - assert mock_app.call_count == 1 + assert mock_app.run.call_count == 1 mock_app.reset_mock() with subtests.test(msg="--conf-check provided without a value"): @@ -173,10 +177,12 @@ class ExpectedException(BaseException): "medallion.scripts.run.log" ) as mock_logger, mock.patch( "sys.argv", ["ARGV0", "--conf-check", "-c", "medallion/test/data/config.json"] + ), mock.patch( + "medallion.scripts.run.create_app", return_value=mock_app ): medallion.scripts.run.main() mock_logger.setLevel.assert_called_once_with(logging.DEBUG) - mock_app.assert_not_called() + mock_app.run.assert_not_called() mock_app.reset_mock() with subtests.test(msg="--conf-check with equals"): @@ -197,10 +203,12 @@ class ExpectedException(BaseException): "medallion.scripts.run.log" ) as mock_logger, mock.patch( "sys.argv", ["ARGV0", "--conf-check", "--log-level=CRITICAL", "-c", "medallion/test/data/config.json"] + ), mock.patch( + "medallion.scripts.run.create_app", return_value=mock_app ): medallion.scripts.run.main() mock_logger.setLevel.assert_called_once_with(logging.DEBUG) - mock_app.assert_not_called() + mock_app.run.assert_not_called() def test_main_config_arg_handling(subtests): @@ -217,13 +225,15 @@ def test_main_config_arg_handling(subtests): "debug": False, } + mock_app = unittest.mock.MagicMock() + with mock.patch( - "medallion.scripts.run.APPLICATION_INSTANCE", - ) as mock_app, mock.patch( + "medallion.scripts.run.create_app", return_value=mock_app + ), mock.patch( "medallion.current_app", new=mock_app, ), mock.patch( "medallion.config.load_config", return_value=safe_config, - ) as mock_load_config, mock.patch("medallion.scripts.run.APPLICATION_INSTANCE.backend_config", None): + ) as mock_load_config: with subtests.test(msg="No config args provided"): with mock.patch( "sys.argv", ["ARGV0"] diff --git a/medallion/views/manifest.py b/medallion/views/manifest.py index 35367904..6b576ba6 100644 --- a/medallion/views/manifest.py +++ b/medallion/views/manifest.py @@ -33,7 +33,7 @@ def get_object_manifest(api_root, collection_id): limit = validate_limit_parameter() manifests, headers = current_app.medallion_backend.get_object_manifest( - api_root, collection_id, request.args.to_dict(), ("id", "type", "version", "spec_version"), limit + api_root, collection_id, request.args.to_dict(), limit ) return Response( diff --git a/medallion/views/objects.py b/medallion/views/objects.py index 1d30c7c8..a31d2da2 100644 --- 
a/medallion/views/objects.py +++ b/medallion/views/objects.py @@ -113,7 +113,7 @@ def get_or_add_objects(api_root, collection_id): permission_to_read(api_root, collection_id) limit = validate_limit_parameter() objects, headers = current_app.medallion_backend.get_objects( - api_root, collection_id, request.args.to_dict(), ("id", "type", "version", "spec_version"), limit + api_root, collection_id, request.args.to_dict(), limit ) return Response( @@ -165,7 +165,7 @@ def get_or_delete_object(api_root, collection_id, object_id): permission_to_read(api_root, collection_id) limit = validate_limit_parameter() objects, headers = current_app.medallion_backend.get_object( - api_root, collection_id, object_id, request.args.to_dict(), ("version", "spec_version"), limit + api_root, collection_id, object_id, request.args.to_dict(), limit ) if objects or request.args: return Response( @@ -178,7 +178,7 @@ def get_or_delete_object(api_root, collection_id, object_id): elif request.method == "DELETE": permission_to_read_and_write(api_root, collection_id) current_app.medallion_backend.delete_object( - api_root, collection_id, object_id, request.args.to_dict(), ("version", "spec_version"), + api_root, collection_id, object_id, request.args.to_dict(), ) return Response( status=200, @@ -212,7 +212,7 @@ def get_object_versions(api_root, collection_id, object_id): limit = validate_limit_parameter() versions, headers = current_app.medallion_backend.get_object_versions( - api_root, collection_id, object_id, request.args.to_dict(), ("spec_version",), limit + api_root, collection_id, object_id, request.args.to_dict(), limit ) return Response( response=json.dumps(versions), diff --git a/sample-config-with-memory-backend.json b/sample-config-with-memory-backend.json index 3515debb..4fdf9409 100644 --- a/sample-config-with-memory-backend.json +++ b/sample-config-with-memory-backend.json @@ -1,7 +1,7 @@ { "backend": { "module_class": "MemoryBackend", - "filename": "medallion/test/data/default_data.json" + "filename": "../test/data/default_data.json" }, "users": { "admin": "Password0", diff --git a/setup.py b/setup.py index 2bceede0..4aba3cd3 100644 --- a/setup.py +++ b/setup.py @@ -31,8 +31,6 @@ def get_long_description(): url="https://oasis-open.github.io/cti-documentation/", author="OASIS Cyber Threat Intelligence Technical Committee", author_email="cti-users@lists.oasis-open.org", - maintainer="Emmanuelle Vargas-Gonzalez", - maintainer_email="emmanuelle@mitre.org", license="BSD", classifiers=[ "Development Status :: 3 - Alpha", @@ -40,10 +38,10 @@ def get_long_description(): "Topic :: Security", "License :: OSI Approved :: BSD License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", ], keywords="taxii taxii2 server json cti cyber threat intelligence", packages=find_packages(exclude=["*.test", "*.test.data"]), @@ -54,8 +52,7 @@ def get_long_description(): "Flask-HTTPAuth", "jsonmerge", "packaging", - "pytz", - "six", + "pytz" ], entry_points={ "console_scripts": [ diff --git a/tox.ini b/tox.ini index fb9be89e..50e6a960 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py36,py37,py38,py39,packaging,pre-commit-check +envlist = py37,py38,py39,py310,packaging,pre-commit-check [testenv] deps = @@ -32,7 +32,7 @@ commands = [gh-actions] python = - 3.6: py36 3.7: py37 3.8: py38 3.9: py39, packaging, 
pre-commit-check + 3.10: py310 From 21597fc3a3376ed1a5d57cc5ad2f475fc41871e0 Mon Sep 17 00:00:00 2001 From: Michael Chisholm Date: Sat, 4 Feb 2023 01:33:01 -0500 Subject: [PATCH 2/3] Fixes to github actions and tox.ini to try to get unit tests running in the workflow again. --- .github/workflows/python-ci-tests.yml | 17 +++++------------ tox.ini | 17 +++++------------ 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/.github/workflows/python-ci-tests.yml b/.github/workflows/python-ci-tests.yml index 0ca1a43d..c9638ede 100644 --- a/.github/workflows/python-ci-tests.yml +++ b/.github/workflows/python-ci-tests.yml @@ -13,28 +13,21 @@ jobs: name: Python ${{ matrix.python-version }} Build steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3.3.0 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4.5.0 with: python-version: ${{ matrix.python-version }} - name: Start MongoDB - uses: supercharge/mongodb-github-action@1.3.0 - with: - mongodb-version: 4.0 + uses: supercharge/mongodb-github-action@1.8.0 - name: Install and update essential dependencies run: | pip install -U pip setuptools pip install tox-gh-actions - pip install codecov - - name: Create test user - run: | - mongo admin --eval 'db.createUser({user:"travis",pwd:"test",roles:[{role:"root",db:"admin"}]});' - name: Test with Tox - run: | - tox + run: tox - name: Upload coverage information to Codecov - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v3.1.1 with: fail_ci_if_error: true # optional (default = false) verbose: true # optional (default = false) diff --git a/tox.ini b/tox.ini index 50e6a960..79303318 100644 --- a/tox.ini +++ b/tox.ini @@ -1,19 +1,12 @@ [tox] envlist = py37,py38,py39,py310,packaging,pre-commit-check -[testenv] -deps = - -U - tox - pytest - pytest-cov - pytest-subtests - coverage - responses - pymongo - pyjwt +[testenv:py{37,38,39,310}] +extras = + test + mongo commands = - pytest --cov=medallion medallion/test/ --cov-report term-missing + pytest --cov-report xml --cov-report term-missing --cov=medallion medallion/test/ passenv = GITHUB_* From fc535a48dd29413762775dc9b5a8b1822487929c Mon Sep 17 00:00:00 2001 From: Michael Chisholm Date: Sat, 4 Feb 2023 02:24:01 -0500 Subject: [PATCH 3/3] Update pre-commit hooks to latest versions, and re-run them --- .pre-commit-config.yaml | 6 +++--- medallion/backends/memory_backend.py | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a1bc7471..9afd04c8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,18 +1,18 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.4.0 + rev: v4.4.0 hooks: - id: trailing-whitespace - id: check-merge-conflict - repo: https://github.com/PyCQA/flake8 - rev: 3.8.4 + rev: 6.0.0 hooks: - id: flake8 name: Check project styling args: - --max-line-length=160 - repo: https://github.com/PyCQA/isort - rev: 5.7.0 + rev: 5.12.0 hooks: - id: isort name: Sort python imports (shows diff) diff --git a/medallion/backends/memory_backend.py b/medallion/backends/memory_backend.py index 9476d059..8dc80263 100644 --- a/medallion/backends/memory_backend.py +++ b/medallion/backends/memory_backend.py @@ -8,8 +8,7 @@ from ..common import ( create_resource, determine_spec_version, generate_status, generate_status_details, get_timestamp, timestamp_to_datetime, - timestamp_to_epoch_seconds, timestamp_to_stix_json, - timestamp_to_taxii_json + 
timestamp_to_epoch_seconds, timestamp_to_stix_json, timestamp_to_taxii_json ) from ..exceptions import MemoryBackendError, ProcessingError from ..filters.memory_filter import MemoryFilter