diff --git a/.env.enc b/.env.enc index 5903c8874..6e1f6ead7 100644 Binary files a/.env.enc and b/.env.enc differ diff --git a/examples/discovery_v1.py b/examples/discovery_v1.py index a791eac43..646d6a68c 100644 --- a/examples/discovery_v1.py +++ b/examples/discovery_v1.py @@ -22,10 +22,10 @@ environment_id=news_environment_id) print(json.dumps(configurations, indent=2)) -query_options = {'query': 'IBM'} query_results = discovery.query(news_environment_id, news_collections[0]['collection_id'], - query_options) + filter='extracted_metadata.sha1::f5*', + return_fields='extracted_metadata.sha1') print(json.dumps(query_results, indent=2)) # new_environment = discovery.create_environment(name="new env", description="bogus env") diff --git a/test/integration/test_integration_discovery_v1.py b/test/integration/test_integration_discovery_v1.py new file mode 100644 index 000000000..c011177d4 --- /dev/null +++ b/test/integration/test_integration_discovery_v1.py @@ -0,0 +1,78 @@ +from unittest import TestCase +import os +import watson_developer_cloud +import json + +class Discoveryv1(TestCase): + def setUp(self): + self.discovery = watson_developer_cloud.DiscoveryV1( + version='2017-10-16', + username=os.getenv('SPEECH_TO_TEXT_USERNAME'), + password=os.getenv('SPEECH_TO_TEXT_PASSWORD')) + self.discovery.set_default_headers({'X-Watson-Learning-Opt-Out': '1', 'X-Watson-Test': '1'}) + self.environment_id = 'e15f6424-f887-4f50-b4ea-68267c36fc9c' # This environment is created for integration testing + self.collection_id = self.discovery.list_collections(self.environment_id)['collections'][0]['collection_id'] + + def test_environments(self): + envs = self.discovery.list_environments() + assert envs is not None + env = self.discovery.get_environment(envs['environments'][0]['environment_id']) + assert env is not None + fields = self.discovery.list_fields(self.environment_id, self.collection_id) + assert fields is not None + + def test_configurations(self): + configs = self.discovery.list_configurations(self.environment_id) + assert configs is not None + new_configuration_id = self.discovery.create_configuration(self.environment_id, 'test', 'creating new config for python sdk')['configuration_id'] + assert new_configuration_id is not None + self.discovery.get_configuration(self.environment_id, new_configuration_id) + updated_config = self.discovery.update_configuration(self.environment_id, new_configuration_id, 'lala') + assert updated_config['name'] == 'lala' + deleted_config = self.discovery.delete_configuration(self.environment_id, new_configuration_id) + assert deleted_config['status'] == 'deleted' + + def test_collections_and_expansions(self): + new_collection_id = self.discovery.create_collection(self.environment_id, + name='Example collection for python', + description="Integration test for python sdk")['collection_id'] + assert new_collection_id is not None + self.discovery.get_collection(self.environment_id, new_collection_id) + updated_collection = self.discovery.update_collection(self.environment_id, new_collection_id, name='lala') + assert updated_collection['name'] == 'lala' + + self.discovery.create_expansions(self.environment_id, new_collection_id, [{'input_terms': ['a'], 'expanded_terms': ['aa']}]) + expansions = self.discovery.list_expansions(self.environment_id, new_collection_id) + assert len(expansions['expansions']) > 0 + self.discovery.delete_expansions(self.environment_id, new_collection_id) + + deleted_collection = self.discovery.delete_collection(self.environment_id, new_collection_id) + assert deleted_collection['status'] == 'deleted' + + def test_documents(self): + with open(os.path.join(os.path.dirname(__file__), '../../resources/simple.html'), 'r') as fileinfo: + add_doc = self.discovery.add_document(environment_id=self.environment_id, + collection_id=self.collection_id, + file=fileinfo) + add_doc['document_id'] is not None + + doc_status = self.discovery.get_document_status(self.environment_id, self.collection_id, add_doc['document_id']) + assert doc_status is not None + + with open(os.path.join(os.path.dirname(__file__), '../../resources/simple.html'), 'r') as fileinfo: + update_doc = self.discovery.update_document(self.environment_id, + self.collection_id, + add_doc['document_id'], + file=fileinfo, + filename='newname.html') + assert update_doc is not None + delete_doc = self.discovery.delete_document(self.environment_id, self.collection_id, add_doc['document_id']) + assert delete_doc['status'] == 'deleted' + + + def test_queries(self): + query_results = self.discovery.query(self.environment_id, + self.collection_id, + filter='extracted_metadata.sha1::9181d244*', + return_fields='extracted_metadata.sha1') + assert query_results is not None \ No newline at end of file diff --git a/test/unit/test_discovery_v1.py b/test/unit/test_discovery_v1.py index c28240647..619b5ad2a 100644 --- a/test/unit/test_discovery_v1.py +++ b/test/unit/test_discovery_v1.py @@ -250,7 +250,7 @@ def test_query_relations(): discovery = watson_developer_cloud.DiscoveryV1( '2016-11-07', username='username', password='password') - discovery.query_relations('envid', 'collid', {'count': 10}) + discovery.query_relations('envid', 'collid', count=10) called_url = urlparse(responses.calls[0].request.url) test_url = urlparse(discovery_url) assert called_url.netloc == test_url.netloc @@ -792,7 +792,7 @@ def test_expansions(): discovery.list_expansions('envid', 'colid') assert responses.calls[0].response.json() == {"expansions": "results"} - discovery.create_expansions('envid', 'colid', { "expansions": [{"input_terms": "dumb"}] }) + discovery.create_expansions('envid', 'colid', [{"input_terms": "dumb", "expanded_terms": "dumb2"}]) assert responses.calls[1].response.json() == {"expansions": "success" } discovery.delete_expansions('envid', 'colid') diff --git a/watson_developer_cloud/discovery_v1.py b/watson_developer_cloud/discovery_v1.py index 3c77a7018..9a81dceeb 100644 --- a/watson_developer_cloud/discovery_v1.py +++ b/watson_developer_cloud/discovery_v1.py @@ -36,12 +36,6 @@ class DiscoveryV1(WatsonService): """The Discovery V1 service.""" default_url = 'https://gateway.watsonplatform.net/discovery/api' - VERSION_DATE_2017_11_07 = '2017-11-07' - VERSION_DATE_2017_09_01 = '2017-09-01' - VERSION_DATE_2017_08_01 = '2017-08-01' - VERSION_DATE_2017_07_19 = '2017-07-19' - VERSION_DATE_2017_06_25 = '2017-06-25' - VERSION_DATE_2016_12_01 = '2016-12-01' def __init__(self, version, url=default_url, username=None, password=None): """ @@ -163,10 +157,10 @@ def list_environments(self, name=None): def list_fields(self, environment_id, collection_ids): """ - List fields in specified collecitons. + List fields in specified collections. Gets a list of the unique fields (and their types) stored in the indexes of the - specified collecitons. + specified collections. :param str environment_id: The ID of the environment. :param list[str] collection_ids: A comma-separated list of collection IDs to be queried against. @@ -247,11 +241,16 @@ def create_configuration(self, if name is None: raise ValueError('name must be provided') if conversions is not None: - conversions = self._convert_model(conversions) + conversions = self._convert_model(conversions, Conversions) if enrichments is not None: - enrichments = [self._convert_model(x) for x in enrichments] + enrichments = [ + self._convert_model(x, Enrichment) for x in enrichments + ] if normalizations is not None: - normalizations = [self._convert_model(x) for x in normalizations] + normalizations = [ + self._convert_model(x, NormalizationOperation) + for x in normalizations + ] params = {'version': self.version} data = { 'name': name, @@ -369,11 +368,16 @@ def update_configuration(self, if name is None: raise ValueError('name must be provided') if conversions is not None: - conversions = self._convert_model(conversions) + conversions = self._convert_model(conversions, Conversions) if enrichments is not None: - enrichments = [self._convert_model(x) for x in enrichments] + enrichments = [ + self._convert_model(x, Enrichment) for x in enrichments + ] if normalizations is not None: - normalizations = [self._convert_model(x) for x in normalizations] + normalizations = [ + self._convert_model(x, NormalizationOperation) + for x in normalizations + ] params = {'version': self.version} data = { 'name': name, @@ -512,7 +516,7 @@ def create_expansions(self, environment_id, collection_id, expansions): raise ValueError('collection_id must be provided') if expansions is None: raise ValueError('expansions must be provided') - expansions = [self._convert_model(x) for x in expansions] + expansions = [self._convert_model(x, Expansion) for x in expansions] params = {'version': self.version} data = {'expansions': expansions} url = '/v1/environments/{0}/collections/{1}/expansions'.format( @@ -869,7 +873,10 @@ def federated_query(self, sort=None, highlight=None, deduplicate=None, - deduplicate_field=None): + deduplicate_field=None, + similar=None, + similar_document_ids=None, + similar_fields=None): """ Query documents in multiple collections. @@ -890,6 +897,9 @@ def federated_query(self, :param bool highlight: When true a highlight field is returned for each result which contains the fields that match the query with `` tags around the matching query terms. Defaults to false. :param bool deduplicate: When `true` and used with a Watson Discovery News collection, duplicate results (based on the contents of the `title` field) are removed. Duplicate comparison is limited to the current query only, `offset` is not considered. Defaults to `false`. This parameter is currently Beta functionality. :param str deduplicate_field: When specified, duplicate results based on the field specified are removed from the returned results. Duplicate comparison is limited to the current query only, `offset` is not considered. This parameter is currently Beta functionality. + :param bool similar: When `true`, results are returned based on their similarity to the document IDs specified in the `similar.document_ids` parameter. The default is `false`. + :param list[str] similar_document_ids: A comma-separated list of document IDs that will be used to find similar documents. **Note:** If the `natural_language_query` parameter is also specified, it will be used to expand the scope of the document similarity search to include the natural language query. Other query parameters, such as `filter` and `query` are subsequently applied and reduce the query scope. + :param list[str] similar_fields: A comma-separated list of field names that will be used as a basis for comparison to identify similar documents. If not specified, the entire document is used for comparison. :return: A `dict` containing the `QueryResponse` response. :rtype: dict """ @@ -910,7 +920,10 @@ def federated_query(self, 'sort': self._convert_list(sort), 'highlight': highlight, 'deduplicate': deduplicate, - 'deduplicate.field': deduplicate_field + 'deduplicate.field': deduplicate_field, + 'similar': similar, + 'similar.document_ids': self._convert_list(similar_document_ids), + 'similar.fields': self._convert_list(similar_fields) } url = '/v1/environments/{0}/query'.format( *self._encode_path_vars(environment_id)) @@ -930,7 +943,10 @@ def federated_query_notices(self, offset=None, sort=None, highlight=None, - deduplicate_field=None): + deduplicate_field=None, + similar=None, + similar_document_ids=None, + similar_fields=None): """ Query multiple collection system notices. @@ -952,6 +968,9 @@ def federated_query_notices(self, :param list[str] sort: A comma separated list of fields in the document to sort on. You can optionally specify a sort direction by prefixing the field with `-` for descending or `+` for ascending. Ascending is the default sort direction if no prefix is specified. :param bool highlight: When true a highlight field is returned for each result which contains the fields that match the query with `` tags around the matching query terms. Defaults to false. :param str deduplicate_field: When specified, duplicate results based on the field specified are removed from the returned results. Duplicate comparison is limited to the current query only, `offset` is not considered. This parameter is currently Beta functionality. + :param bool similar: When `true`, results are returned based on their similarity to the document IDs specified in the `similar.document_ids` parameter. The default is `false`. + :param list[str] similar_document_ids: A comma-separated list of document IDs that will be used to find similar documents. **Note:** If the `natural_language_query` parameter is also specified, it will be used to expand the scope of the document similarity search to include the natural language query. Other query parameters, such as `filter` and `query` are subsequently applied and reduce the query scope. + :param list[str] similar_fields: A comma-separated list of field names that will be used as a basis for comparison to identify similar documents. If not specified, the entire document is used for comparison. :return: A `dict` containing the `QueryNoticesResponse` response. :rtype: dict """ @@ -971,7 +990,10 @@ def federated_query_notices(self, 'offset': offset, 'sort': self._convert_list(sort), 'highlight': highlight, - 'deduplicate.field': deduplicate_field + 'deduplicate.field': deduplicate_field, + 'similar': similar, + 'similar.document_ids': self._convert_list(similar_document_ids), + 'similar.fields': self._convert_list(similar_fields) } url = '/v1/environments/{0}/notices'.format( *self._encode_path_vars(environment_id)) @@ -996,7 +1018,10 @@ def query(self, passages_count=None, passages_characters=None, deduplicate=None, - deduplicate_field=None): + deduplicate_field=None, + similar=None, + similar_document_ids=None, + similar_fields=None): """ Query documents. @@ -1021,6 +1046,9 @@ def query(self, :param int passages_characters: The approximate number of characters that any one passage will have. The default is `400`. The minimum is `50`. The maximum is `2000`. :param bool deduplicate: When `true` and used with a Watson Discovery News collection, duplicate results (based on the contents of the `title` field) are removed. Duplicate comparison is limited to the current query only, `offset` is not considered. Defaults to `false`. This parameter is currently Beta functionality. :param str deduplicate_field: When specified, duplicate results based on the field specified are removed from the returned results. Duplicate comparison is limited to the current query only, `offset` is not considered. This parameter is currently Beta functionality. + :param bool similar: When `true`, results are returned based on their similarity to the document IDs specified in the `similar.document_ids` parameter. The default is `false`. + :param list[str] similar_document_ids: A comma-separated list of document IDs that will be used to find similar documents. **Note:** If the `natural_language_query` parameter is also specified, it will be used to expand the scope of the document similarity search to include the natural language query. Other query parameters, such as `filter` and `query` are subsequently applied and reduce the query scope. + :param list[str] similar_fields: A comma-separated list of field names that will be used as a basis for comparison to identify similar documents. If not specified, the entire document is used for comparison. :return: A `dict` containing the `QueryResponse` response. :rtype: dict """ @@ -1044,7 +1072,10 @@ def query(self, 'passages.count': passages_count, 'passages.characters': passages_characters, 'deduplicate': deduplicate, - 'deduplicate.field': deduplicate_field + 'deduplicate.field': deduplicate_field, + 'similar': similar, + 'similar.document_ids': self._convert_list(similar_document_ids), + 'similar.fields': self._convert_list(similar_fields) } url = '/v1/environments/{0}/collections/{1}/query'.format( *self._encode_path_vars(environment_id, collection_id)) @@ -1080,9 +1111,9 @@ def query_entities(self, if collection_id is None: raise ValueError('collection_id must be provided') if entity is not None: - entity = self._convert_model(entity) + entity = self._convert_model(entity, QueryEntitiesEntity) if context is not None: - context = self._convert_model(context) + context = self._convert_model(context, QueryEntitiesContext) params = {'version': self.version} data = { 'feature': feature, @@ -1112,7 +1143,10 @@ def query_notices(self, passages_fields=None, passages_count=None, passages_characters=None, - deduplicate_field=None): + deduplicate_field=None, + similar=None, + similar_document_ids=None, + similar_fields=None): """ Query system notices. @@ -1138,6 +1172,9 @@ def query_notices(self, :param int passages_count: The maximum number of passages to return. The search returns fewer passages if the requested total is not found. The default is `10`. The maximum is `100`. :param int passages_characters: The approximate number of characters that any one passage will have. The default is `400`. The minimum is `50`. The maximum is `2000`. :param str deduplicate_field: When specified, duplicate results based on the field specified are removed from the returned results. Duplicate comparison is limited to the current query only, `offset` is not considered. This parameter is currently Beta functionality. + :param bool similar: When `true`, results are returned based on their similarity to the document IDs specified in the `similar.document_ids` parameter. The default is `false`. + :param list[str] similar_document_ids: A comma-separated list of document IDs that will be used to find similar documents. **Note:** If the `natural_language_query` parameter is also specified, it will be used to expand the scope of the document similarity search to include the natural language query. Other query parameters, such as `filter` and `query` are subsequently applied and reduce the query scope. + :param list[str] similar_fields: A comma-separated list of field names that will be used as a basis for comparison to identify similar documents. If not specified, the entire document is used for comparison. :return: A `dict` containing the `QueryNoticesResponse` response. :rtype: dict """ @@ -1160,7 +1197,10 @@ def query_notices(self, 'passages.fields': self._convert_list(passages_fields), 'passages.count': passages_count, 'passages.characters': passages_characters, - 'deduplicate.field': deduplicate_field + 'deduplicate.field': deduplicate_field, + 'similar': similar, + 'similar.document_ids': self._convert_list(similar_document_ids), + 'similar.fields': self._convert_list(similar_fields) } url = '/v1/environments/{0}/collections/{1}/notices'.format( *self._encode_path_vars(environment_id, collection_id)) @@ -1198,11 +1238,13 @@ def query_relations(self, if collection_id is None: raise ValueError('collection_id must be provided') if entities is not None: - entities = [self._convert_model(x) for x in entities] + entities = [ + self._convert_model(x, QueryRelationsEntity) for x in entities + ] if context is not None: - context = self._convert_model(context) + context = self._convert_model(context, QueryEntitiesContext) if filter is not None: - filter = self._convert_model(filter) + filter = self._convert_model(filter, QueryRelationsFilter) params = {'version': self.version} data = { 'entities': entities, @@ -1244,7 +1286,9 @@ def add_training_data(self, if collection_id is None: raise ValueError('collection_id must be provided') if examples is not None: - examples = [self._convert_model(x) for x in examples] + examples = [ + self._convert_model(x, TrainingExample) for x in examples + ] params = {'version': self.version} data = { 'natural_language_query': natural_language_query, @@ -2681,6 +2725,8 @@ def _from_dict(cls, _dict): ) if 'overwrite' in _dict: args['overwrite'] = _dict['overwrite'] + if 'enrichment_name' in _dict: + args['enrichment_name'] = _dict['enrichment_name'] if 'enrichment' in _dict: args['enrichment_name'] = _dict['enrichment'] else: @@ -3078,8 +3124,12 @@ def __init__(self, field_name=None, field_type=None): def _from_dict(cls, _dict): """Initialize a Field object from a json dictionary.""" args = {} + if 'field_name' in _dict: + args['field_name'] = _dict['field_name'] if 'field' in _dict: args['field_name'] = _dict['field'] + if 'field_type' in _dict: + args['field_type'] = _dict['field_type'] if 'type' in _dict: args['field_type'] = _dict['type'] return cls(**args) @@ -4777,7 +4827,6 @@ def __ne__(self, other): return not self == other - class QueryNoticesResult(object): """ QueryNoticesResult.