From 22aaf9e2b51ec2b2ba7641c17509f06683a87254 Mon Sep 17 00:00:00 2001
From: Anton Parkhomenko
Date: Mon, 22 May 2017 20:52:36 +0700
Subject: [PATCH 1/3] Add extract_schema(uri) function (close #26)

---
 snowplow_analytics_sdk/json_shredder.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/snowplow_analytics_sdk/json_shredder.py b/snowplow_analytics_sdk/json_shredder.py
index 9f53399..f5e5be4 100644
--- a/snowplow_analytics_sdk/json_shredder.py
+++ b/snowplow_analytics_sdk/json_shredder.py
@@ -21,6 +21,28 @@
 SCHEMA_PATTERN = re.compile(""".+:([a-zA-Z0-9_\.]+)/([a-zA-Z0-9_]+)/[^/]+/(.*)""")
 
 
+def extract_schema(uri):
+    """
+    Extracts Schema information from Iglu URI
+
+    >>> extract_schema("iglu:com.acme-corporation_underscore/event_name-dash/jsonschema/1-10-1")
+    {'version': '1-10-1', 'vendor': 'com.acme-corporation_underscore', 'name': 'event_name-dash', 'format': 'jsonschema'}
+    """
+    match = re.match(SCHEMA_URI_REGEX, uri)
+    if match:
+        return {
+            'vendor': match.group(1),
+            'name': match.group(2),
+            'format': match.group(3),
+            'version': match.group(4)
+
+        }
+    else:
+        raise SnowplowEventTransformationException([
+            "Schema {} does not conform to regular expression {}".format(uri, SCHEMA_URI)
+        ])
+
+
 def fix_schema(prefix, schema):
     """
     Create an Elasticsearch field name from a schema string

From ae02bb8bacb731706336fb768104996206a8fc2a Mon Sep 17 00:00:00 2001
From: Anton Parkhomenko
Date: Mon, 15 May 2017 17:50:27 +0700
Subject: [PATCH 2/3] Use standard regular expression for schema URIs (close #24)

---
 setup.py                                |  2 +-
 snowplow_analytics_sdk/json_shredder.py | 32 +++++++++++++++----------
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/setup.py b/setup.py
index 3693291..cb3512e 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@
 setup(
     name='snowplow_analytics_sdk',
-    version='0.2.2',
+    version='0.2.3-a1',
     description='Snowplow Analytics Python SDK',
     author='Fred Blundun',
     url='https://github.com/snowplow/snowplow-python-analytics-sdk',
diff --git a/snowplow_analytics_sdk/json_shredder.py b/snowplow_analytics_sdk/json_shredder.py
index f5e5be4..7f60754 100644
--- a/snowplow_analytics_sdk/json_shredder.py
+++ b/snowplow_analytics_sdk/json_shredder.py
@@ -16,17 +16,30 @@
 
 import re
 import json
+
 from snowplow_analytics_sdk.snowplow_event_transformation_exception import SnowplowEventTransformationException
 
+
+# TODO: remove in 0.3.0
+# See: https://github.com/snowplow/snowplow-python-analytics-sdk/issues/27
 SCHEMA_PATTERN = re.compile(""".+:([a-zA-Z0-9_\.]+)/([a-zA-Z0-9_]+)/[^/]+/(.*)""")
 
+SCHEMA_URI = ("^iglu:"                          # Protocol
+              "([a-zA-Z0-9-_.]+)/"              # Vendor
+              "([a-zA-Z0-9-_]+)/"               # Name
+              "([a-zA-Z0-9-_]+)/"               # Format
+              "([1-9][0-9]*"                    # MODEL (cannot start with 0)
+              "(?:-(?:0|[1-9][0-9]*)){2})$")    # REVISION and ADDITION
+
+SCHEMA_URI_REGEX = re.compile(SCHEMA_URI)
+
 
 def extract_schema(uri):
     """
     Extracts Schema information from Iglu URI
 
-    >>> extract_schema("iglu:com.acme-corporation_underscore/event_name-dash/jsonschema/1-10-1")
-    {'version': '1-10-1', 'vendor': 'com.acme-corporation_underscore', 'name': 'event_name-dash', 'format': 'jsonschema'}
+    >>> extract_schema("iglu:com.acme-corporation_underscore/event_name-dash/jsonschema/1-10-1")['vendor']
+    'com.acme-corporation_underscore'
     """
     match = re.match(SCHEMA_URI_REGEX, uri)
     if match:
@@ -47,16 +60,11 @@ def fix_schema(prefix, schema):
     """
     Create an Elasticsearch field name from a schema string
     """
-    match = re.match(SCHEMA_PATTERN, schema)
-    if match:
-        snake_case_organization = match.group(1).replace('.', '_').lower()
-        snake_case_name = re.sub('([^A-Z_])([A-Z])', '\g<1>_\g<2>', match.group(2)).lower()
-        model = match.group(3).split('-')[0]
-        return "{}_{}_{}_{}".format(prefix, snake_case_organization, snake_case_name, model)
-    else:
-        raise SnowplowEventTransformationException([
-            "Schema {} does not conform to regular expression {}".format(schema, SCHEMA_PATTERN)
-        ])
+    schema_dict = extract_schema(schema)
+    snake_case_organization = schema_dict['vendor'].replace('.', '_').lower()
+    snake_case_name = re.sub('([^A-Z_])([A-Z])', '\g<1>_\g<2>', schema_dict['name']).lower()
+    model = schema_dict['version'].split('-')[0]
+    return "{}_{}_{}_{}".format(prefix, snake_case_organization, snake_case_name, model)
 
 
 def parse_contexts(contexts):

From 72a48406c32c2542085e95b2faa65958572eb7c4 Mon Sep 17 00:00:00 2001
From: Anton Parkhomenko
Date: Mon, 15 May 2017 20:42:42 +0700
Subject: [PATCH 3/3] Prepared for release

---
 CHANGES.txt | 5 +++++
 setup.py    | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 35a1190..1403565 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,3 +1,8 @@
+Version 0.2.3 (2017-05-22)
+--------------------------
+Add extract_schema(uri) function (#26)
+Use standard regular expression for schema URIs (#24)
+
 Version 0.2.2 (2017-05-05)
 --------------------------
 Mark run ids archived to Glacier as processed (#23)
diff --git a/setup.py b/setup.py
index cb3512e..4fad3d3 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@
 setup(
     name='snowplow_analytics_sdk',
-    version='0.2.3-a1',
+    version='0.2.3',
     description='Snowplow Analytics Python SDK',
     author='Fred Blundun',
     url='https://github.com/snowplow/snowplow-python-analytics-sdk',