Skip to content

Commit

Permalink
Merge branch 'release/0.2.3'
Browse files Browse the repository at this point in the history
  • Loading branch information
chuwy committed May 22, 2017
2 parents 4cbc0f6 + 72a4840 commit 0ddca91
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 9 deletions.
5 changes: 5 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
Version 0.2.3 (2017-05-22)
--------------------------
Add extract_schema(uri) function (#26)
Use standard regular expression for schema URIs (#24)

Version 0.2.2 (2017-05-05)
--------------------------
Mark run ids archived to Glacier as processed (#23)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

setup(
name='snowplow_analytics_sdk',
version='0.2.2',
version='0.2.3',
description='Snowplow Analytics Python SDK',
author='Fred Blundun',
url='https://github.com/snowplow/snowplow-python-analytics-sdk',
Expand Down
46 changes: 38 additions & 8 deletions snowplow_analytics_sdk/json_shredder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,57 @@

import re
import json

from snowplow_analytics_sdk.snowplow_event_transformation_exception import SnowplowEventTransformationException


# TODO: remove in 0.3.0
# See: https://github.com/snowplow/snowplow-python-analytics-sdk/issues/27
# Legacy, permissive schema pattern. Capturing groups: (1) vendor, (2) name,
# (3) version; the format segment is matched but not captured.
# Written as a raw string so that "\." reaches the regex engine unchanged
# instead of being treated as an (invalid) string escape sequence.
SCHEMA_PATTERN = re.compile(r""".+:([a-zA-Z0-9_\.]+)/([a-zA-Z0-9_]+)/[^/]+/(.*)""")

# Source text of the regular expression for an Iglu schema URI of the form
#   iglu:<vendor>/<name>/<format>/<MODEL>-<REVISION>-<ADDITION>
# It defines four capturing groups: vendor, name, format and the full
# version string. The "-" inside each character class is a literal hyphen,
# so hyphens are accepted in vendor, name and format.
SCHEMA_URI = ("^iglu:" # Protocol
"([a-zA-Z0-9-_.]+)/" # Vendor
"([a-zA-Z0-9-_]+)/" # Name
"([a-zA-Z0-9-_]+)/" # Format
"([1-9][0-9]*" # MODEL (cannot start with 0)
"(?:-(?:0|[1-9][0-9]*)){2})$") # REVISION and ADDITION

def fix_schema(prefix, schema):
SCHEMA_URI_REGEX = re.compile(SCHEMA_URI)


def extract_schema(uri):
"""
Create an Elasticsearch field name from a schema string
Extracts Schema information from Iglu URI
>>> extract_schema("iglu:com.acme-corporation_underscore/event_name-dash/jsonschema/1-10-1")['vendor']
'com.acme-corporation_underscore'
"""
match = re.match(SCHEMA_PATTERN, schema)
match = re.match(SCHEMA_URI_REGEX, uri)
if match:
snake_case_organization = match.group(1).replace('.', '_').lower()
snake_case_name = re.sub('([^A-Z_])([A-Z])', '\g<1>_\g<2>', match.group(2)).lower()
model = match.group(3).split('-')[0]
return "{}_{}_{}_{}".format(prefix, snake_case_organization, snake_case_name, model)
return {
'vendor': match.group(1),
'name': match.group(2),
'format': match.group(3),
'version': match.group(4)

}
else:
raise SnowplowEventTransformationException([
"Schema {} does not conform to regular expression {}".format(schema, SCHEMA_PATTERN)
"Schema {} does not conform to regular expression {}".format(uri, SCHEMA_URI)
])


def fix_schema(prefix, schema):
    """
    Create an Elasticsearch field name from a schema string

    The schema URI is parsed with extract_schema(); the vendor has its dots
    replaced by underscores, the name is converted from camelCase to
    snake_case, and only the MODEL part of the version is kept.
    For example, prefix "contexts" and schema
    "iglu:com.acme/someName/jsonschema/1-0-0" give "contexts_com_acme_some_name_1".

    :param prefix: string placed at the start of the generated field name
    :param schema: Iglu schema URI
    :return: string of the form "<prefix>_<vendor>_<name>_<model>"
    :raises SnowplowEventTransformationException: raised by extract_schema()
        when the URI does not match the schema regular expression
    """
    schema_dict = extract_schema(schema)
    snake_case_organization = schema_dict['vendor'].replace('.', '_').lower()
    # Raw strings: "\g<1>" is a regex group reference, not a string escape.
    # An underscore is inserted before every capital letter that follows a
    # character which is neither a capital letter nor an underscore.
    snake_case_name = re.sub(r'([^A-Z_])([A-Z])', r'\g<1>_\g<2>', schema_dict['name']).lower()
    # The version is "MODEL-REVISION-ADDITION"; only MODEL goes in the field name
    model = schema_dict['version'].split('-')[0]
    return "{}_{}_{}_{}".format(prefix, snake_case_organization, snake_case_name, model)


def parse_contexts(contexts):
"""
Convert a contexts JSON to an Elasticsearch-compatible list of key-value pairs
Expand Down

0 comments on commit 0ddca91

Please sign in to comment.