Skip to content

Commit

Permalink
Implement new docs-bq output
Browse files Browse the repository at this point in the history
  • Loading branch information
cdzombak committed Apr 9, 2018
1 parent de5ea2e commit b294950
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 35 deletions.
4 changes: 2 additions & 2 deletions zschema/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ def main():
print json.dumps(record.to_bigquery())
elif command == "elasticsearch":
print json.dumps(record.to_es(recname))
elif command == "bq-annotated":
print json.dumps(record.to_bigquery(annotated=True))
elif command == "docs-es":
print json.dumps(record.docs_es(recname))
elif command == "docs-bq":
print json.dumps(record.docs_bq(recname))
elif command == "json":
print record.to_json()
elif command == "flat":
Expand Down
57 changes: 38 additions & 19 deletions zschema/compounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,16 @@ def print_indent_string(self, name, indent):
print tabs + name + ":%s:" % self.__class__.__name__,
self.object_.print_indent_string(self.key_to_string(name), indent+1)

# NOTE(review): flattened diff hunk -- indentation and +/- markers were lost,
# so two versions of this method are interleaved. Going by the commit's
# addition/deletion counts, the variant taking `annotated` appears to be the
# removed one; confirm against the repository before relying on this.
# Presumably the pre-commit signature and delegation:
def to_bigquery(self, name, annotated=False, parent_category=None):
retv = self.object_.to_bigquery(name, annotated=annotated)
# Presumably the post-commit signature and delegation (name only):
def to_bigquery(self, name):
retv = self.object_.to_bigquery(name)
# The wrapped object's field definition is reused, with its mode overridden.
retv["mode"] = "REPEATED"
# NOTE(review): `annotated` / `parent_category` are not parameters of the
# name-only signature above; these three lines look like removed code.
if annotated:
category = self.category if self.category else parent_category
retv["category"] = category
return retv

def docs_bq(self, parent_category=None):
    """Return the wrapped object's BigQuery docs entry, flagged as repeated.

    The entry's "category" is set to this object's own category when that is
    truthy, and to ``parent_category`` otherwise (which may be None).
    """
    docs = self.object_.docs_bq()
    docs["category"] = self.category or parent_category
    docs["repeated"] = True
    return docs

def to_es(self):
Expand Down Expand Up @@ -124,9 +128,8 @@ def merge(self, other):
self.definition = newdef
return self

def to_bigquery(self, name, annotated=False, parent_category=None):
category = self.category if self.category else parent_category
fields = [v.to_bigquery(k, annotated=annotated, parent_category=category) \
def to_bigquery(self, name):
fields = [v.to_bigquery(k) \
for (k,v) in sorted(self.definition.iteritems()) \
if not v.exclude_bigquery
]
Expand All @@ -136,8 +139,14 @@ def to_bigquery(self, name, annotated=False, parent_category=None):
"fields":fields,
"mode":"REQUIRED" if self.required else "NULLABLE"
}
if annotated and self.doc:
retv["doc"] = self.doc
return retv

def docs_bq(self, parent_category=None):
    """Return this record's BigQuery docs entry, including its sub-fields.

    Starts from ``self._docs_common(...)`` and adds a "fields" mapping from
    each BigQuery-converted key to that child's own ``docs_bq()`` entry.
    Children whose ``exclude_bigquery`` is truthy are left out.
    """
    docs = self._docs_common(parent_category=parent_category)
    children = {}
    for key, field in sorted(self.definition.iteritems()):
        if field.exclude_bigquery:
            continue
        children[self.key_to_bq(key)] = field.docs_bq()
    docs["fields"] = children
    return docs

def print_indent_string(self, name, indent):
Expand Down Expand Up @@ -192,15 +201,21 @@ def __init__(self, object_, subrecord_name, max_items=10, category=None):
ListOf.__init__(self, object_, max_items, category=category)
self.subrecord_name = subrecord_name

# NOTE(review): flattened diff hunk -- indentation and +/- markers were lost,
# so the old and new versions of this method are interleaved. Going by the
# commit's addition/deletion counts, the lines using `annotated` appear to be
# the removed ones; confirm against the repository.
# Presumably the pre-commit signature:
def to_bigquery(self, name, annotated=False, parent_category=None):
# Presumably the post-commit signature:
def to_bigquery(self, name):
# Wrap the list in a one-key SubRecord named by self.subrecord_name.
subr = SubRecord({
self.subrecord_name:ListOf(self.object_)
})
# Two versions of the same delegation; the second (no `annotated`) matches
# the name-only signature.
retv = subr.to_bigquery(self.key_to_bq(name), annotated=annotated)
retv = subr.to_bigquery(self.key_to_bq(name))
retv["mode"] = "REPEATED"
# NOTE(review): `annotated` / `parent_category` are not parameters of the
# name-only signature; these three lines look like removed code.
if annotated:
category = self.category if self.category else parent_category
retv["category"] = category
return retv

def docs_bq(self, parent_category=None):
    """Return BigQuery docs for this nested list, flagged as repeated.

    The list is wrapped in a one-key SubRecord named by
    ``self.subrecord_name``. That wrapper's docs are generated with this
    object's own category when truthy, otherwise ``parent_category``.
    """
    wrapper = SubRecord({self.subrecord_name: ListOf(self.object_)})
    docs = wrapper.docs_bq(parent_category=self.category or parent_category)
    docs["repeated"] = True
    return docs


Expand All @@ -210,16 +225,20 @@ def to_es(self, name):
return {name:SubRecord.to_es(self)}

# NOTE(review): flattened diff region spanning docs_es and to_bigquery --
# indentation and +/- markers were lost, so removed and added lines are
# interleaved. Attribution below follows the commit's addition/deletion
# counts; confirm against the repository.
def docs_es(self, name, parent_category=None):
# Presumably the removed return: passes parent_category through unchanged.
return {name: SubRecord.docs_es(self, parent_category=parent_category)}

# Presumably the removed to_bigquery signature:
def to_bigquery(self, annotated=False, parent_category=None):
# Own category wins when truthy; otherwise fall back to parent_category.
# This line appears to be shared context: it sat in the old to_bigquery and
# now feeds the new docs_es return below.
category = self.category if self.category else parent_category
# Presumably the added docs_es return: uses the resolved category.
return {name: SubRecord.docs_es(self, parent_category=category)}

# Presumably the added to_bigquery signature (no arguments besides self):
def to_bigquery(self):
source = sorted(self.definition.iteritems())
# Two versions of the list-building return follow back to back; the first
# (with annotated/parent_category) looks like the removed one. Fields whose
# exclude_bigquery is truthy are filtered out.
return [s.to_bigquery(name, annotated=annotated, parent_category=category) \
return [s.to_bigquery(name) \
for (name, s) in source \
if not s.exclude_bigquery
]

def docs_bq(self, name, parent_category=None):
    """Return ``{name: docs}`` where docs is this record's SubRecord BigQuery docs.

    The category handed to ``SubRecord.docs_bq`` is this record's own
    category when truthy, otherwise ``parent_category``.
    """
    inherited = self.category or parent_category
    body = SubRecord.docs_bq(self, parent_category=inherited)
    return {name: body}

def print_indent_string(self):
for name, field in sorted(self.definition.iteritems()):
field.print_indent_string(name, 0)
Expand Down
20 changes: 6 additions & 14 deletions zschema/leaves.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,26 +96,18 @@ def docs_es(self, parent_category=None):
retv["type"] = self.ES_TYPE
return retv

def to_bigquery(self, name, annotated=False, parent_category=None):
def docs_bq(self, parent_category=None):
    """Return this leaf's BigQuery docs entry.

    Takes the dict produced by ``self._docs_common(parent_category)`` and
    sets its "type" key to the class's ``BQ_TYPE``.
    """
    docs = self._docs_common(parent_category)
    docs["type"] = self.BQ_TYPE
    return docs

# Build the BigQuery field definition dict for this leaf under `name`.
# Raises Exception when self._check_valid_name(name) is falsy.
# NOTE(review): flattened diff hunk -- indentation and +/- markers were lost,
# so kept and removed body lines are interleaved below.
def to_bigquery(self, name):
if not self._check_valid_name(name):
raise Exception("Invalid field name: %s" % name)
mode = "REQUIRED" if self.required else "NULLABLE"
# The emitted name goes through key_to_bq; the type is the class's BQ_TYPE.
retv = {"name":self.key_to_bq(name), "type":self.BQ_TYPE, "mode":mode}
# "doc" is only included when self.doc is truthy.
if self.doc:
retv["doc"] = self.doc
# NOTE(review): `annotated` and `parent_category` are not parameters of the
# signature above. Going by the commit's deletion count for this file, the
# lines from here down to (not including) the final `return retv` appear to
# be the removed pre-commit annotation code; confirm against the repository.
if annotated:
retv["detail_type"] = self.__class__.__name__
category = self.category if self.category else parent_category
retv["category"] = category
if self.min_value:
retv["min_value"] = self.min_value
if self.max_value:
retv["max_value"] = self.max_value
if hasattr(self, "values_s") and len(self.values_s):
# gotta clean this up but for now...
retv["values"] = list(self.values_s)
else:
retv["examples"] = self.examples
return retv

def to_string(self, name):
Expand Down

0 comments on commit b294950

Please sign in to comment.