From b294950df3c968d93098b3639542c065e592527c Mon Sep 17 00:00:00 2001 From: Chris Dzombak Date: Mon, 9 Apr 2018 14:22:55 -0400 Subject: [PATCH] Implement new docs-bq output --- zschema/__main__.py | 4 ++-- zschema/compounds.py | 57 +++++++++++++++++++++++++++++--------------- zschema/leaves.py | 20 +++++----------- 3 files changed, 46 insertions(+), 35 deletions(-) diff --git a/zschema/__main__.py b/zschema/__main__.py index cc24d08..558622b 100644 --- a/zschema/__main__.py +++ b/zschema/__main__.py @@ -28,10 +28,10 @@ def main(): print json.dumps(record.to_bigquery()) elif command == "elasticsearch": print json.dumps(record.to_es(recname)) - elif command == "bq-annotated": - print json.dumps(record.to_bigquery(annotated=True)) elif command == "docs-es": print json.dumps(record.docs_es(recname)) + elif command == "docs-bq": + print json.dumps(record.docs_bq(recname)) elif command == "json": print record.to_json() elif command == "flat": diff --git a/zschema/compounds.py b/zschema/compounds.py index 574beed..01e3ab4 100644 --- a/zschema/compounds.py +++ b/zschema/compounds.py @@ -29,12 +29,16 @@ def print_indent_string(self, name, indent): print tabs + name + ":%s:" % self.__class__.__name__, self.object_.print_indent_string(self.key_to_string(name), indent+1) - def to_bigquery(self, name, annotated=False, parent_category=None): - retv = self.object_.to_bigquery(name, annotated=annotated) + def to_bigquery(self, name): + retv = self.object_.to_bigquery(name) retv["mode"] = "REPEATED" - if annotated: - category = self.category if self.category else parent_category - retv["category"] = category + return retv + + def docs_bq(self, parent_category=None): + retv = self.object_.docs_bq() + category = self.category if self.category else parent_category + retv["category"] = category + retv["repeated"] = True return retv def to_es(self): @@ -124,9 +128,8 @@ def merge(self, other): self.definition = newdef return self - def to_bigquery(self, name, annotated=False, parent_category=None): - category = self.category if self.category else parent_category - fields = [v.to_bigquery(k, annotated=annotated, parent_category=category) \ + def to_bigquery(self, name): + fields = [v.to_bigquery(k) \ for (k,v) in sorted(self.definition.iteritems()) \ if not v.exclude_bigquery ] @@ -136,8 +139,14 @@ def to_bigquery(self, name, annotated=False, parent_category=None): "fields":fields, "mode":"REQUIRED" if self.required else "NULLABLE" } - if annotated and self.doc: - retv["doc"] = self.doc + return retv + + def docs_bq(self, parent_category=None): + retv = self._docs_common(parent_category=parent_category) + fields = { self.key_to_bq(k): v.docs_bq() \ + for (k,v) in sorted(self.definition.iteritems()) \ + if not v.exclude_bigquery } + retv["fields"] = fields return retv def print_indent_string(self, name, indent): @@ -192,15 +201,21 @@ def __init__(self, object_, subrecord_name, max_items=10, category=None): ListOf.__init__(self, object_, max_items, category=category) self.subrecord_name = subrecord_name - def to_bigquery(self, name, annotated=False, parent_category=None): + def to_bigquery(self, name): subr = SubRecord({ self.subrecord_name:ListOf(self.object_) }) - retv = subr.to_bigquery(self.key_to_bq(name), annotated=annotated) + retv = subr.to_bigquery(self.key_to_bq(name)) retv["mode"] = "REPEATED" - if annotated: - category = self.category if self.category else parent_category - retv["category"] = category + return retv + + def docs_bq(self, parent_category=None): + subr = SubRecord({ + self.subrecord_name: ListOf(self.object_) + }) + category = self.category if self.category else parent_category + retv = subr.docs_bq(parent_category=category) + retv["repeated"] = True return retv @@ -210,16 +225,20 @@ def to_es(self, name): return {name:SubRecord.to_es(self)} def docs_es(self, name, parent_category=None): - return {name: SubRecord.docs_es(self, parent_category=parent_category)} - - def to_bigquery(self, annotated=False, parent_category=None): category = self.category if self.category else parent_category + return {name: SubRecord.docs_es(self, parent_category=category)} + + def to_bigquery(self): source = sorted(self.definition.iteritems()) - return [s.to_bigquery(name, annotated=annotated, parent_category=category) \ + return [s.to_bigquery(name) \ for (name, s) in source \ if not s.exclude_bigquery ] + def docs_bq(self, name, parent_category=None): + category = self.category if self.category else parent_category + return {name: SubRecord.docs_bq(self, parent_category=category)} + def print_indent_string(self): for name, field in sorted(self.definition.iteritems()): field.print_indent_string(name, 0) diff --git a/zschema/leaves.py b/zschema/leaves.py index df9716d..c0c4dcb 100644 --- a/zschema/leaves.py +++ b/zschema/leaves.py @@ -96,26 +96,18 @@ def docs_es(self, parent_category=None): retv["type"] = self.ES_TYPE return retv - def to_bigquery(self, name, annotated=False, parent_category=None): + def docs_bq(self, parent_category=None): + retv = self._docs_common(parent_category) + retv["type"] = self.BQ_TYPE + return retv + + def to_bigquery(self, name): if not self._check_valid_name(name): raise Exception("Invalid field name: %s" % name) mode = "REQUIRED" if self.required else "NULLABLE" retv = {"name":self.key_to_bq(name), "type":self.BQ_TYPE, "mode":mode} if self.doc: retv["doc"] = self.doc - if annotated: - retv["detail_type"] = self.__class__.__name__ - category = self.category if self.category else parent_category - retv["category"] = category - if self.min_value: - retv["min_value"] = self.min_value - if self.max_value: - retv["max_value"] = self.max_value - if hasattr(self, "values_s") and len(self.values_s): - # gotta clean this up but for now... - retv["values"] = list(self.values_s) - else: - retv["examples"] = self.examples return retv def to_string(self, name):