Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add JSON schema descriptions for BQ and ES #18

Merged
merged 17 commits into from
Apr 10, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 7 additions & 17 deletions zschema/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@

def usage():
sys.stderr.write("USAGE: %s command schema [file].\n" % sys.argv[0].split("/")[-1])
sys.stderr.write("Valid commands: bigquery, elasticsearch, json, text, html, censys-html, flat, validate.\n")
sys.stderr.write("schema should be defined as file.py:record\n")
sys.stderr.write("Valid commands: bigquery, elasticsearch, docs-es, docs-bq, json, flat, validate.\n")
sys.stderr.write("Schema should be passed as file.py:record\n")
sys.stderr.write("The optional 'file' argument is used only as the test file for the 'validate' command.\n")
sys.stderr.write("VERSION: %s\n" % zschema.__version__)
sys.exit(1)

Expand All @@ -27,26 +28,15 @@ def main():
print json.dumps(record.to_bigquery())
elif command == "elasticsearch":
print json.dumps(record.to_es(recname))
elif command == "docs-es":
print json.dumps(record.docs_es(recname))
elif command == "docs-bq":
print json.dumps(record.docs_bq(recname))
elif command == "json":
print record.to_json()
elif command == "html":
for r in record.to_flat():
type_ = r.get("es_type", "")
print "<tr><td>%s</td><td>%s</td></tr>" % (r["name"], type_)
elif command == "text":
print record.to_text()
elif command == "flat":
for r in record.to_flat():
print json.dumps(r)
elif command == "censys-html":
for r in record.to_flat():
type_ = r.get("es_type", None)
len_ = r["name"].count(".")
style = 'style="padding-left: %ipx"' % (15 * len_ + 5)
if not type_:
print '<tr class="record"><td %s>%s</td><td>%s</td></tr>' % (style, r["name"], "")
else:
print "<tr><td %s>%s</td><td>%s</td></tr>" % (style, r["name"], type_)
elif command == "validate":
if not os.path.exists(sys.argv[3]):
sys.stderr.write("Invalid test file. %s does not exist.\n" % sys.argv[3])
Expand Down
103 changes: 83 additions & 20 deletions zschema/compounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@ def _is_valid_object(name, object_):

class ListOf(Keyable):

def __init__(self, object_, max_items=10):
def __init__(self, object_, max_items=10, doc=None, category=None):
self.object_ = object_
self.max_items = max_items
self.category = category
self.doc = doc
_is_valid_object("Anonymous ListOf", object_)

@property
Expand All @@ -33,9 +35,27 @@ def to_bigquery(self, name):
retv["mode"] = "REPEATED"
return retv

def docs_bq(self, parent_category=None):
    """Build the BigQuery documentation entry for this list.

    Starts from the dict returned by the element type's own
    ``docs_bq()`` and stamps the list-level information on top of it.

    Args:
        parent_category: category used when this list has no truthy
            ``category`` of its own.

    Returns:
        The element's documentation dict with ``category`` overwritten,
        ``repeated`` set to ``True`` and, when this list has a truthy
        ``doc``, ``doc`` replaced by it.
    """
    described = self.object_.docs_bq()
    # The list's own category wins; otherwise fall back to the caller's.
    described["category"] = self.category or parent_category
    described["repeated"] = True
    if self.doc:
        described["doc"] = self.doc
    return described

def to_es(self):
    """Return the Elasticsearch mapping of the element type.

    The list adds nothing of its own here: the result is exactly what
    ``self.object_.to_es()`` returns.
    """
    element_mapping = self.object_.to_es()
    return element_mapping

def docs_es(self, parent_category=None):
    """Build the Elasticsearch documentation entry for this list.

    Starts from the dict returned by the element type's own
    ``docs_es()`` and stamps the list-level information on top of it.

    Args:
        parent_category: category used when this list has no truthy
            ``category`` of its own.

    Returns:
        The element's documentation dict with ``category`` overwritten,
        ``repeated`` set to ``True`` and, when this list has a truthy
        ``doc``, ``doc`` replaced by it.
    """
    described = self.object_.docs_es()
    # The list's own category wins; otherwise fall back to the caller's.
    described["category"] = self.category or parent_category
    described["repeated"] = True
    if self.doc:
        described["doc"] = self.doc
    return described

def validate(self, name, value):
if type(value) != list:
raise DataValidationException("%s: %s is not a list",
Expand All @@ -59,11 +79,13 @@ def __init__(self,
doc=None,
extends=None,
allow_unknown=False,
exclude=None):
exclude=None,
category=None):
self.definition = definition
self.required = required
self.allow_unknown = allow_unknown
self.doc = doc
self.category = category
self._exclude = set(exclude) if exclude else set([])
# merge
if extends:
Expand Down Expand Up @@ -113,14 +135,25 @@ def merge(self, other):
return self

def to_bigquery(self, name):
fields = [v.to_bigquery(k) for (k,v) in sorted(self.definition.iteritems()) if \
not v.exclude_bigquery]
return {
fields = [v.to_bigquery(k) \
for (k,v) in sorted(self.definition.iteritems()) \
if not v.exclude_bigquery
]
retv = {
"name":self.key_to_bq(name),
"type":"RECORD",
"fields":fields,
"mode":"REQUIRED" if self.required else "NULLABLE"
}
return retv

def docs_bq(self, parent_category=None):
    """Build the BigQuery documentation entry for this record.

    Args:
        parent_category: forwarded to ``_docs_common`` as the fallback
            category.

    Returns:
        The dict from ``_docs_common`` plus a ``fields`` dict mapping
        each BigQuery key (via ``key_to_bq``) to the child's
        ``docs_bq()`` output. Children whose ``exclude_bigquery`` is
        truthy are left out.
    """
    described = self._docs_common(parent_category=parent_category)
    children = {}
    for key, child in sorted(self.definition.iteritems()):
        if child.exclude_bigquery:
            continue
        # Children are documented without a parent category, as before.
        children[self.key_to_bq(key)] = child.docs_bq()
    described["fields"] = children
    return described

def print_indent_string(self, name, indent):
tabs = "\t" * indent if indent else ""
Expand All @@ -129,10 +162,28 @@ def print_indent_string(self, name, indent):
value.print_indent_string(name, indent+1)

def to_es(self):
p = {self.key_to_es(k): v.to_es() for k, v in sorted(self.definition.iteritems()) \
p = {self.key_to_es(k): v.to_es() \
for k, v in sorted(self.definition.iteritems()) \
if not v.exclude_elasticsearch}
return {"properties": p}

def _docs_common(self, parent_category):
category = self.category or parent_category
retv = {
"category": category,
"doc": self.doc,
"type": self.__class__.__name__,
"required": self.required,
}
return retv

def docs_es(self, parent_category=None):
    """Build the Elasticsearch documentation entry for this record.

    Args:
        parent_category: forwarded to ``_docs_common`` as the fallback
            category.

    Returns:
        The dict from ``_docs_common`` plus a ``fields`` dict mapping
        each Elasticsearch key (via ``key_to_es``) to the child's
        ``docs_es()`` output. Children whose ``exclude_elasticsearch``
        is truthy are left out.
    """
    described = self._docs_common(parent_category=parent_category)
    children = {}
    for key, child in sorted(self.definition.iteritems()):
        if child.exclude_elasticsearch:
            continue
        # Children are documented without a parent category, as before.
        children[self.key_to_es(key)] = child.docs_es()
    described["fields"] = children
    return described

def to_dict(self):
source = sorted(self.definition.iteritems())
p = {self.key_to_es(k): v.to_dict() for k, v in source}
Expand All @@ -152,8 +203,8 @@ def validate(self, name, value):

class NestedListOf(ListOf):

def __init__(self, object_, subrecord_name, max_items=10):
ListOf.__init__(self, object_, max_items)
def __init__(self, object_, subrecord_name, max_items=10, doc=None, category=None):
ListOf.__init__(self, object_, max_items, doc=doc, category=category)
self.subrecord_name = subrecord_name

def to_bigquery(self, name):
Expand All @@ -162,6 +213,19 @@ def to_bigquery(self, name):
})
retv = subr.to_bigquery(self.key_to_bq(name))
retv["mode"] = "REPEATED"
if self.doc:
retv["doc"] = self.doc
return retv

def docs_bq(self, parent_category=None):
    """Build the BigQuery documentation entry for this nested list.

    The element type is wrapped in a ``ListOf`` and placed under
    ``self.subrecord_name`` inside a temporary ``SubRecord``; that
    sub-record's ``docs_bq`` output is the base of the result.

    Args:
        parent_category: category used when ``self.category`` is falsy.

    Returns:
        The wrapper's documentation dict with ``repeated`` set to
        ``True`` and, when this list has a truthy ``doc``, ``doc``
        replaced by it.
    """
    wrapper = SubRecord({self.subrecord_name: ListOf(self.object_)})
    described = wrapper.docs_bq(
        parent_category=self.category or parent_category
    )
    described["repeated"] = True
    if self.doc:
        described["doc"] = self.doc
    return described


Expand All @@ -170,24 +234,25 @@ class Record(SubRecord):
def to_es(self, name):
    """Return ``{name: mapping}``, where mapping is ``SubRecord.to_es(self)``."""
    mapping = SubRecord.to_es(self)
    return {name: mapping}

def docs_es(self, name, parent_category=None):
    """Return the Elasticsearch documentation keyed by the record name.

    Args:
        name: key under which the documentation dict is returned.
        parent_category: category used when ``self.category`` is falsy.

    Returns:
        A single-entry dict ``{name: SubRecord.docs_es(...)}``.
    """
    effective_category = self.category or parent_category
    body = SubRecord.docs_es(self, parent_category=effective_category)
    return {name: body}

def to_bigquery(self):
source = sorted(self.definition.iteritems())
return [s.to_bigquery(name) for (name, s) in source \
if not s.exclude_bigquery]

def to_html(self):
pass
return [s.to_bigquery(name) \
for (name, s) in source \
if not s.exclude_bigquery
]

def to_documented_html(self):
pass
def docs_bq(self, name, parent_category=None):
    """Return the BigQuery documentation keyed by the record name.

    Args:
        name: key under which the documentation dict is returned.
        parent_category: category used when ``self.category`` is falsy.

    Returns:
        A single-entry dict ``{name: SubRecord.docs_bq(...)}``.
    """
    effective_category = self.category or parent_category
    body = SubRecord.docs_bq(self, parent_category=effective_category)
    return {name: body}

def print_indent_string(self):
    """Ask every field, in sorted key order, to print itself at indent 0."""
    ordered_fields = sorted(self.definition.iteritems())
    for key, child in ordered_fields:
        child.print_indent_string(key, 0)

def to_dotted_text(self):
pass

def validate(self, value):
if type(value) != dict:
raise DataValidationException("record is not a dict", str(value))
Expand All @@ -212,5 +277,3 @@ def to_flat(self):
# Alternate constructor: builds an instance of cls from the mapping j,
# visiting its items in sorted key order.
# NOTE(review): the braces below build a *set* of (key, encoded value)
# tuples, not a dict -- confirm whether the constructor really accepts that
# or whether a dict comprehension was intended.
# NOTE(review): __encode is not defined in the visible code; if this method
# sits inside a class body, Python's private name mangling applies to the
# double-underscore name -- verify that it resolves at runtime.
@classmethod
def from_json(cls, j):
return cls({(k, __encode(v)) for k, v in sorted(j.iteritems())})


41 changes: 30 additions & 11 deletions zschema/leaves.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,11 @@ def __init__(self,
es_index=None,
es_analyzer=None,
doc=None,
examples=None,
es_include_raw=None,
deprecated=False,
ignore=False,
autocomplete_include=True,
autocomplete_category=None,
autocomplete_icon=None,
category=None,
exclude=None,
metadata=None,
units=None,
Expand All @@ -30,6 +29,7 @@ def __init__(self,
self.es_index = es_index
self.es_analyzer = es_analyzer
self.doc = doc
self.examples = examples if examples else []
if es_include_raw is not None:
self.es_include_raw = es_include_raw
else:
Expand All @@ -40,9 +40,7 @@ def __init__(self,
e = "WARN: %s is deprecated and will be removed in a "\
"future release\n" % self.__class__.__name__
sys.stderr.write(e)
self.autocomplete_category = autocomplete_category
self.autocomplete_category = autocomplete_category
self.autocomplete_icon = autocomplete_icon
self.category = category
self._exclude = set(exclude) if exclude else set([])
self.metadata = metadata if metadata else {}
self.units = units
Expand All @@ -56,7 +54,8 @@ def to_dict(self):
"type":self.__class__.__name__,
"es_type":self.ES_TYPE,
"bq_type":self.BQ_TYPE,
"metadata":self.metadata
"metadata":self.metadata,
"examples": self.examples,
}
if self.units is not None:
retv["units"] = self.units
Expand All @@ -72,13 +71,36 @@ def to_es(self):
self.add_es_var(retv, "analyzer", "es_analyzer", "ES_ANALYZER")
self.add_es_var(retv, "search_analyzer", "es_search_analyzer",
"ES_SEARCH_ANALYZER")

if self.es_include_raw:
retv["fields"] = {
"raw":{"type":"keyword"}
}
return retv

def _docs_common(self, parent_category):
retv = {
"detail_type": self.__class__.__name__,
"category": self.category or parent_category,
"doc": self.doc,
"required": self.required,
}
if hasattr(self, "values_s") and len(self.values_s):
retv["values"] = list(self.values_s)
else:
retv["examples"] = self.examples
return retv

def docs_es(self, parent_category=None):
    """Build the Elasticsearch documentation entry for this field.

    Args:
        parent_category: forwarded to ``_docs_common`` as the fallback
            category.

    Returns:
        The ``_docs_common`` dict after ``add_es_var`` has been applied
        for the ``analyzer`` setting, with ``type`` set to ``ES_TYPE``.
    """
    described = self._docs_common(parent_category)
    # add_es_var is defined outside this view; presumably it copies the
    # analyzer setting into the dict when one is configured -- TODO confirm.
    self.add_es_var(described, "analyzer", "es_analyzer", "ES_ANALYZER")
    described["type"] = self.ES_TYPE
    return described

def docs_bq(self, parent_category=None):
    """Build the BigQuery documentation entry for this field.

    Args:
        parent_category: forwarded to ``_docs_common`` as the fallback
            category.

    Returns:
        The ``_docs_common`` dict with ``type`` set to ``BQ_TYPE``.
    """
    described = self._docs_common(parent_category)
    described["type"] = self.BQ_TYPE
    return described

def to_bigquery(self, name):
if not self._check_valid_name(name):
raise Exception("Invalid field name: %s" % name)
Expand Down Expand Up @@ -118,9 +140,6 @@ def to_flat(self, parent, name, repeated=False):
"mode":mode
}

def to_autocomplete(self, parent, name, repated=False):
pass

def print_indent_string(self, name, indent):
val = self.key_to_string(name)
if indent:
Expand Down
Loading