From e692ab10c6c92554debfbaca5602fbe8fefe48a7 Mon Sep 17 00:00:00 2001 From: nitusima Date: Fri, 12 Jul 2024 14:13:55 +0300 Subject: [PATCH] . --- code/dihlibs/SQLCipherDialect.py | 38 +++++ code/dihlibs/command.py | 3 - code/dihlibs/data/docker/backend.zip | Bin 19579 -> 19579 bytes code/dihlibs/data/docker/cronies.zip | Bin 3849 -> 3849 bytes code/dihlibs/db.py | 2 +- code/dihlibs/dhis/__init__.py | 9 +- code/dihlibs/dhis/meta.py | 2 +- code/dihlibs/drive.py | 4 +- code/dihlibs/evaluator.py | 144 +++++++++++++++++ code/dihlibs/functions.py | 17 +- code/dihlibs/jsonq.py | 230 +++++++++++++++++++++++++++ code/setup.py | 7 +- 12 files changed, 441 insertions(+), 15 deletions(-) create mode 100644 code/dihlibs/SQLCipherDialect.py create mode 100644 code/dihlibs/evaluator.py create mode 100644 code/dihlibs/jsonq.py diff --git a/code/dihlibs/SQLCipherDialect.py b/code/dihlibs/SQLCipherDialect.py new file mode 100644 index 0000000..dbe729b --- /dev/null +++ b/code/dihlibs/SQLCipherDialect.py @@ -0,0 +1,38 @@ +from sqlalchemy.dialects.sqlite.base import SQLiteDialect +from sqlalchemy.engine.url import make_url +import pysqlcipher3.dbapi2 as sqlcipher +import dihlibs.functions as fn + + +class SQLCipherDialect(SQLiteDialect): + name = "sqlcipher" + driver = "pysqlcipher3" + paramstyle = "qmark" + supports_statement_cache = True + key = None + + @classmethod + def dbapi(cls): + return sqlcipher + + def create_connect_args(self, url): + parsed_url = make_url(url) + self.key = parsed_url.query.get("key", None) + self._adb_pulldb_if_android_db(parsed_url) + opts = url.translate_connect_args() + opts.pop("key", None) + return [[], opts] + + def connect(self, *cargs, **cparams): + dbapi_con = super().connect(*cargs, **cparams) + if self.key: + dbapi_con.execute(f"PRAGMA key='{self.key}';") + return dbapi_con + + def _adb_pulldb_if_android_db(self, parsed_url): + package = parsed_url.query.get("package", None) + if not package: + return + db = parsed_url.database + cmd = f"$HOME/Android/Sdk/platform-tools/adb exec-out run-as {package} cat /data/data/{package}/databases/{db} > ./{db} " + print(fn.cmd_wait(cmd)) diff --git a/code/dihlibs/command.py b/code/dihlibs/command.py index cda69bd..b76dc5e 100644 --- a/code/dihlibs/command.py +++ b/code/dihlibs/command.py @@ -1,8 +1,6 @@ from concurrent.futures import ThreadPoolExecutor import concurrent.futures -from typing import Callable, Any from subprocess import Popen, PIPE -import select,os from pathlib import Path import pkg_resources import dihlibs.functions as fn @@ -11,7 +9,6 @@ class _Command: def __init__(self, cmd, bg=True): - # bash_functions=Path(__file__).parent / "bash/script.sh" bash_functions = pkg_resources.resource_filename('dihlibs', 'data/bash/script.sh') self.cmd = f'. $HOME/.bashrc && . {bash_functions} && {cmd.strip()}' self.bg = bg diff --git a/code/dihlibs/data/docker/backend.zip b/code/dihlibs/data/docker/backend.zip index 8f43eaae5c277ddb21eab2924f9ee850ee7ea1f0..a5c587dbde12963840211e6bee83dcd88b496b0b 100644 GIT binary patch delta 177 zcmex8gYowa#tHh&@7=m48o7X|$u(?p6Q>(9_f(cnJUay@GMU4jZ}J32ZIJNheT<5X zV8$b+ClJO-mJN(hrIBEn8*H1Iz>Gdl9%kkjRdt)^@=S!NHRQXh2vsXJS=qo6tSsJO kIz;xTAtN)hV0GQ*zb0(#U@116&n(POt~YM}<515H09gh_KL7v# delta 177 zcmex8gYowa#tHh&JWGov8o7X|$u(?p6Q>(9?^{+h@$3|s$Yc(8zR439wL!w0_c1Cm zf*FsPobBxjYjgY7P0WDnivtO;$Fr1k1)7Oozz+ cG-PCE76MxG*MyB7EX8K?nFY-F<515H00S9CdjJ3c diff --git a/code/dihlibs/data/docker/cronies.zip b/code/dihlibs/data/docker/cronies.zip index 00ebae47ab0a6b4c9a02dc1c29dbce0135b5f98d..11ce12dc752974c34d0b91efb3a4a87b8484122a 100644 GIT binary patch delta 76 zcmeB_>y(?I&-~u4Yw`sqsfj*D%srK*6B{RjsLi5`I~bu--e3Va<~~L+V?E1hRuE%y NE!Sy(?I&&;#5Xz~Rnsfj*D%=?xVO>CS9qBe^%?qGyUd4mPynEM#PjP)$1SwW1+ NwOqHs0@2*-m;quq9xDI< diff --git a/code/dihlibs/db.py b/code/dihlibs/db.py index e591e35..2db4477 100644 --- a/code/dihlibs/db.py +++ b/code/dihlibs/db.py @@ -131,4 +131,4 @@ def upate_table_df(self, df, tablename, id_column="id"): ) return self.exec(sql) -registry.register("sqlcipher", "dihlibs.SQLCipherDialect", "SQLCipherDialect") \ No newline at end of file +# registry.register("sqlcipher", "dihlibs.SQLCipherDialect", "SQLCipherDialect") \ No newline at end of file diff --git a/code/dihlibs/dhis/__init__.py b/code/dihlibs/dhis/__init__.py index 5f685ec..879adff 100644 --- a/code/dihlibs/dhis/__init__.py +++ b/code/dihlibs/dhis/__init__.py @@ -213,11 +213,17 @@ def get_period(self, when, period_type="monthly"): } ).get(period_type.lower()) + def get_week_date(self,date): + parts = date.split("W") + week_start = 7 * int(parts[1]) + year_start = datetime.strptime(parts[0], "%Y") + return year_start + relativedelta(days=abs(week_start)) + def period_to_db_date(self, date: str): formats = ["%Y-%m-%d", "%YW%W", "%Y%m", "%Y"] for fmt in formats: try: - dt = datetime.strptime(date, fmt) + dt = datetime.strptime(date, fmt) if "W" not in date else self.get_week_date(date) return dt.strftime("%Y-%m-%d") except ValueError: pass @@ -235,6 +241,7 @@ def set_period_cols(r): e_map = e_map.reset_index().merge( self.datasets, left_on="dataset_id", right_on="id" ) + e_map.loc[:, ["period_column", "period_db", "period"]] = e_map.apply( set_period_cols, axis=1 ).to_list() diff --git a/code/dihlibs/dhis/meta.py b/code/dihlibs/dhis/meta.py index a1e9861..53c478b 100644 --- a/code/dihlibs/dhis/meta.py +++ b/code/dihlibs/dhis/meta.py @@ -38,7 +38,7 @@ def _normalize_combo(self, input): def add_category_combo(self): res=rq.get(f"{self._base_url}/api/categoryCombos?paging=false&fields=id~rename(categoryCombo),name~rename(comboName)").json() combos=pd.DataFrame(res.get('categoryCombos')) - clean=lambda input:','.join(sorted(re.split(r'(?:\s+)?(?:,|and)(?:\s+)?',input))).replace(' ','_').lower() + # clean=lambda input:','.join(sorted(re.split(r'(?:\s+)?(?:,|and)(?:\s+)?',input))).replace(' ','_').lower() combos['comboName']=combos.comboName.apply(self._normalize_combo) self._map['comboName']=self._map.disaggregation.fillna('default').apply(self._normalize_combo) return self._map.merge(combos,how='left',on='comboName') diff --git a/code/dihlibs/drive.py b/code/dihlibs/drive.py index 1f8f8d7..409d525 100644 --- a/code/dihlibs/drive.py +++ b/code/dihlibs/drive.py @@ -10,13 +10,13 @@ class Drive: - def __init__(self, key: dict): + def __init__(self, key: dict=None,credentials=None): try: scope = [ "https://www.googleapis.com/auth/drive.file", "https://www.googleapis.com/auth/drive.readonly", ] - credentials = ServiceAccountCredentials.from_json_keyfile_dict(key, scope) + credentials = ServiceAccountCredentials.from_json_keyfile_dict(key, scope) if credentials is None else credentials self.drive = build("drive", "v3", credentials=credentials) except Exception as e: print(e) diff --git a/code/dihlibs/evaluator.py b/code/dihlibs/evaluator.py new file mode 100644 index 0000000..2e025da --- /dev/null +++ b/code/dihlibs/evaluator.py @@ -0,0 +1,144 @@ +import re + +arithmetic_ops = { + "+": lambda a, b: a + b, + "-": lambda a, b: a - b, + "*": lambda a, b: a * b, + "/": lambda a, b: a / b, +} + +comparison_ops = { + ">": lambda a, b: 1.0 if a > b else 0.0, + "<": lambda a, b: 1.0 if a < b else 0.0, + "==": lambda a, b: 1.0 if a == b else 0.0, + "!=": lambda a, b: 1.0 if a != b else 0.0, + ">=": lambda a, b: 1.0 if a >= b else 0.0, + "<=": lambda a, b: 1.0 if a <= b else 0.0, +} + +logical_ops = { + "&": lambda a, b: 1.0 if (a != 0 and b != 0) else 0.0, + "&&": lambda a, b: 1.0 if (a != 0 and b != 0) else 0.0, + "|": lambda a, b: 1.0 if (a != 0 or b != 0) else 0.0, + "||": lambda a, b: 1.0 if (a != 0 or b != 0) else 0.0, +} + +operations = {**arithmetic_ops, **comparison_ops, **logical_ops} + + +def _handle_operator(output, operators, token): + while operators and _precedence(operators[-1]) >= _precedence(token): + output += operators.pop() + " " + operators.append(token) + return output + + +def _handle_parenthesis(output, operators, parenthesis): + if parenthesis == "(": + operators.append(parenthesis) + elif parenthesis == ")": + while operators and operators[-1] != "(": + output += operators.pop() + " " + if operators and operators[-1] == "(": + operators.pop() + return output + + +def _precedence(operator): + return { + "+": 1, "-": 1, "*": 2, "/": 2, + "^": 3, "<": 4, ">": 4, "<=": 4, + ">=": 4, "==": 4, "!=": 4, "!": 4, "~": 4, + }.get(operator, -1) + + +def _is_operator(token): + return bool(re.match(r"^[+\-*/^<>!=&|~]+$", token)) + + +def _is_string_operator(token): + return bool(re.match(r"^[<>=~!]+$", token)) + + +def _apply_operator(op, a, b): + if op in operations: + return operations[op](a, b) + else: + raise ValueError(f"Unsupported operator: {op}") + + +def _apply_string_operator(op, a, b): + if op in operations: + return operations[op](a, b) + elif op=="~": + return 1.0 if re.search(b, a) else 0.0 + else: + raise ValueError(f"Unsupported operator for strings: {op}") + + +def _to_postfix(infix): + output = "" + operators = [] + token_pattern = re.compile(r"\d+\.?\d*|'[^']*'|[a-zA-Z]+|[+\-*/^<>!=&|~]+|[()]") + + for m in token_pattern.finditer(infix): + token = m.group(0) + if re.match(r"^(?:\d+\.?\d*|'[^']*'|[a-zA-Z]+)$", token): + output += token + " " + elif token in (")", "("): + output = _handle_parenthesis(output, operators, token) + elif _is_operator(token): + output = _handle_operator(output, operators, token) + else: + raise ValueError(f"Unexpected token: {token}") + + while operators: + output += operators.pop() + " " + + return output + + +def _evaluate_postfix(postfix): + stack = [] + token_pattern = re.compile(r"\d+\.?\d*|'[^']*'|[a-zA-Z]+|[+\-*/^()<>!=&|~]+") + + for m in token_pattern.finditer(postfix): + token = m.group(0) + if token in ['true','false']: + stack.append(1.0 if token=="true" else 0) + elif re.match(r"^\d+\.?\d*$", token): + stack.append(float(token)) + elif re.match(r"^'[^']*'$", token): + stack.append(token[1:-1]) + elif _is_operator(token): + _operate(token, stack) + elif re.match(r"^\w+$", token): + stack.append(token) + else: + raise ValueError(f"Unexpected token: {token}") + return stack.pop() == 1.0 + +def _operate(token, stack): + b = stack.pop() + answer = None + + if token == "!" and isinstance(b, float): + answer = 1.0 if b == 0 else 0.0 + else: + a = stack.pop() + if isinstance(a, (int, float)) and isinstance(b, (int, float)): + answer = _apply_operator(token, a, b) + elif _is_string_operator(token): + answer = _apply_string_operator(token, a, b) + else: + answer = None + + if answer == None: # Check for NaN + error = f"Unsupported operand type for operator: {token} operand {a} and {b}" + print(error, token, a, b) + stack.append(answer) + + +def evaluate(expression): + return _evaluate_postfix(_to_postfix(expression)) + diff --git a/code/dihlibs/functions.py b/code/dihlibs/functions.py index 082174c..ec1e38f 100644 --- a/code/dihlibs/functions.py +++ b/code/dihlibs/functions.py @@ -7,11 +7,7 @@ from dateutil.relativedelta import relativedelta from collections import namedtuple import numpy as np -import asyncio, aiohttp -import yaml -import string -import os -import select +import asyncio, aiohttp, yaml, string, os, hashlib, select from dihlibs.command import _Command from collections import deque from fuzzywuzzy import fuzz @@ -316,3 +312,14 @@ def fuzzy_match(left_df,right_df,left_keys=[],right_keys=[],method="0"): left_df.loc[left_df[lkey].isna(),rcolumns]='' return left_df.sort_values('match',ascending=False).drop(columns=[rkey,lkey]).reset_index(drop=True) + + +def uuid_from_hash(input_string): + if not isinstance(input_string, str): + raise ValueError("Input must be a string") + hash = hashlib.sha256(input_string.encode()).hexdigest() + hash = hash[:12] + '4' + hash[13:] + variant_char = (int(hash[16], 16) & 0x3) | 0x8 + hash = hash[:16] + format(variant_char, 'x') + hash[17:] + uuid = f'{hash[:8]}-{hash[8:12]}-{hash[12:16]}-{hash[16:20]}-{hash[20:32]}' + return uuid \ No newline at end of file diff --git a/code/dihlibs/jsonq.py b/code/dihlibs/jsonq.py new file mode 100644 index 0000000..c9fd8f8 --- /dev/null +++ b/code/dihlibs/jsonq.py @@ -0,0 +1,230 @@ +import re +from evaluator import evaluate; +import json + +class JsonQ: + PRIMITIVE_TYPES = {"string", "number", "boolean"} + REGX = { + "integer": re.compile(r"^\d+$"), + "regular": re.compile(r"\w+(?:\.(?!\w*\*)\w+)*"), + "array": re.compile(r"\[(?:(-?\d+:?-?\d*)|(\??\(.*\))|(\*))\]"), + "array_bracket": re.compile(r"\[[^\]]+\]"), + "globbed": re.compile(r"\w*\*\w*"), + "wildcard": re.compile(r"\.{2,}(?:\.?\w+)*"), + "expression": re.compile(r".*\[\??\(.*\)"), + "variable": re.compile(r"@\.(\w+)"), + "true": re.compile(r"yes|ndio|ndiyo|true", re.I), + "false": re.compile(r"hapana|no|false", re.I), + "date": re.compile(r"(?:\d{4}-\d{2}-\d{2}|\d{2}-\d{2}-\d{4}|)") + } + + @staticmethod + def match(type, string): + pattern=f'^{JsonQ.REGX[type].pattern}$' + return bool(re.compile(pattern).match(string)) + + @staticmethod + def get_splitter(): + return re.compile( + "|".join(f"({JsonQ.REGX[r].pattern})" for r in ["globbed", "regular", "wildcard", "array_bracket"]), + re.VERBOSE + ) + + def __init__(self, json_obj): + if isinstance(json_obj, str): + self.root = json.loads(json_obj) + else: + self.root = json_obj + + def _evaluate_path(self, json_path): + matched = re.finditer(JsonQ.get_splitter(), json_path) + return [part for parts in matched for part in parts.groups() if part] + + def _collection_for_each(self, input, take): + if isinstance(input, list): + self.flat_for_each(input, take) + else: + take("", input) + + def get_object_root(self, obj): + return obj if not isinstance(obj, JsonQ) else obj.root + + def handle_array_match(self, path, obj, results): + parts = JsonQ.REGX["array"].match(path) + if not parts: + return + + if parts[3]: + self._collection_for_each(obj, lambda _, v: results.append(v)) + elif parts[2]: + self.filter(parts[2], obj, results) + elif parts[1]: + self._collection_for_each(obj, lambda _, v: results.append(v)) + self.slice_list(results, parts[1]) + + def slice_list(self, lst, slice_notation): + if not slice_notation: + return + + slices = slice_notation.split(",") + results = [] + for slice in slices: + parts = slice.split(":") + single_index = len(parts) == 1 and parts[0] + length = len(lst) + start = int(parts[0]) if len(parts) > 0 and parts[0] else 0 + end = int(parts[1]) if len(parts) > 1 and parts[1] else length + start = max(start if start >= 0 else length + start, 0) + end = start + 1 if single_index else min(end if end >= 0 else length + end, length) + for i in range(start, end): + results.append(lst[i]) + + lst.clear() + lst.extend(results) + + def is_primitive(self, data): + return type(data).__name__ in JsonQ.PRIMITIVE_TYPES + + + def filter(self, expression, object, results): + def _filter_inner(key, obj): + obj = self.get_object_root(obj) + if isinstance(obj, dict): + exp = expression + for m in re.finditer(JsonQ.REGX["variable"], exp): + variable = m.group(1) + value = self.prep_variable_for_expression(obj.get(variable)) + if value is None: + return + exp = exp.replace(f"@.{variable}", value) + evaluation = evaluate(exp.replace("[]\\[]", "")) + elif self.is_primitive(obj): + value = self.prep_variable_for_expression(obj) + if value is None: + return + evaluation = evaluate(expression.replace("@." + key, value)) + else: + evaluation = False + if evaluation: + results.append(obj) + + self._collection_for_each(object,_filter_inner) + + + def prep_variable_for_expression(self, value): + if not isinstance(value, str): + return str(value) if self.is_primitive(value) else None + + if JsonQ.match("true", value): + return "true" + elif JsonQ.match("false", value): + return "false" + else: + return f"'{value}'" + + def handle_normal_path(self, path, obj): + if not path: + return obj + + current = obj + for p in path.split('.'): + current = self.value_at_key(p, current) + + return None if current == obj else current + + def value_at_key(self, key, json_thing): + if isinstance(json_thing, list) and key.isdigit(): + return json_thing[int(key)] + elif isinstance(json_thing, dict): + return json_thing.get(key) + else: + return json_thing + + def globbed_path(self, path, json_thing, results): + self.flat_for_each(json_thing, lambda k, v: self._globbed_inner(path, k, v, results)) + + def _globbed_inner(self, path, k, v, results): + regex = re.compile(path.replace("*", "\\w*")) + if regex.match(k): + results.append(v) + + def find_matching_path(self, path, root, results): + stack = [root] + seen = set() + path = re.sub(r"^[^\w*]+", "", path) + + while stack: + current = stack.pop() + res = self.handle_normal_path(path, current) if "*" not in path else self.globbed_path(path.replace("*", "\\w*"), current, results) + self.flat_for_each(current, lambda key, obj: self._matching_inner(obj, stack, seen)) + + def _matching_inner(self, obj, stack, seen): + if obj is None or id(obj) in seen: + return + stack.append(obj) + seen.add(id(obj)) + + def flat_for_each(self, collection, callback): + if isinstance(collection, list): + for index, value in enumerate(collection): + callback(index, value) + elif isinstance(collection, dict): + for key, value in collection.items(): + callback(key, value) + elif collection is not None: + callback("", collection) + + def find(self, json_path, root): + results = [] + is_match = re.match(r"^(\.|)$", json_path) + if is_match: + return [root] + if self.is_primitive(root): + return results + + paths = self._evaluate_path(json_path) + results.append(root) + + temp = [] + for path in paths: + if not results: + return results + + temp.clear() + taker = None + if JsonQ.match("regular", path): + taker = lambda _, obj: temp.append(self.handle_normal_path(path, obj)) + elif JsonQ.match("globbed", path): + taker = lambda _, obj: self.globbed_path(path, obj, temp) + elif JsonQ.match("expression", path): + taker = lambda _, obj: self.filter(path, obj, temp) + elif JsonQ.match("wildcard", path): + taker = lambda _, obj: self.find_matching_path(path, obj, temp) + elif JsonQ.match("array", path): + taker = lambda _, obj: self.handle_array_match(path, obj, temp) + else: + print(f"Path not found {json_path}. When processing this part {path}") + return [] + + self._collection_for_each(results, taker) + results.clear() + results.extend(temp) + + results.clear() + results.extend([obj for obj in temp if obj]) + return results + + def get(self, path): + return JsonQ(self.find(path, self.root)) + + def put(self, path, value): + i = path.rfind(".") + prop = path[i + 1:] + object_path = path[:i] + self.flat_for_each(self.get(object_path).root, lambda _, obj: obj.update({prop: value})) + + def val(self): + return self.root + + def for_each(self, callback): + self.flat_for_each(self.root, lambda k, v: callback(k, JsonQ(v))) diff --git a/code/setup.py b/code/setup.py index a7c5926..bc0ef36 100644 --- a/code/setup.py +++ b/code/setup.py @@ -2,7 +2,7 @@ setup( name="dihlibs", - version="0.0.38", + version="0.0.43", author="Nitu", author_email="nkataraia@d-tree.org", description="A helper package for data integrations", @@ -36,13 +36,16 @@ "openpyxl", "SQLAlchemy", "psycopg2-binary", - "pysqlcipher3", + # "pysqlcipher3", "google-api-python-client", "google-auth-httplib2", "google-auth-oauthlib", "oauth2client", "aiohttp", "pyyaml", + "fuzzywuzzy", + "setuptools", + "python-Levenshtein", ], entry_points={ "console_scripts": [