.

d-tree-org · Jul 12, 2024 · e692ab1 · e692ab1
1 parent ee652fa
commit e692ab1
Show file tree

Hide file tree

Showing 12 changed files with 441 additions and 15 deletions.
diff --git a/code/dihlibs/SQLCipherDialect.py b/code/dihlibs/SQLCipherDialect.py
@@ -0,0 +1,38 @@
+from sqlalchemy.dialects.sqlite.base import SQLiteDialect
+from sqlalchemy.engine.url import make_url
+import pysqlcipher3.dbapi2 as sqlcipher
+import dihlibs.functions as fn
+
+
+class SQLCipherDialect(SQLiteDialect):
+    """SQLAlchemy dialect for SQLCipher-encrypted SQLite databases.
+
+    The passphrase is read from the ``key`` URL query parameter and applied
+    with ``PRAGMA key`` right after every DBAPI connection is opened. When the
+    URL also carries a ``package`` query parameter, the database file is first
+    copied out of that Android application with ``adb`` into the current
+    working directory.
+    """
+
+    name = "sqlcipher"
+    driver = "pysqlcipher3"
+    paramstyle = "qmark"
+    supports_statement_cache = True
+    key = None  # passphrase captured by create_connect_args()
+
+    @classmethod
+    def import_dbapi(cls):
+        """Return the DBAPI module (entry point looked up by SQLAlchemy 2.0+)."""
+        return sqlcipher
+
+    @classmethod
+    def dbapi(cls):
+        """Return the DBAPI module (entry point used by SQLAlchemy < 2.0)."""
+        return sqlcipher
+
+    def create_connect_args(self, url):
+        """Build the ``connect()`` arguments and remember the passphrase.
+
+        Args:
+            url: the engine URL; ``key`` and ``package`` are read from its
+                query string.
+
+        Returns:
+            ``[[], opts]`` where ``opts`` are the keyword arguments produced
+            by ``translate_connect_args()``.
+        """
+        parsed_url = make_url(url)
+        self.key = parsed_url.query.get("key", None)
+        self._adb_pulldb_if_android_db(parsed_url)
+        opts = parsed_url.translate_connect_args()
+        # Defensive: the passphrase must never be forwarded to connect().
+        opts.pop("key", None)
+        return [[], opts]
+
+    def connect(self, *cargs, **cparams):
+        """Open a DBAPI connection and unlock it with the stored passphrase."""
+        dbapi_con = super().connect(*cargs, **cparams)
+        if self.key:
+            # The passphrase is embedded as a SQL string literal, so single
+            # quotes are doubled to keep a quote in the key from terminating
+            # the literal early.
+            escaped_key = str(self.key).replace("'", "''")
+            dbapi_con.execute(f"PRAGMA key='{escaped_key}';")
+        return dbapi_con
+
+    def _adb_pulldb_if_android_db(self, parsed_url):
+        """Copy the database from an Android app when ``package`` is given.
+
+        Does nothing unless the URL has a ``package`` query parameter. The
+        file is written to ``./<database>`` and the command output is printed.
+        """
+        package = parsed_url.query.get("package", None)
+        if not package:
+            return
+        import shlex
+
+        db = parsed_url.database
+        # Both values come from the URL and end up in a shell command line,
+        # so quote them for the local shell. ``$HOME`` stays unquoted so the
+        # shell still expands it.
+        remote = shlex.quote(f"/data/data/{package}/databases/{db}")
+        local = shlex.quote(f"./{db}")
+        cmd = (
+            "$HOME/Android/Sdk/platform-tools/adb exec-out "
+            f"run-as {shlex.quote(package)} cat {remote} > {local}"
+        )
+        print(fn.cmd_wait(cmd))
diff --git a/code/dihlibs/command.py b/code/dihlibs/command.py
@@ -1,8 +1,6 @@
 from concurrent.futures import ThreadPoolExecutor
 import concurrent.futures
-from typing import Callable, Any
 from subprocess import Popen, PIPE
-import select,os
 from pathlib import Path
 import pkg_resources
 import dihlibs.functions as fn
@@ -11,7 +9,6 @@
 
 class _Command:
     def __init__(self, cmd, bg=True):
-        # bash_functions=Path(__file__).parent / "bash/script.sh"
         bash_functions = pkg_resources.resource_filename('dihlibs', 'data/bash/script.sh')
         self.cmd = f'. $HOME/.bashrc && .  {bash_functions}  && {cmd.strip()}'
         self.bg = bg

diff --git a/code/dihlibs/data/docker/backend.zip b/code/dihlibs/data/docker/backend.zip
diff --git a/code/dihlibs/data/docker/cronies.zip b/code/dihlibs/data/docker/cronies.zip
diff --git a/code/dihlibs/db.py b/code/dihlibs/db.py
@@ -131,4 +131,4 @@ def upate_table_df(self, df, tablename, id_column="id"):
         )
         return self.exec(sql)
 
-registry.register("sqlcipher", "dihlibs.SQLCipherDialect", "SQLCipherDialect")
+# registry.register("sqlcipher", "dihlibs.SQLCipherDialect", "SQLCipherDialect")
diff --git a/code/dihlibs/dhis/__init__.py b/code/dihlibs/dhis/__init__.py
@@ -213,11 +213,17 @@ def get_period(self, when, period_type="monthly"):
             }
         ).get(period_type.lower())
 
+    def get_week_date(self, date):
+        """Return the Monday that starts the ISO week named by *date*.
+
+        Args:
+            date: a weekly period such as ``"2024W5"`` (year, ``W``, week).
+
+        Returns:
+            A ``datetime`` at midnight on the first day of that ISO week.
+
+        Raises:
+            ValueError: if the year or week part is not a valid number.
+        """
+        year, _, week = date.partition("W")
+        # %G/%V/%u are the ISO year, ISO week and ISO weekday (1 = Monday).
+        # Adding 7 * week days to January 1st would land in the week after
+        # the requested one.
+        return datetime.strptime(f"{year}-W{int(week):02d}-1", "%G-W%V-%u")
+
     def period_to_db_date(self, date: str):
         formats = ["%Y-%m-%d", "%YW%W", "%Y%m", "%Y"]
         for fmt in formats:
             try:
-                dt = datetime.strptime(date, fmt)
+                dt = datetime.strptime(date, fmt) if "W" not in date else self.get_week_date(date)
                 return dt.strftime("%Y-%m-%d")
             except ValueError:
                 pass
@@ -235,6 +241,7 @@ def set_period_cols(r):
         e_map = e_map.reset_index().merge(
             self.datasets, left_on="dataset_id", right_on="id"
         )
+
         e_map.loc[:, ["period_column", "period_db", "period"]] = e_map.apply(
             set_period_cols, axis=1
         ).to_list()

diff --git a/code/dihlibs/dhis/meta.py b/code/dihlibs/dhis/meta.py
@@ -38,7 +38,7 @@ def _normalize_combo(self, input):
     def add_category_combo(self):
         res=rq.get(f"{self._base_url}/api/categoryCombos?paging=false&fields=id~rename(categoryCombo),name~rename(comboName)").json()
         combos=pd.DataFrame(res.get('categoryCombos'))
-        clean=lambda input:','.join(sorted(re.split(r'(?:\s+)?(?:,|and)(?:\s+)?',input))).replace(' ','_').lower()
+        # clean=lambda input:','.join(sorted(re.split(r'(?:\s+)?(?:,|and)(?:\s+)?',input))).replace(' ','_').lower()
         combos['comboName']=combos.comboName.apply(self._normalize_combo)
         self._map['comboName']=self._map.disaggregation.fillna('default').apply(self._normalize_combo)
         return self._map.merge(combos,how='left',on='comboName')

diff --git a/code/dihlibs/drive.py b/code/dihlibs/drive.py
@@ -10,13 +10,13 @@
 
 
 class Drive:
-    def __init__(self, key: dict):
+    def __init__(self, key: dict=None,credentials=None):
         try:
             scope = [
                 "https://www.googleapis.com/auth/drive.file",
                 "https://www.googleapis.com/auth/drive.readonly",
             ]
-            credentials = ServiceAccountCredentials.from_json_keyfile_dict(key, scope)
+            credentials = ServiceAccountCredentials.from_json_keyfile_dict(key, scope) if credentials is None else credentials
             self.drive = build("drive", "v3", credentials=credentials)
         except Exception as e:
             print(e)

diff --git a/code/dihlibs/evaluator.py b/code/dihlibs/evaluator.py
@@ -0,0 +1,144 @@
+import re
+
+def _flag(condition):
+    """Encode a Python truth value as the evaluator's 1.0 / 0.0 flag."""
+    return 1.0 if condition else 0.0
+
+
+def _both(a, b):
+    """Logical AND over numeric flags (non-zero means true)."""
+    return _flag(a != 0 and b != 0)
+
+
+def _either(a, b):
+    """Logical OR over numeric flags (non-zero means true)."""
+    return _flag(a != 0 or b != 0)
+
+
+# Plain arithmetic; results are returned unchanged.
+arithmetic_ops = {
+    "+": lambda a, b: a + b,
+    "-": lambda a, b: a - b,
+    "*": lambda a, b: a * b,
+    "/": lambda a, b: a / b,
+}
+
+# Comparisons answer with 1.0 (true) or 0.0 (false).
+comparison_ops = {
+    ">": lambda a, b: _flag(a > b),
+    "<": lambda a, b: _flag(a < b),
+    "==": lambda a, b: _flag(a == b),
+    "!=": lambda a, b: _flag(a != b),
+    ">=": lambda a, b: _flag(a >= b),
+    "<=": lambda a, b: _flag(a <= b),
+}
+
+# Single and doubled spellings of the logical operators are synonyms.
+logical_ops = {
+    "&": _both,
+    "&&": _both,
+    "|": _either,
+    "||": _either,
+}
+
+# Every binary operator the evaluator can apply, keyed by its token.
+operations = {}
+for _table in (arithmetic_ops, comparison_ops, logical_ops):
+    operations.update(_table)
+
+
+def _handle_operator(output, operators, token):
+    """Push *token* on the operator stack, flushing tighter operators first.
+
+    Operators on top of the stack whose precedence is at least that of
+    *token* are moved to *output* (left-associative shunting-yard step).
+
+    Args:
+        output: postfix string built so far.
+        operators: operator stack; mutated in place.
+        token: the operator being read.
+
+    Returns:
+        The updated postfix string.
+    """
+    rank = _precedence(token)
+    # An opening parenthesis is a barrier: it may only be removed by its
+    # matching ")", never flushed into the output by a low-precedence operator.
+    while (
+        operators
+        and operators[-1] != "("
+        and _precedence(operators[-1]) >= rank
+    ):
+        output += operators.pop() + " "
+    operators.append(token)
+    return output
+
+
+def _handle_parenthesis(output, operators, parenthesis):
+    """Process one parenthesis token during infix-to-postfix conversion.
+
+    "(" is pushed on the operator stack. ")" moves operators from the stack
+    to *output* until the matching "(" is found, which is then discarded.
+    Any other value leaves everything untouched.
+
+    Returns:
+        The updated postfix string.
+    """
+    if parenthesis == "(":
+        operators.append("(")
+        return output
+    if parenthesis != ")":
+        return output
+
+    while operators:
+        top = operators.pop()
+        if top == "(":
+            break
+        output += top + " "
+    return output
+
+
+def _precedence(operator):
+    """Return the binding strength of *operator* (higher binds tighter).
+
+    Order, loosest to tightest: logical, comparison / regex match, additive,
+    multiplicative, "^", unary "!". Anything else, including "(", gets -1.
+    The logical operators share one level so chains of them keep evaluating
+    left to right.
+    """
+    return {
+        "|": 0, "||": 0, "&": 0, "&&": 0,
+        "<": 1, ">": 1, "<=": 1, ">=": 1, "==": 1, "!=": 1, "~": 1,
+        "+": 2, "-": 2,
+        "*": 3, "/": 3,
+        "^": 4,
+        "!": 5,
+    }.get(operator, -1)
+
+
+_OPERATOR_RE = re.compile(r"^[+\-*/^<>!=&|~]+$")
+
+
+def _is_operator(token):
+    """Tell whether *token* consists solely of operator characters."""
+    return _OPERATOR_RE.match(token) is not None
+
+
+_STRING_OPERATOR_RE = re.compile(r"^[<>=~!]+$")
+
+
+def _is_string_operator(token):
+    """Tell whether *token* is built only from the characters ``< > = ~ !``."""
+    return _STRING_OPERATOR_RE.match(token) is not None
+
+
+def _apply_operator(op, a, b):
+    """Apply the binary operator *op* to *a* and *b*.
+
+    Raises:
+        ValueError: if *op* has no entry in ``operations``.
+    """
+    if op not in operations:
+        raise ValueError(f"Unsupported operator: {op}")
+    handler = operations[op]
+    return handler(a, b)
+
+
+def _apply_string_operator(op, a, b):
+    """Apply *op* to operands of which at least one is not a number.
+
+    Operators listed in ``operations`` are delegated to that table. ``~``
+    answers 1.0 when the regular expression *b* is found in *a*, else 0.0.
+
+    Raises:
+        ValueError: for any other operator.
+    """
+    if op in operations:
+        handler = operations[op]
+        return handler(a, b)
+    if op != "~":
+        raise ValueError(f"Unsupported operator for strings: {op}")
+    found = re.search(b, a)
+    return 1.0 if found else 0.0
+
+
+def _to_postfix(infix):
+    """Convert an infix expression to a space-separated postfix string.
+
+    Tokens are numbers, single-quoted strings, alphabetic words, runs of
+    operator characters and parentheses; characters that match none of these
+    are skipped by the tokenizer.
+
+    Raises:
+        ValueError: if a token is neither operand, parenthesis nor operator.
+    """
+    tokenizer = re.compile(r"\d+\.?\d*|'[^']*'|[a-zA-Z]+|[+\-*/^<>!=&|~]+|[()]")
+    operand = re.compile(r"^(?:\d+\.?\d*|'[^']*'|[a-zA-Z]+)$")
+    postfix = ""
+    pending = []
+
+    for found in tokenizer.finditer(infix):
+        token = found.group(0)
+        if operand.match(token):
+            postfix += token + " "
+            continue
+        if token in ("(", ")"):
+            postfix = _handle_parenthesis(postfix, pending, token)
+            continue
+        if not _is_operator(token):
+            raise ValueError(f"Unexpected token: {token}")
+        postfix = _handle_operator(postfix, pending, token)
+
+    # Whatever is still stacked is emitted top-first.
+    return postfix + "".join(op + " " for op in reversed(pending))
+
+
+def _evaluate_postfix(postfix):
+    """Evaluate a postfix expression and report whether it is true.
+
+    ``true`` / ``false`` become 1.0 / 0.0, numeric tokens become floats,
+    quoted tokens become strings without their quotes, other words are
+    pushed as plain strings, and operator tokens are applied to the stack.
+
+    Returns:
+        True when the final value on the stack equals 1.0.
+
+    Raises:
+        ValueError: if a token cannot be classified.
+    """
+    stack = []
+    token_pattern = re.compile(r"\d+\.?\d*|'[^']*'|[a-zA-Z]+|[+\-*/^()<>!=&|~]+")
+
+    for m in token_pattern.finditer(postfix):
+        token = m.group(0)
+        if token in ("true", "false"):
+            # Both literals must be floats: the unary "!" handler only
+            # negates float operands, so an int 0 made "!false" fail.
+            stack.append(1.0 if token == "true" else 0.0)
+        elif re.match(r"^\d+\.?\d*$", token):
+            stack.append(float(token))
+        elif re.match(r"^'[^']*'$", token):
+            stack.append(token[1:-1])
+        elif _is_operator(token):
+            _operate(token, stack)
+        elif re.match(r"^\w+$", token):
+            stack.append(token)
+        else:
+            raise ValueError(f"Unexpected token: {token}")
+    return stack.pop() == 1.0
+
+def _operate(token, stack):
+    """Apply operator *token* to the top of *stack* and push the result.
+
+    ``!`` with a numeric operand is unary: it pops one value and pushes its
+    logical negation. Every other case pops two values (right operand first).
+    When the operand types do not fit the operator, a message is printed and
+    ``None`` is pushed.
+    """
+    b = stack.pop()
+
+    # Accept ints as well as floats so "!" works on any numeric flag.
+    if token == "!" and isinstance(b, (int, float)):
+        answer = 1.0 if b == 0 else 0.0
+    else:
+        a = stack.pop()
+        if isinstance(a, (int, float)) and isinstance(b, (int, float)):
+            answer = _apply_operator(token, a, b)
+        elif _is_string_operator(token):
+            answer = _apply_string_operator(token, a, b)
+        else:
+            answer = None
+
+        if answer is None:  # no handler produced a value
+            error = f"Unsupported operand type for operator: {token} operand {a} and {b}"
+            print(error, token, a, b)
+    stack.append(answer)
+
+
+def evaluate(expression):
+    """Evaluate an infix *expression* and return whether it is true."""
+    postfix = _to_postfix(expression)
+    return _evaluate_postfix(postfix)
+
diff --git a/code/dihlibs/functions.py b/code/dihlibs/functions.py
@@ -7,11 +7,7 @@
 from dateutil.relativedelta import relativedelta
 from collections import namedtuple
 import numpy as np
-import asyncio, aiohttp
-import yaml
-import string
-import os
-import select
+import asyncio, aiohttp, yaml, string, os, hashlib, select
 from dihlibs.command import _Command
 from collections import deque
 from fuzzywuzzy import fuzz
@@ -316,3 +312,14 @@ def fuzzy_match(left_df,right_df,left_keys=[],right_keys=[],method="0"):
     left_df.loc[left_df[lkey].isna(),rcolumns]=''
 
     return left_df.sort_values('match',ascending=False).drop(columns=[rkey,lkey]).reset_index(drop=True)
+
+
+def uuid_from_hash(input_string):
+    """Derive a deterministic UUID-shaped string from *input_string*.
+
+    The first 32 hex digits of the SHA-256 digest are used; digit 12 is
+    forced to ``4`` and the top two bits of digit 16 are forced to ``10``
+    before the digits are grouped 8-4-4-4-12.
+
+    Raises:
+        ValueError: if *input_string* is not a ``str``.
+    """
+    if not isinstance(input_string, str):
+        raise ValueError("Input must be a string")
+
+    digits = list(hashlib.sha256(input_string.encode()).hexdigest()[:32])
+    digits[12] = "4"
+    digits[16] = format((int(digits[16], 16) & 0x3) | 0x8, "x")
+
+    bounds = ((0, 8), (8, 12), (12, 16), (16, 20), (20, 32))
+    return "-".join("".join(digits[start:stop]) for start, stop in bounds)