Skip to content

Commit

Permalink
Merge pull request #63 from HybriD3-database/stoichiometry_enchanced_…
Browse files Browse the repository at this point in the history
…formula_parsing_v2

Updated stoichiometry parsing for bracket multipliers
  • Loading branch information
uthpalaherath authored Nov 20, 2024
2 parents 101d162 + daf7f40 commit 90af909
Show file tree
Hide file tree
Showing 4 changed files with 407 additions and 275 deletions.
19 changes: 10 additions & 9 deletions materials/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from fractions import Fraction
from decimal import Decimal, ROUND_HALF_UP
import re
from .utils import parse_formula

admin.site.site_header = mark_safe(f"{MATD3_NAME} database")

Expand Down Expand Up @@ -73,15 +74,15 @@ class SystemStoichiometryInline(nested_admin.NestedTabularInline):
def save_model(self, request, obj, form, change):
super().save_model(request, obj, form, change)
Stoichiometry_Elements.objects.filter(system_stoichiometry=obj).delete()
element_pattern = r"([A-Z][a-z]*):(\d+(?:\.\d+)?|\d+(?:\.\d+)?/\d+(?:\.\d+)?)"
elements = re.findall(element_pattern, obj.stoichiometry)
for element, count_str in elements:
if "/" in count_str:
numerator, denominator = count_str.split("/")
count = Decimal(numerator) / Decimal(denominator)
else:
count = Decimal(count_str)
# Format counts
# Remove chiral prefixes
formula = re.sub(r"(\(R/S\)-|\b[SR]-)", "", obj.stoichiometry)
# Parse the formula using the same logic
try:
element_counts = parse_formula(formula)
except Exception as e:
# Handle parsing errors if necessary
return
for element, count in element_counts.items():
if count == count.to_integral():
count_formatted = str(count.to_integral())
else:
Expand Down
78 changes: 1 addition & 77 deletions materials/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,83 +9,7 @@
from fractions import Fraction
from decimal import Decimal, ROUND_HALF_UP
import re


# Tokenizer: element symbols, plain numbers, bracketed numbers/fractions such
# as "(1/2)" or "[0.5]", and single bracket characters.
_TOKEN_RE = re.compile(
    r"([A-Z][a-z]?|\d+(\.\d+)?|\([\d\.]+(\/[\d\.]+)?\)|\{[\d\.]+(\/[\d\.]+)?\}"
    r"|\[[\d\.]+(\/[\d\.]+)?\]|[\(\)\[\]\{\}])"
)
_ELEMENT_RE = re.compile(r"[A-Z][a-z]?")
_PLAIN_NUMBER_RE = re.compile(r"\d+(\.\d+)?")
# Group 1 is the number (or numerator); group 2 is the optional denominator.
_BRACKETED_NUMBER_RE = re.compile(r"[\(\[\{]([\d\.]+)(?:\/([\d\.]+))?[\)\]\}]")
# Maps every opening bracket to the closing bracket that must match it.
_CLOSING_FOR = {"(": ")", "[": "]", "{": "}"}
_CLOSING_BRACKETS = frozenset(_CLOSING_FOR.values())


def _parse_quantity(token):
    """Return the Decimal value of a numeric token, or None if it is not one.

    Accepts plain numbers ("2", "0.5") and bracketed numbers or fractions
    ("(2)", "[1.5]", "{1/2}").

    Raises:
        ValueError: if a bracketed token looks numeric but cannot be
            evaluated (e.g. "(1.2.3)", "(.)" or a zero denominator).
    """
    if _PLAIN_NUMBER_RE.fullmatch(token):
        return Decimal(token)
    bracketed = _BRACKETED_NUMBER_RE.fullmatch(token)
    if bracketed is None:
        return None
    numerator, denominator = bracketed.groups()
    try:
        value = Decimal(numerator)
        if denominator is not None:
            value = value / Decimal(denominator)
    except ArithmeticError as exc:
        # decimal.InvalidOperation / DivisionByZero derive from
        # ArithmeticError; report them like every other parse failure.
        raise ValueError(
            f"Invalid numeric quantity {token!r} in formula"
        ) from exc
    return value


def _consume_quantity(tokens, index):
    """Return (quantity, next_index) for an optional quantity at tokens[index].

    When no numeric token is present at that position the quantity defaults
    to 1 and the index is returned unchanged.
    """
    if index < len(tokens):
        quantity = _parse_quantity(tokens[index])
        if quantity is not None:
            return quantity, index + 1
    return Decimal("1"), index


def parse_formula(formula):
    """Parse a chemical formula string into per-element Decimal counts.

    Supports nested groups delimited by (), [] or {}, each optionally
    followed by a multiplier, and element counts given as plain numbers
    ("H2", "H0.5") or as bracketed numbers/fractions ("Pb(1/2)", "I[1.5]").
    Characters that do not form a recognised token are ignored, as are
    numeric tokens that do not directly follow an element or a closing
    bracket.

    Args:
        formula: the formula text, e.g. "Ca(OH)2".

    Returns:
        A dict mapping element symbol to its total count as a Decimal.
        An input without any recognisable token yields an empty dict.

    Raises:
        ValueError: on unmatched or mismatched brackets, or on a bracketed
            quantity that cannot be evaluated.
    """
    tokens = [match.group(0) for match in _TOKEN_RE.finditer(formula)]

    # One accumulator per open group; index 0 is the top-level formula.
    counts_stack = [{}]
    open_brackets = []
    position = 0
    while position < len(tokens):
        token = tokens[position]
        position += 1

        if token in _CLOSING_FOR:
            counts_stack.append({})
            open_brackets.append(token)
        elif token in _CLOSING_BRACKETS:
            if not open_brackets:
                raise ValueError("Unmatched closing bracket in formula")
            if _CLOSING_FOR[open_brackets.pop()] != token:
                raise ValueError("Mismatched brackets in formula")
            group = counts_stack.pop()
            multiplier, position = _consume_quantity(tokens, position)
            # Fold the closed group, scaled by its multiplier, into its parent.
            parent = counts_stack[-1]
            for element, count in group.items():
                parent[element] = (
                    parent.get(element, Decimal("0")) + count * multiplier
                )
        elif _ELEMENT_RE.fullmatch(token):
            count, position = _consume_quantity(tokens, position)
            current = counts_stack[-1]
            current[token] = current.get(token, Decimal("0")) + count
        # Any other token (a stray number) carries no meaning here: skip it.

    if open_brackets:
        raise ValueError("Unmatched opening bracket in formula")
    return counts_stack[0]
from .utils import parse_formula


@receiver(post_save, sender=System)
Expand Down
Loading

0 comments on commit 90af909

Please sign in to comment.