From 60d5bc686b4739de3835c567d4e8ada02174612d Mon Sep 17 00:00:00 2001 From: Simon Roberts Date: Sun, 24 Mar 2024 13:05:04 +1100 Subject: [PATCH 1/4] Add shebang and +x to make adhoc_tools.py directly executable --- code/adhoc_tools.py | 1 + 1 file changed, 1 insertion(+) mode change 100644 => 100755 code/adhoc_tools.py diff --git a/code/adhoc_tools.py b/code/adhoc_tools.py old mode 100644 new mode 100755 index 84705a0336..9c07401562 --- a/code/adhoc_tools.py +++ b/code/adhoc_tools.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 import argparse import csv import glob From d2bebe221475860693c52b735259de8f2e964e7b Mon Sep 17 00:00:00 2001 From: Simon Roberts Date: Sun, 24 Mar 2024 13:12:12 +1100 Subject: [PATCH 2/4] Add progress to generate_all_suburbs_nbn_tallies --- code/adhoc_tools.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/code/adhoc_tools.py b/code/adhoc_tools.py index 9c07401562..67166d731c 100755 --- a/code/adhoc_tools.py +++ b/code/adhoc_tools.py @@ -239,7 +239,11 @@ def generate_all_suburbs_nbn_tallies(): """Create a file containing a tally of all suburbs by property (tech, upgrade, etc)""" exclude_properties = {"name", "locID", "gnaf_pid"} tallies = {} # property-name -> Counter() - for file in glob.glob("results/**/*.geojson"): + filenames = glob.glob("results/**/*.geojson") + for n, file in enumerate(filenames): + if n % 100 == 0: + utils.print_progress_bar(n, len(filenames), prefix="Progress:", suffix="Complete", length=50) + for feature in utils.read_json_file(file)["features"]: for prop, value in feature["properties"].items(): if prop not in exclude_properties: From 6fc02e3570b18a94f4c1ea96c21a377b76330383 Mon Sep 17 00:00:00 2001 From: Simon Roberts Date: Sun, 24 Mar 2024 13:36:31 +1100 Subject: [PATCH 3/4] sort tallies by frequency, except 'target_eligibility_quarter' which is sorted by date --- code/adhoc_tools.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/code/adhoc_tools.py b/code/adhoc_tools.py index 67166d731c..d87bfdd40c 100755 --- a/code/adhoc_tools.py +++ b/code/adhoc_tools.py @@ -251,6 +251,16 @@ def generate_all_suburbs_nbn_tallies(): tallies[prop] = Counter() tallies[prop][value] += 1 + def _parse_quarter(item: tuple[str, int]): + """Parse a quarter string into a datetime object. If NA, return epoch.""" + return datetime.fromtimestamp(0) if item[0] == "NA" else datetime.strptime(item[0], "%b %Y") + + # sort tallies by frequency, except 'target_eligibility_quarter' which is sorted by date + tallies = { + k: OrderedDict(sorted(v.items(), key=_parse_quarter) if k == "target_eligibility_quarter" else v.most_common()) + for k, v in tallies.items() + } + # Add percentages and missing items total_count = sum(tallies["tech"].values()) # everything has a tech+NULL tallies["percent"] = {} From dac1b99e3d8eb295befeb84c73d02234e44aeb00 Mon Sep 17 00:00:00 2001 From: Simon Roberts Date: Sun, 24 Mar 2024 13:46:34 +1100 Subject: [PATCH 4/4] Handle bad date formats better --- code/adhoc_tools.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/code/adhoc_tools.py b/code/adhoc_tools.py index d87bfdd40c..ff5970d5a5 100755 --- a/code/adhoc_tools.py +++ b/code/adhoc_tools.py @@ -253,7 +253,10 @@ def generate_all_suburbs_nbn_tallies(): def _parse_quarter(item: tuple[str, int]): """Parse a quarter string into a datetime object. If NA, return epoch.""" - return datetime.fromtimestamp(0) if item[0] == "NA" else datetime.strptime(item[0], "%b %Y") + try: + return datetime.strptime(item[0], "%b %Y") + except ValueError: + return datetime.fromtimestamp(0) # sort tallies by frequency, except 'target_eligibility_quarter' which is sorted by date tallies = {