Skip to content

Commit

Permalink
Add digests
Browse files Browse the repository at this point in the history
  • Loading branch information
sgreenbury committed Sep 29, 2023
1 parent 635b65d commit c0fefae
Showing 1 changed file with 18 additions and 2 deletions.
20 changes: 18 additions & 2 deletions scripts/data_prep/prep_dl.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,22 @@
#!/usr/bin/env python
# coding: utf-8

import os
import requests
import pandas as pd
import hashlib

# Expected SHA-256 hex digests of the generated CSV files, keyed by the
# file's base name (see check_digest, which looks entries up by basename).
DIGESTS = {
    "Output_Areas_Dec_2011_PWC_2022.csv": (
        "4405d695f10556a0f3ff35e36f3aaa1013102da6a4a0c0fffd26554a318faa4f"
    ),
    "LSOA_Dec_2011_PWC_in_England_and_Wales_2022.csv": (
        "4bbc2e6b58302ee274e9c75c6bd9bc068074da08909ca7cffe751c3eb10034e5"
    ),
}

# ArcGIS FeatureServer query endpoints (JSON output, all fields, no row filter).
# Each literal is split into service root / layer path / query string; adjacent
# string literals are concatenated by Python into a single URL.
OA_URL = (
    "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/"
    "Output_Areas_Dec_2011_PWC_2022/FeatureServer/0/query"
    "?where=1%3D1&outFields=*&f=json"
)
LSOA_URL = (
    "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/"
    "LSOA_Dec_2011_PWC_in_England_and_Wales_2022/FeatureServer/0/query"
    "?where=1%3D1&outFields=*&outSR=&f=json"
)

def check_digest(file_name, digests=None):
    """Verify that a file's SHA-256 digest matches its expected value.

    Args:
        file_name: Path of the file to check. Only its base name is used to
            look up the expected digest.
        digests: Optional mapping of base file name to expected SHA-256 hex
            digest. Defaults to the module-level ``DIGESTS`` table.

    Raises:
        KeyError: If there is no expected digest for the file's base name.
        AssertionError: If the computed digest differs from the expected one.
    """
    sha256 = hashlib.sha256()
    # Hash in fixed-size chunks so the whole file is never held in memory, and
    # use a context manager so the file handle is always closed.
    with open(file_name, "rb") as f:
        for chunk in iter(lambda: f.read(65536), b""):
            sha256.update(chunk)
    digest = sha256.hexdigest()

    table = DIGESTS if digests is None else digests
    expected = table[os.path.basename(file_name)]
    # Raise explicitly instead of using an ``assert`` statement: asserts are
    # stripped under ``python -O``, which would silently skip this check.
    # AssertionError is kept so existing callers see the same exception type.
    if digest != expected:
        raise AssertionError(
            f"SHA-256 mismatch for {file_name}: expected {expected}, got {digest}"
        )

def get_flattened_df(url: str) -> pd.DataFrame:
"""Gets data from API through repeated offset calls until all collected."""
Expand Down Expand Up @@ -38,16 +48,22 @@ def main():
print("Getting LSOA data...")
df_lsoas = get_flattened_df(LSOA_URL)
outpath = "Data/dl"
oa_file_name = os.path.join(outpath, "Output_Areas_Dec_2011_PWC_2022.csv")
lsoa_file_name = os.path.join(outpath, "LSOA_Dec_2011_PWC_in_England_and_Wales_2022.csv")
# OA output and check
df_oas.to_csv(
f"{outpath}/Output_Areas_Dec_2011_PWC_2022.csv",
oa_file_name,
index=None,
line_terminator="\r\n",
)
check_digest(oa_file_name)
# LSOA out and check
df_lsoas.rename(columns={"lsoa11nm": "LSOA11NM", "lsoa11cd": "LSOA11CD"}).to_csv(
f"{outpath}/LSOA_Dec_2011_PWC_in_England_and_Wales_2022.csv",
lsoa_file_name,
index=None,
line_terminator="\r\n",
)
check_digest(lsoa_file_name)


if __name__ == "__main__":
Expand Down

0 comments on commit c0fefae

Please sign in to comment.