Skip to content

Commit

Permalink
Merge commit '233f2aef8865f9b003a32cc5459efd67b1f7313e' into staging
Browse files Browse the repository at this point in the history
# Conflicts:
#	product/HS/IntlAtlas/out/hs92_atlas.csv
#	product/HS/IntlAtlas/out/hs92_atlas.dta
#	setup.py
  • Loading branch information
bleonard33 committed Aug 7, 2018
2 parents bf1a75e + 233f2ae commit 4abafae
Show file tree
Hide file tree
Showing 7 changed files with 15,200 additions and 1,312 deletions.
60 changes: 33 additions & 27 deletions product/HS/IntlAtlas/clean.py
Original file line number | Diff line number | Diff line change
@@ -1,27 +1,31 @@
import pandas as pd

from classification import (Hierarchy, repeated_table_to_parent_id_table,
parent_code_table_to_parent_id_table,
spread_out_entries, sort_by_code_and_level,
Classification)
from classification import (
Hierarchy,
repeated_table_to_parent_id_table,
parent_code_table_to_parent_id_table,
spread_out_entries,
sort_by_code_and_level,
Classification,
)

if __name__ == "__main__":
names = pd.read_table("./in/HS92_Atlas_Names.tsv", encoding="utf-8",
dtype={"code": str})
names = pd.read_table(
"./in/HS92_Atlas_Names.tsv", encoding="utf-8", dtype={"code": str}
)

hierarchy = pd.read_table("./in/HS92_Atlas_Hierarchy.tsv", encoding="utf-8", dtype="str")
hierarchy = pd.read_table(
"./in/HS92_Atlas_Hierarchy.tsv", encoding="utf-8", dtype="str"
)

services = pd.read_table("./in/Services_Hierarchy.tsv", encoding="utf-8", dtype="str")
services = pd.read_table(
"./in/Services_Hierarchy.tsv", encoding="utf-8", dtype="str"
)

fields = {
"section": [],
"2digit": [],
"4digit": [],
}
fields = {"section": [], "2digit": [], "4digit": [], "6digit": []}

h = Hierarchy(["section", "2digit", "4digit"])
h = Hierarchy(["section", "2digit", "4digit", "6digit"])
parent_code_table = repeated_table_to_parent_id_table(hierarchy, h, fields)
parent_code_table.code = parent_code_table.code.astype(str)
parent_code_table = parent_code_table.merge(names, on=["code", "level"])

# Sort by level order (not necessarily alphabetical)
Expand All
@@ -30,25 +34,27 @@
parent_id_table = parent_code_table_to_parent_id_table(parent_code_table, h)
parent_id_table["name"] = parent_id_table.name_en

parent_id_table = parent_id_table[["code", "name", "level", "name_en",
"name_es", "name_short_en", "name_short_es", "parent_id"]]
parent_id_table = parent_id_table[
[
"code",
"name",
"level",
"name_en",
"name_es",
"name_short_en",
"name_short_es",
"parent_id",
]
]

# Decide what id each level should start from
# Put ample space between each range of ids
level_starts = {
"section": 0,
"2digit": 100,
"4digit": 650
}
level_starts = {"section": 0, "2digit": 100, "4digit": 650, "6digit": 5000}
parent_id_table = spread_out_entries(parent_id_table, level_starts, h)

# Append services to table
# Spread out services similarly to each set of exports but buffered further
service_starts = {
"section": 10,
"2digit": 400,
"4digit": 4000,
}
service_starts = {"section": 10, "2digit": 400, "4digit": 4000, "6digit": 11000}
services = spread_out_entries(services, service_starts, h)

# Append to main table and sort on combined spread out indices
Expand Down
Loading

0 comments on commit 4abafae

Please sign in to comment.