Skip to content

Commit

Permalink
Update SITC rev 2 with new ids that have spacing between them in case it becomes needed in the future.
Browse files Browse the repository at this point in the history
  • Loading branch information
makmanalp committed Oct 25, 2017
1 parent 54539da commit 432c40b
Show file tree
Hide file tree
Showing 4 changed files with 1,121 additions and 1,108 deletions.
15 changes: 14 additions & 1 deletion product/SITC/IntlAtlas/clean_sitc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from classification import (Hierarchy, repeated_table_to_parent_id_table,
parent_code_table_to_parent_id_table,
spread_out_entries, sort_by_code_and_level,
Classification)

if __name__ == "__main__":
Expand All @@ -25,15 +26,27 @@
# Build the SITC rev 2 classification table and write it out as CSV.
# The four level names below are passed to every helper call that follows.
h = Hierarchy(["section", "2digit", "3digit", "4digit"])
# `hierarchy` and `fields` are defined earlier in the script (not visible in
# this hunk).
# NOTE(review): the helper is named *_to_parent_id_table but its result is
# bound to parent_code_table -- confirm which shape it actually returns.
parent_code_table = repeated_table_to_parent_id_table(hierarchy, h, fields)
# Cast codes to strings before merging on them.
# presumably `names.code` is string-typed as well -- TODO confirm
parent_code_table.code = parent_code_table.code.astype(str)

# Attach the name columns from `names` (defined earlier, not visible here),
# matching rows on both code and level.
# NOTE(review): if these are pandas DataFrames, merge() defaults to an inner
# join, so entries with no matching name would be dropped -- confirm intended.
parent_code_table = parent_code_table.merge(names, on=["code", "level"])

# Sort by level order (not necessarily alphabetical)
parent_code_table = sort_by_code_and_level(parent_code_table, h)

# presumably the sort above fixes the row order before ids are assigned in
# this conversion -- TODO confirm against the helper
parent_id_table = parent_code_table_to_parent_id_table(parent_code_table, h)
# Use the English name as the generic "name" column.
parent_id_table["name"] = parent_id_table.name_en

# Keep only the columns listed here, in this order.
parent_id_table = parent_id_table[["code", "name", "level", "name_en",
"name_es", "name_short_en", "name_short_es", "parent_id"]]

# Decide what id each level should start from
# Put ample space between each range of ids
# The keys match the level names given to Hierarchy above.
level_starts = {
"section": 0,
"2digit": 100,
"3digit": 250,
"4digit": 650
}
# NOTE(review): presumably each level must have fewer entries than the gap to
# the next start (e.g. at most 100 sections) or the ranges would overlap --
# confirm whether spread_out_entries checks this.
parent_id_table = spread_out_entries(parent_id_table, level_starts, h)

# Wrap the finished table together with its hierarchy.
c = Classification(parent_id_table, h)

# Write the result as CSV to a relative path.
# NOTE(review): presumably the "out/" directory must already exist -- confirm.
c.to_csv("out/sitc_rev2.csv")
Expand Down
Loading

0 comments on commit 432c40b

Please sign in to comment.