Skip to content

Commit

Permalink
Add Peru HS classification.
Browse files Browse the repository at this point in the history
  • Loading branch information
makmanalp committed Apr 1, 2016
1 parent 41c1b00 commit 77d146a
Show file tree
Hide file tree
Showing 8 changed files with 4,046 additions and 1 deletion.
3 changes: 3 additions & 0 deletions product/HS/Peru_Datlas/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
This is the HS (2000s?) product classification for Peru, with names generously translated by Ryan.

See the Google Sheets spreadsheet [here](https://docs.google.com/spreadsheets/d/1BV4vOUhGdB1boAux4JkHjoanTezSdvWEkRcanAnwzYw/edit#gid=1029116973).
3 changes: 3 additions & 0 deletions product/HS/Peru_Datlas/Tupfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Command that runs the cleaning script. PYTHONPATH points three directories
# up (presumably the repository root, where the `classification` module that
# clean.py imports lives -- confirm). -B tells Python not to write .pyc files.
CLEAN = PYTHONPATH=../../../ python2.7 -B clean.py

# Run the cleaning script with everything in in/ as input and declare the two
# files it writes as outputs.
: in/* |> $(CLEAN) |> out/products_peru_datlas.csv out/products_peru_datlas.dta
48 changes: 48 additions & 0 deletions product/HS/Peru_Datlas/clean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import pandas as pd

from classification import (Hierarchy, repeated_table_to_parent_id_table,
parent_code_table_to_parent_id_table,
Classification)

if __name__ == "__main__":
    # Build the Peru (Datlas) HS product classification from two TSV inputs
    # and write it out as CSV and Stata files.

    # Names table, merged in below on (code, level). Codes are read as
    # strings so they are not parsed as numbers.
    names = pd.read_table("./in/HS_hierarchy_master - Names.tsv",
                          encoding="utf-8", dtype={"code": str})

    # One row per 4-digit product together with its 2-digit and section
    # ancestors; every code column is kept as a string.
    hierarchy = pd.read_table("./in/HS_hierarchy_master - Hierarchy.tsv",
                              encoding="utf-8",
                              dtype={
                                  "4digit": str,
                                  "2digit": str,
                                  "section": str,
                                  "atlas_section": str,
                              })

    # Positional rename: relies on the file having exactly these four columns
    # in this order.
    hierarchy.columns = ["4digit_code", "2digit_code", "section_code",
                         "atlas_section"]

    # Placeholder name columns, one per level; the actual names are merged in
    # from the names table further down.
    hierarchy["name_4digit"] = None
    hierarchy["name_2digit"] = None
    hierarchy["name_section"] = None

    # Extra (non-code) fields to carry along for each hierarchy level.
    fields = {
        "4digit": ["name_4digit"],
        "2digit": ["name_2digit"],
        "section": ["name_section"]
    }

    # Levels listed from the top of the hierarchy down to the most detailed.
    h = Hierarchy(["section", "2digit", "4digit"])
    parent_code_table = repeated_table_to_parent_id_table(hierarchy, h, fields)

    # Make sure codes are strings so they match the dtype of names["code"]
    # in the merge below.
    parent_code_table["code"] = parent_code_table["code"].astype(str)

    # NOTE(review): merge() defaults to an inner join, so any (code, level)
    # pair absent from the names file is dropped here without warning --
    # confirm that this is intended.
    parent_code_table = parent_code_table.merge(names, on=["code", "level"])

    parent_id_table = parent_code_table_to_parent_id_table(parent_code_table, h)

    # Use the English name as the canonical name. Bracket assignment is used
    # deliberately: attribute assignment (df.name = ...) only writes a column
    # when that column already exists, otherwise it sets a plain attribute
    # and the column selection below fails with a KeyError.
    parent_id_table["name"] = parent_id_table["name_en"]

    # Keep only the columns that make up the final classification, in a
    # fixed order.
    parent_id_table = parent_id_table[["code", "name", "level", "name_en",
                                       "name_es", "name_short_es",
                                       "name_short_en", "parent_id"]]

    c = Classification(parent_id_table, h)

    c.to_csv("out/products_peru_datlas.csv")
    c.to_stata("out/products_peru_datlas.dta")
Loading

0 comments on commit 77d146a

Please sign in to comment.