Skip to content

Commit

Permalink
Remove World as a country in Atlas classification
Browse files: browse the repository at this point in the history
  • Loading branch information
bleonard33 committed Sep 7, 2018
1 parent 396add3 commit aed2d61
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 12 deletions.
37 changes: 26 additions & 11 deletions location/International/Atlas/clean.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
import pandas as pd

from classification import (Hierarchy, parent_code_table_to_parent_id_table,
Classification)
from classification import (
Hierarchy,
parent_code_table_to_parent_id_table,
Classification,
)

if __name__ == "__main__":

df = pd.read_csv("./in/International Atlas Location Codes - Clean Countries.csv",
encoding="utf-8",
dtype={"parent_code": str}
)\
.drop("comtrade_name", axis=1)
df = pd.read_csv(
"./in/International Atlas Location Codes - Clean Countries.csv",
encoding="utf-8",
dtype={"parent_code": str},
).drop("comtrade_name", axis=1)
df["level"] = "country"

regions = pd.read_csv("./in/International Atlas Location Codes - Continents and Regions.csv", encoding="utf-8")
regions = pd.read_csv(
"./in/International Atlas Location Codes - Continents and Regions.csv",
encoding="utf-8",
)
regions["name_short_en"] = regions["name_en"]
regions["name_short_es"] = regions["name_es"]
regions["level"] = "region"
Expand All @@ -34,17 +40,26 @@

# Add in other properties, like in_rankings and trusted
trusted = pd.read_csv("./in/trusted_countries.csv", index_col="iso")
parent_id_table = parent_id_table.merge(trusted, left_on="code", right_index=True, how="left")
parent_id_table = parent_id_table.merge(
trusted, left_on="code", right_index=True, how="left"
)
in_rankings = pd.read_table("./in/in_rankings.csv", index_col="iso", sep=";")
parent_id_table = parent_id_table.merge(in_rankings, left_on="code", right_index=True, how="left")
parent_id_table = parent_id_table.merge(
in_rankings, left_on="code", right_index=True, how="left"
)
services = pd.read_csv("./in/services_flags.csv", index_col="code")
services = services.astype(float)
parent_id_table = parent_id_table.merge(services, left_on="code", right_index=True, how="left")
parent_id_table = parent_id_table.merge(
services, left_on="code", right_index=True, how="left"
)

# Services flags (reported_serv, reported_serv_recent) are forced to 0, i.e. false, for region rows, per Huy's request
parent_id_table.loc[parent_id_table.level == "region", "reported_serv"] = 0
parent_id_table.loc[parent_id_table.level == "region", "reported_serv_recent"] = 0

# Remove World as a country since it doesn't have data
parent_id_table = parent_id_table[parent_id_table.code != "WLD"]

c = Classification(parent_id_table, h)
c.to_csv("out/locations_international_atlas.csv")
c.to_stata("out/locations_international_atlas.dta")
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,6 @@
248,"ZWE","country","Zimbabwe","Zimbabwe (Republica De)","Zimbabwe","Zimbabwe",352.0,"Zimbabwe",0.0,1.0,1.0,1.0
249,"TWN","country","Taiwan","Taiwan","Taiwan","Taiwan",353.0,"Taiwan",1.0,0.0,0.0,0.0
250,"ANS","country","Undeclared Countries","Paises No Declarados","Undeclared Countries","Paises No Declarados",358.0,"Undeclared Countries",0.0,0.0,0.0,0.0
251,"WLD","country","World","","World","",358.0,"World",0.0,0.0,0.0,0.0
352,"1","region","Africa","África","Africa","África","","Africa","","",0.0,0.0
353,"2","region","Asia","Asia","Asia","Asia","","Asia","","",0.0,0.0
354,"3","region","Oceania","Oceanía","Oceania","Oceanía","","Oceania","","",0.0,0.0
Expand Down
Binary file modified location/International/Atlas/out/locations_international_atlas.dta
Binary file not shown.

0 comments on commit aed2d61

Please sign in to comment.