Skip to content

Commit

Permalink
Update peru location classification to include XX99 locations and new
Browse files Browse the repository at this point in the history
callao PERU-122
  • Loading branch information
makmanalp committed Mar 28, 2016
1 parent 29233d5 commit 41c1b00
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 6 deletions.
51 changes: 48 additions & 3 deletions location/Peru/datlas/clean.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import pandas as pd

from classification import (Hierarchy, parent_code_table_to_parent_id_table,
Classification)

from classification import (Hierarchy, Classification)

if __name__ == "__main__":

Expand All @@ -19,10 +17,57 @@
# The "district" level is not used by this classification, so keep only
# the rows that belong to some other level.
df = df.loc[df["level"] != "district"]

def get_id_by_code(df, code):
    """Return the index label of the single row of ``df`` carrying ``code``.

    Args:
        df: DataFrame with a ``code`` column; its index labels act as ids.
        code: Code value to look up.

    Returns:
        The matching row's index label, converted to ``float``.

    Raises:
        ValueError: If no row, or more than one row, has the given code.
    """
    matches = df[df["code"] == code].index.tolist()
    # Raise explicitly rather than ``assert``: assertions are stripped under
    # ``python -O``, which would let a duplicated code silently resolve to
    # its first match and a missing code surface as a bare IndexError.
    if len(matches) != 1:
        raise ValueError(
            "expected exactly one row with code %r, found %d"
            % (code, len(matches))
        )
    return float(matches[0])

# Create the "Other" provinces that are XX9900 for unknown province data
# points
def create_others(row):
    """Build the "unknown province" (XX9900) entry for one department row.

    Meant to be passed to ``DataFrame.apply(..., axis=1)`` over the
    department rows.

    Args:
        row: Series for one department, carrying ``code`` and the
            ``name_en`` / ``name_es`` / ``name_short_en`` / ``name_short_es``
            fields.

    Returns:
        A new Series for the "other" province: same fields as ``row`` with
        the code, level, names and ``parent_id`` rewritten. Its parent is the
        department the row describes. ``row`` itself is left unmodified.
    """
    # Work on a copy: pandas does not support apply() callbacks that mutate
    # the object they are handed.
    other = row.copy()

    # The new entry hangs off the department it was derived from.
    other["parent_id"] = get_id_by_code(df, row["code"])
    other["code"] = row["code"][:2] + "9900"
    other["level"] = "msa"
    other["name_en"] = row["name_en"] + " (unknown province)"
    other["name_short_en"] = row["name_short_en"] + " (unknown province)"
    other["name_es"] = row["name_es"] + " (provincia desconocida)"
    other["name_short_es"] = row["name_short_es"] + " (provincia desconocida)"
    # Item access is required here: ``other.name = ...`` would set the
    # Series' own ``name`` attribute (its label) and leave the "name" field
    # holding the bare department name.
    other["name"] = other["name_en"]
    return other

# Build one "unknown province" row per department row.
others = df[df.level == "department"].apply(create_others, axis=1)

# Add the Callao province, which moved out of the Callao department (a
# department that no longer exists) and now sits under Lima.
callao = pd.Series({
"code": "159800",
"level": "msa",
"name_es": u"Callao",
"name_short_es": u"Callao",
"name_en": u"Callao",
"name_short_en": u"Callao",
"name": u"Callao",
"parent_id": get_id_by_code(df, "150000") # Callao's parent is Lima
})

# Append the generated rows after the existing ones and renumber the index.
# NOTE(review): the parent_id values computed above are index labels taken
# before this renumbering; they stay valid only if df was already numbered
# 0..n-1 at that point -- confirm against the loading code.
df = pd.concat([df,
others,
pd.DataFrame(callao).T,
]).reset_index(drop=True)

# Normalise parent_id to float for every row, including the appended ones.
df.parent_id = df.parent_id.astype(float)

# Level names passed to Hierarchy, also handed to Classification below.
h = Hierarchy(["country", "department", "msa", "municipality"])
# NOTE(review): newer pandas releases no longer accept the categories= /
# ordered= keywords on astype() -- confirm against the pinned pandas version.
# The categorical cast is undone by the str cast on the next line; presumably
# the round trip is only a check against levels outside the hierarchy -- TODO
# confirm.
df.level = df.level.astype("category", categories=h, ordered=True)
df.level = df.level.astype(str)

# Drop the old Callao department and province.
# This must happen after reset_index so the remaining rows keep their ids.
# NOTE(review): the "079900" row generated above from department 070000 is
# not dropped here, so its parent_id appears to point at a removed row --
# confirm whether that entry should be dropped or re-parented.
df = df[df.code != "070000"]
df = df[df.code != "070100"]

# Select the output columns in their final order.
df = df[["code","name","level","name_es","name_en","name_short_es","name_short_en","parent_id"]]

# Build the classification from the cleaned table and the hierarchy.
c = Classification(df, h)

# Write the result next to the other generated outputs.
c.to_csv("out/locations_peru_datlas.csv")
Expand Down
28 changes: 26 additions & 2 deletions location/Peru/datlas/out/locations_peru_datlas.csv
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
4,"040000","Arequipa","department","Arequipa","Arequipa","Arequipa","Arequipa",0.0
5,"050000","Ayacucho","department","Ayacucho","Ayacucho","Ayacucho","Ayacucho",0.0
6,"060000","Cajamarca","department","Cajamarca","Cajamarca","Cajamarca","Cajamarca",0.0
7,"070000","Callao","department","Callao","Callao","Callao","Callao",0.0
8,"080000","Cusco","department","Cusco","Cusco","Cusco","Cusco",0.0
9,"090000","Huancavelica","department","Huancavelica","Huancavelica","Huancavelica","Huancavelica",0.0
10,"100000","Huanuco","department","Huanuco","Huanuco","Huanuco","Huanuco",0.0
Expand Down Expand Up @@ -91,7 +90,6 @@
89,"061100","San Miguel","msa","San Miguel","San Miguel","San Miguel","San Miguel",6.0
90,"061200","San Pablo","msa","San Pablo","San Pablo","San Pablo","San Pablo",6.0
91,"061300","Santa Cruz","msa","Santa Cruz","Santa Cruz","Santa Cruz","Santa Cruz",6.0
92,"070100","Callao","msa","Callao","Callao","Callao","Callao",7.0
93,"080100","Cusco","msa","Cusco","Cusco","Cusco","Cusco",8.0
94,"080200","Acomayo","msa","Acomayo","Acomayo","Acomayo","Acomayo",8.0
95,"080300","Anta","msa","Anta","Anta","Anta","Anta",8.0
Expand Down Expand Up @@ -220,3 +218,29 @@
218,"250200","Atalaya","msa","Atalaya","Atalaya","Atalaya","Atalaya",25.0
219,"250300","Padre Abad","msa","Padre Abad","Padre Abad","Padre Abad","Padre Abad",25.0
220,"250400","Purus","msa","Purus","Purus","Purus","Purus",25.0
221,"019900","Amazonas","msa","Amazonas (provincia desconocida)","Amazonas (unknown province)","Amazonas (provincia desconocida)","Amazonas (unknown province)",1.0
222,"029900","Ancash","msa","Ancash (provincia desconocida)","Ancash (unknown province)","Ancash (provincia desconocida)","Ancash (unknown province)",2.0
223,"039900","Apurimac","msa","Apurimac (provincia desconocida)","Apurimac (unknown province)","Apurimac (provincia desconocida)","Apurimac (unknown province)",3.0
224,"049900","Arequipa","msa","Arequipa (provincia desconocida)","Arequipa (unknown province)","Arequipa (provincia desconocida)","Arequipa (unknown province)",4.0
225,"059900","Ayacucho","msa","Ayacucho (provincia desconocida)","Ayacucho (unknown province)","Ayacucho (provincia desconocida)","Ayacucho (unknown province)",5.0
226,"069900","Cajamarca","msa","Cajamarca (provincia desconocida)","Cajamarca (unknown province)","Cajamarca (provincia desconocida)","Cajamarca (unknown province)",6.0
227,"079900","Callao","msa","Callao (provincia desconocida)","Callao (unknown province)","Callao (provincia desconocida)","Callao (unknown province)",7.0
228,"089900","Cusco","msa","Cusco (provincia desconocida)","Cusco (unknown province)","Cusco (provincia desconocida)","Cusco (unknown province)",8.0
229,"099900","Huancavelica","msa","Huancavelica (provincia desconocida)","Huancavelica (unknown province)","Huancavelica (provincia desconocida)","Huancavelica (unknown province)",9.0
230,"109900","Huanuco","msa","Huanuco (provincia desconocida)","Huanuco (unknown province)","Huanuco (provincia desconocida)","Huanuco (unknown province)",10.0
231,"119900","Ica","msa","Ica (provincia desconocida)","Ica (unknown province)","Ica (provincia desconocida)","Ica (unknown province)",11.0
232,"129900","Junin","msa","Junin (provincia desconocida)","Junin (unknown province)","Junin (provincia desconocida)","Junin (unknown province)",12.0
233,"139900","La Libertad","msa","La Libertad (provincia desconocida)","La Libertad (unknown province)","La Libertad (provincia desconocida)","La Libertad (unknown province)",13.0
234,"149900","Lambayeque","msa","Lambayeque (provincia desconocida)","Lambayeque (unknown province)","Lambayeque (provincia desconocida)","Lambayeque (unknown province)",14.0
235,"159900","Lima","msa","Lima (provincia desconocida)","Lima (unknown province)","Lima (provincia desconocida)","Lima (unknown province)",15.0
236,"169900","Loreto","msa","Loreto (provincia desconocida)","Loreto (unknown province)","Loreto (provincia desconocida)","Loreto (unknown province)",16.0
237,"179900","Madre de Dios","msa","Madre de Dios (provincia desconocida)","Madre de Dios (unknown province)","Madre de Dios (provincia desconocida)","Madre de Dios (unknown province)",17.0
238,"189900","Moquegua","msa","Moquegua (provincia desconocida)","Moquegua (unknown province)","Moquegua (provincia desconocida)","Moquegua (unknown province)",18.0
239,"199900","Pasco","msa","Pasco (provincia desconocida)","Pasco (unknown province)","Pasco (provincia desconocida)","Pasco (unknown province)",19.0
240,"209900","Piura","msa","Piura (provincia desconocida)","Piura (unknown province)","Piura (provincia desconocida)","Piura (unknown province)",20.0
241,"219900","Puno","msa","Puno (provincia desconocida)","Puno (unknown province)","Puno (provincia desconocida)","Puno (unknown province)",21.0
242,"229900","San Martin","msa","San Martin (provincia desconocida)","San Martin (unknown province)","San Martin (provincia desconocida)","San Martin (unknown province)",22.0
243,"239900","Tacna","msa","Tacna (provincia desconocida)","Tacna (unknown province)","Tacna (provincia desconocida)","Tacna (unknown province)",23.0
244,"249900","Tumbes","msa","Tumbes (provincia desconocida)","Tumbes (unknown province)","Tumbes (provincia desconocida)","Tumbes (unknown province)",24.0
245,"259900","Ucayali","msa","Ucayali (provincia desconocida)","Ucayali (unknown province)","Ucayali (provincia desconocida)","Ucayali (unknown province)",25.0
246,"159800","Callao","msa","Callao","Callao","Callao","Callao",15.0
Binary file modified location/Peru/datlas/out/locations_peru_datlas.dta
Binary file not shown.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="linnaeus",
version="v0.0.57",
version="v0.0.58",
author="Mali Akmanalp <Harvard CID>",
description=("Harvard CID's classification tools."),
url="http://github.com/cid-harvard/classifications/",
Expand Down

0 comments on commit 41c1b00

Please sign in to comment.