diff --git a/colombia/api_schemas.py b/colombia/api_schemas.py index f8c75dd..cf81bf1 100644 --- a/colombia/api_schemas.py +++ b/colombia/api_schemas.py @@ -29,8 +29,8 @@ class Meta: class DepartmentIndustryYearSchema(ma.Schema): class Meta: - fields = ("employment", "wages", "department_id", "industry_id", - "year") + fields = ("employment", "wages", "rca", "distance", "cog", "coi", + "department_id", "industry_id", "year") class DepartmentSchema(ma.Schema): diff --git a/colombia/data/models.py b/colombia/data/models.py index 5e0e81b..3a27f70 100644 --- a/colombia/data/models.py +++ b/colombia/data/models.py @@ -21,6 +21,7 @@ class DepartmentProductYear(BaseModel, IDMixin): import_value = db.Column(db.Integer) export_value = db.Column(db.Integer) + export_rca = db.Column(db.Integer) density = db.Column(db.Float) cog = db.Column(db.Float) @@ -62,6 +63,18 @@ class ProductYear(BaseModel, IDMixin): pci_rank = db.Column(db.Integer) +class IndustryYear(BaseModel, IDMixin): + + __tablename__ = "industry_year" + + industry_id = db.Column(db.Integer, db.ForeignKey(Industry.id)) + year = db.Column(db.Integer) + + industry = db.relationship(Industry) + + complexity = db.Column(db.Float) + + class DepartmentIndustryYear(BaseModel, IDMixin): __tablename__ = "department_industry_year" @@ -75,3 +88,16 @@ class DepartmentIndustryYear(BaseModel, IDMixin): employment = db.Column(db.Integer) wages = db.Column(db.Integer) + + rca = db.Column(db.Integer) + density = db.Column(db.Float) + cog = db.Column(db.Float) + coi = db.Column(db.Float) + + @hybrid_property + def distance(self): + return 1.0 - self.density + + @distance.expression + def distance(cls): + return (1.0 - cls.density).label("distance") diff --git a/colombia/import.py b/colombia/import.py index 13375cc..b8f42ec 100644 --- a/colombia/import.py +++ b/colombia/import.py @@ -66,6 +66,16 @@ def inner(line): return inner +def make_iy(industry_map): + def inner(line): + iy = models.IndustryYear() + iy.industry = industry_map[line["i"]] + iy.year = int(line["year"]) + iy.pci = line["pci"] + return iy + return inner + + def process_cpy(cpy, product_map, department_map): """Take a dataframe and return @@ -82,51 +92,6 @@ def process_cpy(cpy, product_map, department_map): return [cy_out, py_out, cpy_out] -def process_department(dept): - department_data = dept.groupby("department_code")\ - .first().reset_index()\ - [["department_name", "department_code"]] - - def make_department(line): - d = models.Department() - d.aggregation = "department" - d.name = line["department_name"] - d.code = line["department_code"] - return d - - return department_data.apply(make_department, axis=1) - - -def process_product(prod): - d = prod[["code", "name", "name_en", "name_es", "community_id"]] - - four_digit = d[d.code.str.len() == 4] - two_digit = d[d.code.str.len() == 2] - section = d[d.code.str.len() == 3] - - def product_maker(aggregation): - def make_product(line): - d = models.HSProduct() - d.code = line["code"] - d.name = line["name"] - d.en = line["name_en"] - d.es = line["name_es"] - d.aggregation = aggregation - if aggregation == "4digit": - d.section_code = str(line["community_id"]) - community_index = section.code == d.section_code - d.section_name = section[community_index].name_en.values[0] - d.section_name_es = section[community_index].name_es.values[0] - return d - return make_product - - return ( - list(section.apply(product_maker("section"), axis=1)), - list(two_digit.apply(product_maker("2digit"), axis=1)), - list(four_digit.apply(product_maker("4digit"), axis=1)) - ) - - # Taken from ecomplexity_from_cepii_xx_dollar.dta # Sample: [u'department', u'hs4', u'peso', u'dollar', u'__000001', u'M', # u'density', u'eci', u'pci', u'diversity', u'ubiquity', u'coi', u'cog', @@ -277,100 +242,6 @@ def test_process_cpy(self): self.assertEquals(py[2].year, 1998) self.assertEquals(py[2].pci, 1) - def test_process_department(self): - data = """ -department_code department_name municipality_code municipality_name city rural midsize pop_2012 nbi -08 Atlántico 08849 Usiacurí FALSE TRUE FALSE 9238 43.27979231 -11 "Bogotá, D.C." 11001 "Bogotá, D.C." TRUE FALSE FALSE 7571345 9.20300877 -13 Bolívar 13001 Cartagena TRUE FALSE FALSE 967051 26.01059996""" - - data = pd.read_table(StringIO(data), encoding="utf-8", - dtype={"department_code": np.object}) - d = process_department(data) - db.session.add_all(d) - db.session.commit() - - # TODO population, gdp - - self.assertEquals(d[0].aggregation, "department") - self.assertEquals(d[0].name, u"Atlántico") - self.assertEquals(d[0].code, "08") - - self.assertEquals(d[1].aggregation, "department") - self.assertEquals(d[1].name, u"Bogotá, D.C.") - self.assertEquals(d[1].code, "11") - - self.assertEquals(d[2].aggregation, "department") - self.assertEquals(d[2].name, u"Bolívar") - self.assertEquals(d[2].code, "13") - - def test_process_product(self): - - data = """ -code name name_en name_es community_id -0101 Live horses, asses, mules or hinnies Horses Caballos 106 -0102 Live bovine animals Bovines Bovinos 116 -106 Animal & Animal Products Animal & Animal Products NULL 106 -116 Vegetable Products Vegetable Products NULL 116 -04 Dairy, Eggs, Honey, & Ed. Products Dairy, Honey, & Ed. Prod. Productos lácteos, la miel, y Ed. Prod. 106 -06 Live Trees & Other Plants Trees & Plants  Árboles y Plantas 116""" - - data = pd.read_table(StringIO(data), encoding="utf-8", - dtype={"code": np.object}) - section, two_digit, four_digit = process_product(data) - - db.session.add_all(section) - db.session.add_all(two_digit) - db.session.add_all(four_digit) - db.session.commit() - - len(four_digit) == 2 - self.assertEquals(four_digit[0].name, "Live horses, asses, mules or hinnies") - self.assertEquals(four_digit[0].en, "Horses") - self.assertEquals(four_digit[0].es, "Caballos") - self.assertEquals(four_digit[0].code, "0101") - self.assertEquals(four_digit[0].aggregation, "4digit") - self.assertEquals(four_digit[0].section_code, "106") - self.assertEquals(four_digit[0].section_name, "Animal & Animal Products") - self.assertEquals(four_digit[1].name, "Live bovine animals") - self.assertEquals(four_digit[1].en, "Bovines") - self.assertEquals(four_digit[1].es, "Bovinos") - self.assertEquals(four_digit[1].code, "0102") - self.assertEquals(four_digit[1].aggregation, "4digit") - self.assertEquals(four_digit[1].section_code, "116") - self.assertEquals(four_digit[1].section_name, "Vegetable Products") - - len(two_digit) == 2 - self.assertEquals(two_digit[0].name, "Dairy, Eggs, Honey, & Ed. Products") - self.assertEquals(two_digit[0].en, "Dairy, Honey, & Ed. Prod.") - self.assertEquals(two_digit[0].es, "Productos lácteos, la miel, y Ed. Prod.") - self.assertEquals(two_digit[0].code, "04") - self.assertEquals(two_digit[0].aggregation, "2digit") - self.assertEquals(two_digit[0].section_code, None) - self.assertEquals(two_digit[0].section_name, None) - self.assertEquals(two_digit[1].name, "Live Trees & Other Plants") - self.assertEquals(two_digit[1].en, " Trees & Plants") - self.assertEquals(two_digit[1].es, " Árboles y Plantas") - self.assertEquals(two_digit[1].code, "06") - self.assertEquals(two_digit[1].aggregation, "2digit") - self.assertEquals(two_digit[1].section_code, None) - self.assertEquals(two_digit[1].section_name, None) - - len(section) == 2 - self.assertEquals(section[0].name, "Animal & Animal Products") - self.assertEquals(section[0].en, "Animal & Animal Products") - self.assertEquals(section[0].es, None) - self.assertEquals(section[0].code, "106") - self.assertEquals(section[0].aggregation, "section") - self.assertEquals(section[0].section_code, None) - self.assertEquals(section[0].section_name, None) - self.assertEquals(section[1].name, "Vegetable Products") - self.assertEquals(section[1].en, "Vegetable Products") - self.assertEquals(section[1].es, None) - self.assertEquals(section[1].code, "116") - self.assertEquals(section[1].aggregation, "section") - self.assertEquals(section[1].section_code, None) - self.assertEquals(section[1].section_name, None) if __name__ == "__main__": @@ -442,28 +313,35 @@ def parse_dpy(dpy_file, translation_table): # Department - industry - year - df = pd.read_stata("/Users/makmana/ciddata/PILA_andres/COL_PILA_ciy_2008-2012_rev3_stata13.dta") - df = df[["year", "r", "i", "E_yir", "W_yir"]] + df = pd.read_stata("/Users/makmana/ciddata/PILA_andres/COL_PILA_ecomp-E_yir_2008-2012_rev3_dpto.dta") + df = df[["year", "r", "i", "E_yir", "W_yir", "rca", "density", "cog", "coi", "pci"]] df = df[df.i != "."] df = df.merge(industry_classification.table, left_on="i", right_on="code", how="inner") - df["d"] = df.r.str[:2] - df.groupby(["year", "d", "i"]).agg({"E_yir": np.sum, - "W_yir": np.sum}) - def make_diy(): def inner(line): dpy = models.DepartmentIndustryYear() dpy.industry = industry_map[line["i"]] - dpy.department = location_map[line["d"]] + dpy.department = location_map[line["r"]] dpy.year = line["year"] dpy.employment = line["E_yir"] dpy.wages = line["W_yir"] + + dpy.rca = line["rca"] + dpy.density = line["density"] + dpy.cog = line["cog"] + dpy.coi = line["coi"] + return dpy return inner cpy_out = df.apply(make_diy(), axis=1) db.session.add_all(cpy_out) - db.session.commit() + iy = df.groupby(["i", "year"])[["pci"]].first().reset_index() + iy_out = iy.apply(make_iy(industry_map), axis=1) + db.session.add_all(iy_out) + + + db.session.commit() diff --git a/colombia/models.py b/colombia/models.py index a751d2f..a162478 100644 --- a/colombia/models.py +++ b/colombia/models.py @@ -1,2 +1,2 @@ from .metadata.models import Metadata, HSProduct, Location, Industry -from .data.models import DepartmentProductYear, DepartmentYear, ProductYear, DepartmentIndustryYear +from .data.models import DepartmentProductYear, DepartmentYear, ProductYear, DepartmentIndustryYear, IndustryYear