From 4276d7d53a6af0aff9d47c36748bcdb2fed1d956 Mon Sep 17 00:00:00 2001 From: Mali Akmanalp Date: Fri, 26 Jun 2015 20:17:49 -0400 Subject: [PATCH 1/3] Import industry data for munis --- colombia/data/models.py | 28 ++++++++++++++++++++++++++++ colombia/import.py | 26 +++++++++++++++++++++++++- colombia/models.py | 2 +- 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/colombia/data/models.py b/colombia/data/models.py index 4a152ba..62600af 100644 --- a/colombia/data/models.py +++ b/colombia/data/models.py @@ -107,3 +107,31 @@ def distance(self): @distance.expression def distance(cls): return (1.0 - cls.density).label("distance") + + +class MunicipalityIndustryYear(BaseModel, IDMixin): + + __tablename__ = "municipality_industry_year" + + municipality_id = db.Column(db.Integer, db.ForeignKey(Location.id)) + industry_id = db.Column(db.Integer, db.ForeignKey(Industry.id)) + year = db.Column(db.Integer) + + municipality = db.relationship(Location) + industry = db.relationship(Industry) + + employment = db.Column(db.Integer) + wages = db.Column(db.Integer) + + rca = db.Column(db.Integer) + density = db.Column(db.Float) + cog = db.Column(db.Float) + coi = db.Column(db.Float) + + @hybrid_property + def distance(self): + return 1.0 - self.density + + @distance.expression + def distance(cls): + return (1.0 - cls.density).label("distance") diff --git a/colombia/import.py b/colombia/import.py index 4de8615..6011572 100644 --- a/colombia/import.py +++ b/colombia/import.py @@ -362,6 +362,30 @@ def inner(line): iy_out = iy.apply(make_iy(industry_map), axis=1) db.session.add_all(iy_out) - db.session.commit() + # Municipality - industry - year + df = pd.read_stata("/Users/makmana/ciddata/PILA_andres/COL_PILA_ecomp-E_yir_2008-2012_rev3_mun.dta") + df = df[["year", "r", "i", "E_yir", "W_yir", "rca", "density", "cog", "coi", "pci"]] + df = df[df.i != "."] + + # Classification.merge_to_table + # Classification.merge_index + + def merge_to_table(classification, classification_name, df, merge_on): + code_to_id = classification.reset_index()[["code", "index"]] + code_to_id.columns = ["code", classification_name] + code_to_id = code_to_id.set_index("code") + return df.merge(code_to_id, left_on=merge_on, + right_index=True, how="left") + + df = merge_to_table(industry_classification.level("class"), + "industry_id", df, "i") + df = merge_to_table(location_classification.level("municipality"), + "municipality_id", df, "r") + + df = df.rename(columns={"E_yir": "employment", "W_yir": "wages"}) + df = df[["municipality_id", "industry_id", "year", "employment", + "wages", "rca", "density", "cog", "coi"]] + df.to_sql("municipality_industry_year", db.engine, index=False, + chunksize=10000, if_exists="append") diff --git a/colombia/models.py b/colombia/models.py index a162478..1f5bff9 100644 --- a/colombia/models.py +++ b/colombia/models.py @@ -1,2 +1,2 @@ from .metadata.models import Metadata, HSProduct, Location, Industry -from .data.models import DepartmentProductYear, DepartmentYear, ProductYear, DepartmentIndustryYear, IndustryYear +from .data.models import DepartmentProductYear, DepartmentYear, ProductYear, DepartmentIndustryYear, IndustryYear, MunicipalityIndustryYear From 993b11d613aaeaf7f58f8d806bed298b2d9ae1f1 Mon Sep 17 00:00:00 2001 From: Mali Akmanalp Date: Mon, 29 Jun 2015 11:26:15 -0400 Subject: [PATCH 2/3] Refactor industry-year and dept-industry-year import --- colombia/import.py | 69 +++++++++++++++++----------------------------- 1 file changed, 25 insertions(+), 44 deletions(-) diff --git a/colombia/import.py b/colombia/import.py index 6011572..949f864 100644 --- a/colombia/import.py +++ b/colombia/import.py @@ -78,16 +78,6 @@ def inner(line): return inner -def make_iy(industry_map): - def inner(line): - iy = models.IndustryYear() - iy.industry = industry_map[line["i"]] - iy.year = int(line["year"]) - iy.complexity = line["pci"] - return iy - return inner - - def process_cpy(cpy, product_map, department_map): """Take a dataframe and return @@ -331,54 +321,45 @@ def test_process_cpy(self): db.session.add_all(cy) db.session.commit() + # Classification.merge_to_table + # Classification.merge_index + + def merge_to_table(classification, classification_name, df, merge_on): + code_to_id = classification.reset_index()[["code", "index"]] + code_to_id.columns = ["code", classification_name] + code_to_id = code_to_id.set_index("code") + return df.merge(code_to_id, left_on=merge_on, + right_index=True, how="left") + # Department - industry - year df = pd.read_stata("/Users/makmana/ciddata/PILA_andres/COL_PILA_ecomp-E_yir_2008-2012_rev3_dpto.dta") df = df[["year", "r", "i", "E_yir", "W_yir", "rca", "density", "cog", "coi", "pci"]] df = df[df.i != "."] - df = df.merge(industry_classification.table, left_on="i", - right_on="code", how="inner") - - def make_diy(): - def inner(line): - dpy = models.DepartmentIndustryYear() - dpy.industry = industry_map[line["i"]] - dpy.department = location_map[line["r"]] - dpy.year = line["year"] - dpy.employment = line["E_yir"] - dpy.wages = line["W_yir"] - - dpy.rca = line["rca"] - dpy.density = line["density"] - dpy.cog = line["cog"] - dpy.coi = line["coi"] + df = merge_to_table(industry_classification.level("class"), + "industry_id", df, "i") + df = merge_to_table(location_classification.level("department"), + "department_id", df, "r") - return dpy - return inner - cpy_out = df.apply(make_diy(), axis=1) - db.session.add_all(cpy_out) + # Industry - Year + iy = df.groupby(["industry_id", "year"])[["pci"]].first().reset_index() + iy = iy.rename(columns={"pci": "complexity"}) + iy.to_sql("industry_year", db.engine, index=False, + chunksize=10000, if_exists="append") - iy = df.groupby(["i", "year"])[["pci"]].first().reset_index() - iy_out = iy.apply(make_iy(industry_map), axis=1) - db.session.add_all(iy_out) + # Department - industry - year + df = df.rename(columns={"E_yir": "employment", "W_yir": "wages"}) + df = df[["department_id", "industry_id", "year", "employment", + "wages", "rca", "density", "cog", "coi"]] + df.to_sql("department_industry_year", db.engine, index=False, + chunksize=10000, if_exists="append") - db.session.commit() # Municipality - industry - year df = pd.read_stata("/Users/makmana/ciddata/PILA_andres/COL_PILA_ecomp-E_yir_2008-2012_rev3_mun.dta") df = df[["year", "r", "i", "E_yir", "W_yir", "rca", "density", "cog", "coi", "pci"]] df = df[df.i != "."] - # Classification.merge_to_table - # Classification.merge_index - - def merge_to_table(classification, classification_name, df, merge_on): - code_to_id = classification.reset_index()[["code", "index"]] - code_to_id.columns = ["code", classification_name] - code_to_id = code_to_id.set_index("code") - return df.merge(code_to_id, left_on=merge_on, - right_index=True, how="left") - df = merge_to_table(industry_classification.level("class"), "industry_id", df, "i") df = merge_to_table(location_classification.level("municipality"), From 39dfb5173e0fbfaada55746efc8c729d4b8a7d90 Mon Sep 17 00:00:00 2001 From: Mali Akmanalp Date: Mon, 29 Jun 2015 12:26:50 -0400 Subject: [PATCH 3/3] Municipality-Industry-Year API --- colombia/api_schemas.py | 8 ++++++++ colombia/data/views.py | 11 ++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/colombia/api_schemas.py b/colombia/api_schemas.py index cf81bf1..0ddb9af 100644 --- a/colombia/api_schemas.py +++ b/colombia/api_schemas.py @@ -33,6 +33,13 @@ class Meta: "department_id", "industry_id", "year") +class MunicipalityIndustryYearSchema(ma.Schema): + + class Meta: + fields = ("employment", "wages", "rca", "distance", "cog", "coi", + "municipality_id", "industry_id", "year") + + class DepartmentSchema(ma.Schema): class Meta: @@ -65,6 +72,7 @@ class ColombiaMetadataSchema(MetadataSchema): department_product_year = DepartmentProductYearSchema(many=True) department_industry_year = DepartmentIndustryYearSchema(many=True) +municipality_industry_year = MunicipalityIndustryYearSchema(many=True) product_year = ProductYearSchema(many=True) department = DepartmentSchema(many=True) metadata = ColombiaMetadataSchema(many=True) diff --git a/colombia/data/views.py b/colombia/data/views.py index 8cefe2f..ec24aae 100644 --- a/colombia/data/views.py +++ b/colombia/data/views.py @@ -1,6 +1,7 @@ from flask import Blueprint, request from .models import (DepartmentProductYear, DepartmentIndustryYear, - ProductYear, Location, IndustryYear, DepartmentYear) + MunicipalityIndustryYear, ProductYear, Location, + IndustryYear, DepartmentYear) from ..api_schemas import marshal from .. import api_schemas as schemas @@ -164,11 +165,19 @@ def industries_index(product_id=None): q = DepartmentIndustryYear.query\ .filter_by(year=year, department_id=location_id) return marshal(schemas.department_industry_year, q) + elif location_type == "municipality": + q = MunicipalityIndustryYear.query\ + .filter_by(year=year, municipality_id=location_id) + return marshal(schemas.municipality_industry_year, q) elif location_id is not None: if location_type == "department": q = DepartmentIndustryYear.query\ .filter_by(department_id=location_id) return marshal(schemas.department_industry_year, q) + elif location_type == "municipality": + q = MunicipalityIndustryYear.query\ + .filter_by(municipality_id=location_id) + return marshal(schemas.municipality_industry_year, q) raise abort(400, body="Could not find data with the given parameters.")