From f1adfc4917595e28f4aded162411d8566ff288c6 Mon Sep 17 00:00:00 2001
From: Mali Akmanalp
Date: Tue, 16 Jun 2015 11:06:56 -0400
Subject: [PATCH] Add Industry-Year variables

---
Reviewer notes (text between "---" and the first "diff --git" is not part of
the commit message):
- make_iy previously assigned the value to `iy.pci`, but the IndustryYear
  model added by this patch declares no `pci` column, only `complexity`.
  It now assigns `iy.complexity`.
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. Check the context lines (especially the indentation of
  the script section of colombia/import.py) against the target tree before
  applying. The "index" blob hashes below predate these edits.

 colombia/data/models.py | 13 +++++++++++++
 colombia/import.py      | 21 +++++++++++++++++++--
 colombia/models.py      |  2 +-
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/colombia/data/models.py b/colombia/data/models.py
index 79b5497..3a27f70 100644
--- a/colombia/data/models.py
+++ b/colombia/data/models.py
@@ -63,6 +63,19 @@ class ProductYear(BaseModel, IDMixin):
     pci_rank = db.Column(db.Integer)
 
 
+class IndustryYear(BaseModel, IDMixin):
+    """Yearly variables for one industry (currently just complexity)."""
+
+    __tablename__ = "industry_year"
+
+    industry_id = db.Column(db.Integer, db.ForeignKey(Industry.id))
+    year = db.Column(db.Integer)
+
+    industry = db.relationship(Industry)
+
+    complexity = db.Column(db.Float)
+
+
 class DepartmentIndustryYear(BaseModel, IDMixin):
 
     __tablename__ = "department_industry_year"
diff --git a/colombia/import.py b/colombia/import.py
index 04c3edc..b8f42ec 100644
--- a/colombia/import.py
+++ b/colombia/import.py
@@ -66,5 +66,17 @@ def inner(line):
     return inner
 
 
+def make_iy(industry_map):
+    """Return a function mapping a row ("i", "year", "pci") to an IndustryYear."""
+    def inner(line):
+        iy = models.IndustryYear()
+        iy.industry = industry_map[line["i"]]
+        iy.year = int(line["year"])
+        # The source column is "pci"; the model column is "complexity".
+        iy.complexity = line["pci"]
+        return iy
+    return inner
+
+
 def process_cpy(cpy, product_map, department_map):
     """Take a dataframe and return
@@ -305,5 +317,5 @@ def parse_dpy(dpy_file, translation_table):
         # Department - industry - year
         df = pd.read_stata("/Users/makmana/ciddata/PILA_andres/COL_PILA_ecomp-E_yir_2008-2012_rev3_dpto.dta")
-        df = df[["year", "r", "i", "E_yir", "W_yir", "rca", "density", "cog", "coi"]]
+        df = df[["year", "r", "i", "E_yir", "W_yir", "rca", "density", "cog", "coi", "pci"]]
         df = df[df.i != "."]
         df = df.merge(industry_classification.table, left_on="i",
@@ -328,4 +340,9 @@ def inner(line):
             return inner
         cpy_out = df.apply(make_diy(), axis=1)
         db.session.add_all(cpy_out)
-        db.session.commit()
+        iy = df.groupby(["i", "year"])[["pci"]].first().reset_index()
+        iy_out = iy.apply(make_iy(industry_map), axis=1)
+        db.session.add_all(iy_out)
+
+
+        db.session.commit()
diff --git a/colombia/models.py b/colombia/models.py
index a751d2f..a162478 100644
--- a/colombia/models.py
+++ b/colombia/models.py
@@ -1,2 +1,2 @@
 from .metadata.models import Metadata, HSProduct, Location, Industry
-from .data.models import DepartmentProductYear, DepartmentYear, ProductYear, DepartmentIndustryYear
+from .data.models import DepartmentProductYear, DepartmentYear, ProductYear, DepartmentIndustryYear, IndustryYear