From 8f35ceba66ee77aa667756a75f0264bd3294ee21 Mon Sep 17 00:00:00 2001 From: Mali Akmanalp Date: Wed, 24 Jun 2015 10:53:19 -0400 Subject: [PATCH] Add population / GDP importing code and endpoint --- colombia/data/models.py | 6 +++++ colombia/data/views.py | 22 ++++++++++++++--- colombia/import.py | 53 +++++++++++++++++++++++++++++++++++++---- 3 files changed, 74 insertions(+), 7 deletions(-) diff --git a/colombia/data/models.py b/colombia/data/models.py index 3a27f70..4a152ba 100644 --- a/colombia/data/models.py +++ b/colombia/data/models.py @@ -49,6 +49,12 @@ class DepartmentYear(BaseModel, IDMixin): eci_rank = db.Column(db.Integer) diversity = db.Column(db.Float) + gdp_nominal = db.Column(db.Integer) + gdp_real = db.Column(db.Integer) + gdp_pc_nominal = db.Column(db.Integer) + gdp_pc_real = db.Column(db.Integer) + + population = db.Column(db.Integer) class ProductYear(BaseModel, IDMixin): diff --git a/colombia/data/views.py b/colombia/data/views.py index 80e1a09..8cefe2f 100644 --- a/colombia/data/views.py +++ b/colombia/data/views.py @@ -1,6 +1,6 @@ from flask import Blueprint, request from .models import (DepartmentProductYear, DepartmentIndustryYear, - ProductYear, Location, IndustryYear) + ProductYear, Location, IndustryYear, DepartmentYear) from ..api_schemas import marshal from .. import api_schemas as schemas @@ -113,8 +113,7 @@ def products_index(product_id=None): @departments_app.route("/departments") -@departments_app.route("/departments/") -def departments_index(department_id=None): +def departments_index(): year = request.args.get("year", None) @@ -132,6 +131,23 @@ def departments_index(department_id=None): raise abort(400, body="Could not find data with the given parameters.") +@departments_app.route("/departments/departmentyear/") +@departments_app.route("/departments/departmentyear/") +def departments_departmentyear(department_id=None): + + q = db.session\ + .query(DepartmentYear.department_id, + DepartmentYear.year, + DepartmentYear.gdp_nominal, + DepartmentYear.gdp_real, + DepartmentYear.gdp_pc_nominal, + DepartmentYear.gdp_pc_real, + DepartmentYear.population)\ + .all() + + return jsonify(data=[x._asdict() for x in q]) + + @industries_app.route("/industries") @industries_app.route("/industries/") def industries_index(product_id=None): diff --git a/colombia/import.py b/colombia/import.py index b8dc34f..4de8615 100644 --- a/colombia/import.py +++ b/colombia/import.py @@ -3,7 +3,6 @@ import pandas as pd import numpy as np -from io import StringIO from atlas_core.helpers.data_import import translate_columns from colombia import models, create_app @@ -11,6 +10,14 @@ from tests import BaseTestCase +def fillin(df, entities): + """STATA style "fillin", make sure all permutations of entities in the + index are in the dataset.""" + df = df.set_index(entities) + return df.reindex( + pd.MultiIndex.from_product(df.index.levels, names=df.index.names)) + + def classification_to_models(classification, model): models = [] for index, row in classification.table.iterrows(): @@ -51,6 +58,11 @@ def inner(line): dy.year = line["year"] dy.eci = line["eci"] # dy.diversity = line["diversity"] + dy.gdp_nominal = line["gdp_nominal"] + dy.gdp_real = line["gdp_real"] + dy.gdp_pc_nominal = line["gdp_pc_nominal"] + dy.gdp_pc_real = line["gdp_pc_real"] + dy.population = line["population"] return dy return inner @@ -281,10 +293,42 @@ def test_process_cpy(self): df.p = df.p.astype(int).astype(str).str.zfill(4) df = translate_columns(df, aduanas_to_atlas) - cy, py, cpy = process_cpy(df, product_map, location_map) - db.session.add_all(cy) - db.session.add_all(py) + cpy = df.apply(make_cpy(location_map, product_map), axis=1) db.session.add_all(cpy) + + py = df.groupby(["product", "year"]).first().reset_index() + py = py.apply(make_py(product_map), axis=1) + db.session.add_all(py) + + # GDP data + gdp_df = pd.read_stata("/Users/makmana/ciddata/metadata/Annual GDP (nominal)/COL_nomrealgdp_dept_annual1990-2012.dta") + gdp_df = gdp_df[["depcode", "year", "depgdpn", "gdpkmultipliedbydeflator"]] + gdp_df.columns = ["department", "year", "gdp_real", "gdp_nominal"] + gdp_df.gdp_real = gdp_df.gdp_real * (10 ** 6) + gdp_df.gdp_nominal = gdp_df.gdp_nominal * (10 ** 6) + gdp_df.department = gdp_df.department.astype(str).str.zfill(2) + + # Pop data + pop_df = pd.read_stata("/Users/makmana/ciddata/metadata/Population/COL_pop_deptmunicip_1985-2012.dta") + pop_df = pop_df[["year", "dp", "popdept"]] + pop_df.columns = ["year", "department", "population"] + pop_df.department = pop_df.department.astype(str).str.zfill(2) + pop_df = pop_df[(2007 <= pop_df.year) & (pop_df.year <= 2013)] + pop_df = pop_df[~pop_df.duplicated()] + + cy = df.groupby(["department", "year"]).first().reset_index() + cy = fillin(cy, ["department", "year"]).reset_index() + cy = cy.merge(gdp_df, + on=["department", "year"], + how="left") + cy = cy.merge(pop_df, + on=["department", "year"], + how="left") + cy["gdp_pc_real"] = cy.gdp_real / cy.population + cy["gdp_pc_nominal"] = cy.gdp_nominal / cy.population + cy = cy.apply(make_cy(location_map), axis=1) + + db.session.add_all(cy) db.session.commit() # Department - industry - year @@ -320,3 +364,4 @@ def inner(line): db.session.commit() +