Skip to content

Commit

Permalink
Merge pull request #14 from cid-harvard/feature-pop-gdp-data
Browse files Browse the repository at this point in the history
Add population / GDP importing code and endpoint
  • Loading branch information
makmanalp committed Jun 25, 2015
2 parents 3d294b7 + 8f35ceb commit 3f582a2
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 7 deletions.
6 changes: 6 additions & 0 deletions colombia/data/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ class DepartmentYear(BaseModel, IDMixin):
eci_rank = db.Column(db.Integer)
diversity = db.Column(db.Float)

gdp_nominal = db.Column(db.Integer)
gdp_real = db.Column(db.Integer)
gdp_pc_nominal = db.Column(db.Integer)
gdp_pc_real = db.Column(db.Integer)

population = db.Column(db.Integer)

class ProductYear(BaseModel, IDMixin):

Expand Down
22 changes: 19 additions & 3 deletions colombia/data/views.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from flask import Blueprint, request
from .models import (DepartmentProductYear, DepartmentIndustryYear,
ProductYear, Location, IndustryYear)
ProductYear, Location, IndustryYear, DepartmentYear)
from ..api_schemas import marshal
from .. import api_schemas as schemas

Expand Down Expand Up @@ -113,8 +113,7 @@ def products_index(product_id=None):


@departments_app.route("/departments")
@departments_app.route("/departments/<int:department_id>")
def departments_index(department_id=None):
def departments_index():

year = request.args.get("year", None)

Expand All @@ -132,6 +131,23 @@ def departments_index(department_id=None):
raise abort(400, body="Could not find data with the given parameters.")


@departments_app.route("/departments/departmentyear/")
@departments_app.route("/departments/departmentyear/<int:department_id>")
def departments_departmentyear(department_id=None):
    """Return GDP and population figures per department and year as JSON.

    Each element of the ``data`` list in the response carries the
    ``department_id``, ``year``, ``gdp_nominal``, ``gdp_real``,
    ``gdp_pc_nominal``, ``gdp_pc_real`` and ``population`` columns of one
    ``DepartmentYear`` row.

    :param department_id: optional id taken from the URL. When given, only
        rows for that department are returned; when omitted, rows for every
        department are returned.
    """
    # NOTE(review): `db` and `jsonify` are not among the imports visible in
    # this hunk -- confirm they are in scope in this module.
    q = db.session.query(
        DepartmentYear.department_id,
        DepartmentYear.year,
        DepartmentYear.gdp_nominal,
        DepartmentYear.gdp_real,
        DepartmentYear.gdp_pc_nominal,
        DepartmentYear.gdp_pc_real,
        DepartmentYear.population,
    )

    # The route accepts a department id, so honour it instead of silently
    # returning the full table for every request.
    if department_id is not None:
        q = q.filter(DepartmentYear.department_id == department_id)

    return jsonify(data=[row._asdict() for row in q.all()])


@industries_app.route("/industries")
@industries_app.route("/industries/<int:industry_id>")
def industries_index(product_id=None):
Expand Down
53 changes: 49 additions & 4 deletions colombia/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,21 @@
import pandas as pd
import numpy as np

from io import StringIO

from atlas_core.helpers.data_import import translate_columns
from colombia import models, create_app
from colombia.core import db
from tests import BaseTestCase


def fillin(df, entities):
    """STATA style "fillin": make sure all permutations of entities in the
    index are in the dataset.

    :param df: DataFrame containing the columns named in ``entities``.
    :param entities: column name, or list of column names, to index by.
    :returns: ``df`` indexed by ``entities`` and reindexed over the cartesian
        product of the distinct values of each entity. Combinations that were
        absent from the input appear as rows filled with NaN.
    """
    indexed = df.set_index(entities)

    # With a single entity, set_index() produces a plain Index that has no
    # `.levels`; every value is already present, so there is nothing to fill.
    if not isinstance(indexed.index, pd.MultiIndex):
        return indexed

    full_index = pd.MultiIndex.from_product(
        indexed.index.levels, names=indexed.index.names)
    return indexed.reindex(full_index)


def classification_to_models(classification, model):
models = []
for index, row in classification.table.iterrows():
Expand Down Expand Up @@ -51,6 +58,11 @@ def inner(line):
dy.year = line["year"]
dy.eci = line["eci"]
# dy.diversity = line["diversity"]
dy.gdp_nominal = line["gdp_nominal"]
dy.gdp_real = line["gdp_real"]
dy.gdp_pc_nominal = line["gdp_pc_nominal"]
dy.gdp_pc_real = line["gdp_pc_real"]
dy.population = line["population"]
return dy
return inner

Expand Down Expand Up @@ -281,10 +293,42 @@ def test_process_cpy(self):
df.p = df.p.astype(int).astype(str).str.zfill(4)
df = translate_columns(df, aduanas_to_atlas)

cy, py, cpy = process_cpy(df, product_map, location_map)
db.session.add_all(cy)
db.session.add_all(py)
cpy = df.apply(make_cpy(location_map, product_map), axis=1)
db.session.add_all(cpy)

py = df.groupby(["product", "year"]).first().reset_index()
py = py.apply(make_py(product_map), axis=1)
db.session.add_all(py)

# GDP data
gdp_df = pd.read_stata("/Users/makmana/ciddata/metadata/Annual GDP (nominal)/COL_nomrealgdp_dept_annual1990-2012.dta")
gdp_df = gdp_df[["depcode", "year", "depgdpn", "gdpkmultipliedbydeflator"]]
gdp_df.columns = ["department", "year", "gdp_real", "gdp_nominal"]
gdp_df.gdp_real = gdp_df.gdp_real * (10 ** 6)
gdp_df.gdp_nominal = gdp_df.gdp_nominal * (10 ** 6)
gdp_df.department = gdp_df.department.astype(str).str.zfill(2)

# Pop data
pop_df = pd.read_stata("/Users/makmana/ciddata/metadata/Population/COL_pop_deptmunicip_1985-2012.dta")
pop_df = pop_df[["year", "dp", "popdept"]]
pop_df.columns = ["year", "department", "population"]
pop_df.department = pop_df.department.astype(str).str.zfill(2)
pop_df = pop_df[(2007 <= pop_df.year) & (pop_df.year <= 2013)]
pop_df = pop_df[~pop_df.duplicated()]

cy = df.groupby(["department", "year"]).first().reset_index()
cy = fillin(cy, ["department", "year"]).reset_index()
cy = cy.merge(gdp_df,
on=["department", "year"],
how="left")
cy = cy.merge(pop_df,
on=["department", "year"],
how="left")
cy["gdp_pc_real"] = cy.gdp_real / cy.population
cy["gdp_pc_nominal"] = cy.gdp_nominal / cy.population
cy = cy.apply(make_cy(location_map), axis=1)

db.session.add_all(cy)
db.session.commit()

# Department - industry - year
Expand Down Expand Up @@ -320,3 +364,4 @@ def inner(line):


db.session.commit()

0 comments on commit 3f582a2

Please sign in to comment.