Skip to content

Commit

Permalink
added Total_type World total trade special case management related to #1
Browse files Browse the repository at this point in the history
  • Loading branch information
Paul Girard committed Feb 19, 2015
1 parent 07a9dc7 commit 39ab51b
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 16 deletions.
50 changes: 34 additions & 16 deletions database_creation/mdb_to_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import json



try :
conf=json.load(open("config.json","r"))
except :
Expand Down Expand Up @@ -34,10 +35,14 @@
# print new_table_name
#c.executescript()
sql=subprocess.check_output("mdb-export -I sqlite %s '%s'"%(mdb_filename,table),shell=True)
print "%s: got %s lines of sql"%(table,len(sql.split("\n")))
print "%s: got %s lines of sql"%(table,len(sql.split(";\n")))
#c.execute("BEGIN TRANSACTION")
for insert in sql.split("\n"):
c.execute(insert)
for insert in sql.split(";\n"):
try :
c.execute(insert)
except Exception as e:
print "'%s'"%insert
raise e
#c.execute("END")

print "inserts done"
Expand Down Expand Up @@ -67,6 +72,12 @@

# clean Land/Sea
c.execute("UPDATE `flow` SET `Land/Sea` = null WHERE `Land/Sea` = ' '")
#clean total type
c.execute("UPDATE `flow` SET `Total_type` = lower(`Total_type`) WHERE `Total_type` is not null")





# RICENTITIES
# add a slug as RICentities id
Expand All @@ -76,9 +87,7 @@
c.execute("""UPDATE RICentities SET id=REPLACE(id,"(","") WHERE 1""")
c.execute("""UPDATE RICentities SET id=REPLACE(id,")","") WHERE 1""")
c.execute("""UPDATE RICentities SET id=REPLACE(id,"***","") WHERE 1""")
#create indeces
c.execute("""CREATE UNIQUE INDEX i_re_id ON RICentities (id)""")
c.execute("""CREATE INDEX i_re_rn ON RICentities (RICname)""")



# remove 770 'Pas de données' ("no data"): in principle we try to keep them
Expand Down Expand Up @@ -118,18 +127,21 @@
LEFT OUTER JOIN entity_names_cleaning as p ON trim(`Partner Entity_Original Name`)=p.original_name COLLATE NOCASE
LEFT OUTER JOIN RICentities p2 ON p2.RICname=p.RICname
WHERE
`Partner Entity_Sum` is null
and ((`Total Trade Estimation` is null and partner != "World" )or(`Total Trade Estimation`=1 and partner = "World"))
`Partner Entity_Sum` is null
and partner is not null
and expimp != "Re-exp"
""")

# INDEX
c.execute("""CREATE INDEX i_rid ON flow_joined (reporting_id)""")
c.execute("""CREATE INDEX i_pid ON flow_joined (partner_id)""")
c.execute("""CREATE INDEX i_yr ON flow_joined (Yr)""")
c.execute("""CREATE INDEX i_r ON flow_joined (reporting)""")
c.execute("""CREATE INDEX i_p ON flow_joined (partner)""")
# taking care of Total_type flag to define the world partner
# and ((`Total Trade Estimation` is null and partner != "World" )or(`Total Trade Estimation`=1 and partner = "World"))
c.execute("""INSERT INTO RICentities (`id`,`RICname`,`type`,`continent`) VALUES ("Worldestimated","World_estimated","geographical_area","World")""")
c.execute("""UPDATE flow_joined SET partner="World_estimated", partner_id="Worldestimated" WHERE partner="World" and Total_type="total_estimated" """)
c.execute("""INSERT INTO RICentities (`id`,`RICname`,`type`,`continent`) VALUES ("Worldasreported","World_as_reported","geographical_area","World")""")
c.execute("""UPDATE flow_joined SET partner="World_as_reported", partner_id="Worldasreported" WHERE partner="World" and Total_type="total_reporting1" """)
c.execute("""INSERT INTO RICentities (`id`,`RICname`,`type`,`continent`) VALUES ("Worldasreported2","World_as_reported2","geographical_area","World")""")
c.execute("""UPDATE flow_joined SET partner="World_as_reported2", partner_id="Worldasreported2" WHERE partner="World" and Total_type="total_reporting2" """)
c.execute("""INSERT INTO RICentities (`id`,`RICname`,`type`,`continent`) VALUES ("Worldundefined","World_undefined","geographical_area","World")""")
c.execute("""UPDATE flow_joined SET partner="World_undefined", partner_id="Worldundefined" WHERE partner="World" and Total_type is null """)



Expand Down Expand Up @@ -270,8 +282,14 @@
print ("removing %s Gen duplicates for %s"%(r,len(ids))).encode("utf8")
c.execute("DELETE FROM flow_joined WHERE id IN (%s)"%",".join(ids))



# INDEX
c.execute("""CREATE INDEX i_rid ON flow_joined (reporting_id)""")
c.execute("""CREATE INDEX i_pid ON flow_joined (partner_id)""")
c.execute("""CREATE INDEX i_yr ON flow_joined (Yr)""")
c.execute("""CREATE INDEX i_r ON flow_joined (reporting)""")
c.execute("""CREATE INDEX i_p ON flow_joined (partner)""")
c.execute("""CREATE UNIQUE INDEX i_re_id ON RICentities (id)""")
c.execute("""CREATE INDEX i_re_rn ON RICentities (RICname)""")

print "cleaning done"
conn.commit()
Expand Down
4 changes: 4 additions & 0 deletions database_creation/test/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@
flow_status="OK" if flow.test(cursor) else "FAILED"
print "FLOW TEST : %s"%flow_status

import total_type
total_type_status="OK" if total_type.test(cursor) else "FAILED"
print "TOTAL_TYPE TEST : %s"%total_type_status


# c.execute("""SELECT `Reporting Entity_Original Name`
# FROM flow
Expand Down
58 changes: 58 additions & 0 deletions database_creation/test/total_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import codecs
import os

import json
import sqlite3


def test(cursor):
    """Print diagnostics about the typing of Total Trade to World flows.

    Runs four read-only queries against ``flow_joined`` and prints:

    * the number of rows whose partner is ``World_undefined``;
    * the number of (reporting, Yr, expimp) groups and the total row count;
    * for every combination of ``World_*`` partners found inside one
      (reporting, Yr, expimp) group, the summed number of rows, followed by
      the totals compatible with the "world view" (``World_estimated`` or
      ``World_as_reported``) and with the "country view"
      (``World_as_reported2`` or ``World_as_reported``);
    * per reporting, the years for which ``World_undefined`` is the only
      ``World_*`` partner.

    :param cursor: DB-API cursor on a database holding a ``flow_joined``
        table with ``reporting``, ``Yr``, ``expimp`` and ``partner`` columns.
    :returns: always ``True``; the function only reports, it never fails
        a check.
    """
    cursor.execute("""SELECT count(*)
        FROM flow_joined
        WHERE partner='World_undefined'
        """)
    print("%s undefined type of Total Trade to World" % cursor.fetchone())

    cursor.execute("""SELECT count(*),sum(nb) FROM (SELECT count(*) as nb
        FROM flow_joined
        group by reporting,Yr,expimp)
        """)
    nb_reporting_annual_flows, total_flows = cursor.fetchone()
    print("%s number of reporting exp or imp total annual flows on %s total"
          % (nb_reporting_annual_flows, total_flows))

    def _share(part):
        # An empty flow_joined yields zero groups: report 0.0% instead of
        # raising ZeroDivisionError.
        if not nb_reporting_annual_flows:
            return 0.0
        return 100 * float(part) / nb_reporting_annual_flows

    # String literals are single-quoted: double quotes denote identifiers in
    # SQL and are only accepted as strings by SQLite as a legacy quirk.
    cursor.execute("""SELECT world_type_group, sum(nb) as sum FROM (SELECT count(*) as nb,group_concat(partner,'|') as world_type_group
        FROM flow_joined
        WHERE partner IN ('World_undefined','World_estimated','World_as_reported','World_as_reported2')
        group by reporting,Yr,expimp )
        group by world_type_group
        ORDER BY sum DESC
        """)
    print("\nrepartition of type of Total Trade to World as duplicates:")
    missing_world_flows_worldview = 0
    missing_world_flows_countryview = 0
    undefined = 0

    worldview_partners = {"World_estimated", "World_as_reported"}
    countryview_partners = {"World_as_reported2", "World_as_reported"}

    for world_type_group, nb in cursor:
        print("%s %s" % (world_type_group, nb))
        # Compare whole partner names: a substring test would make
        # "World_as_reported" match "World_as_reported2" as well.
        partners = set(world_type_group.split("|"))
        if partners & worldview_partners:
            missing_world_flows_worldview += nb
        if partners & countryview_partners:
            missing_world_flows_countryview += nb
        if world_type_group == "World_undefined":
            undefined += nb
    print("\n%s %.1f%% flows compatible with world view"
          % (missing_world_flows_worldview, _share(missing_world_flows_worldview)))
    print("%s %.1f%% flows compatible with country view"
          % (missing_world_flows_countryview, _share(missing_world_flows_countryview)))
    print("%s Total trade to World flows with no type" % undefined)

    print("\nisolating World_undefined-only flows :")

    cursor.execute("""SELECT reporting,count(*),group_concat(Yr,'|')
        FROM (SELECT reporting,Yr,group_concat(partner) as partners_group
        FROM flow_joined
        WHERE partner IN ('World_undefined','World_estimated','World_as_reported','World_as_reported2')
        group by reporting,Yr,expimp )
        WHERE partners_group='World_undefined'
        group by reporting
        """)
    for row in cursor:
        print("Reporting: %s, %s cases, years: %s" % tuple(row))
    return True


# This helper needs an explicit cursor and is driven from tests.py; keep
# pytest/nose from collecting it as a stand-alone test case because of its name.
test.__test__ = False

0 comments on commit 39ab51b

Please sign in to comment.