Skip to content

Commit

Permalink
Export URL arg support, new section names, blood 4digit HS name change (#6)
Browse files Browse the repository at this point in the history

* Add support for downloading sheets using export URL args

* New SITC and HS naming changes (section names, blood)

* Bump version
  • Loading branch information
bleonard33 authored Feb 7, 2019
1 parent d726201 commit a2d8c3c
Show file tree
Hide file tree
Showing 14 changed files with 117 additions and 73 deletions.
11 changes: 11 additions & 0 deletions google_docs_download.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import gspread
import requests
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd

Expand All @@ -13,6 +14,16 @@ def bytes_to_df(data, **kwargs):
return pd.read_csv(io, **kwargs)


def download_sheet(key, sheet_id, output_path, timeout=60):
    """Download one tab of a Google spreadsheet and save it as a TSV file.

    The tab is fetched through the spreadsheet's CSV export URL, parsed with
    ``dtype="str"`` and written to ``{output_path}.tsv`` (tab-separated,
    UTF-8, without the index column).

    Args:
        key: Spreadsheet key, i.e. the ``d/<key>`` part of the sheet URL.
        sheet_id: Value of the ``gid`` URL argument selecting the tab.
        output_path: Destination path without extension; ``.tsv`` is appended.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the export request returns a 4xx/5xx status.
    """
    url = (
        "https://docs.google.com/spreadsheets/"
        f"d/{key}/export?format=csv&id={key}&gid={sheet_id}"
    )

    response = requests.get(url, timeout=timeout)
    # Fail loudly rather than parsing an error response as CSV and
    # overwriting the previously downloaded file with it.
    response.raise_for_status()

    df = bytes_to_df(response.content, dtype="str")
    df.to_csv(f"{output_path}.tsv", sep="\t", index=False, encoding="utf-8")


def get_classification_from_gdrive(url, credentials_path=None):

if credentials_path is None:
Expand Down
16 changes: 11 additions & 5 deletions product/HS/IntlAtlas/clean.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import pandas as pd
import sys

sys.path.append("../../..")
from classification import (
Hierarchy,
repeated_table_to_parent_id_table,
Expand All @@ -9,6 +11,14 @@
Classification,
)


def get_hs_services(file="./in/Services_Hierarchy.csv", hierarchy=None):
    """Load the services hierarchy table and spread out its entries.

    Args:
        file: Path to the services hierarchy CSV; read as UTF-8 with every
            column parsed as a string.
        hierarchy: ``Hierarchy`` handed to ``spread_out_entries``. When
            omitted, a section/2digit/4digit/6digit hierarchy is built.

    Returns:
        The result of ``spread_out_entries`` for the services table.
    """
    if hierarchy is None:
        # Build the hierarchy here instead of reading a global ``h`` that is
        # only bound when this module is executed as a script.
        hierarchy = Hierarchy(["section", "2digit", "4digit", "6digit"])

    services = pd.read_csv(file, encoding="utf-8", dtype="str")
    # Spread out services similarly to each set of exports but buffered further
    service_starts = {"section": 10, "2digit": 400, "4digit": 4000, "6digit": 11000}
    return spread_out_entries(services, service_starts, hierarchy)


if __name__ == "__main__":
names = pd.read_table(
"./in/HS92_Atlas_Names.tsv", encoding="utf-8", dtype={"code": str}
Expand All @@ -18,8 +28,6 @@
"./in/HS92_Atlas_Hierarchy.tsv", encoding="utf-8", dtype="str"
)

services = pd.read_csv("./in/Services_Hierarchy.csv", encoding="utf-8", dtype="str")

fields = {"section": [], "2digit": [], "4digit": [], "6digit": []}

h = Hierarchy(["section", "2digit", "4digit", "6digit"])
Expand Down Expand Up @@ -51,9 +59,7 @@
parent_id_table = spread_out_entries(parent_id_table, level_starts, h)

# Append services to table
# Spread out services similarly to each set of exports but buffered further
service_starts = {"section": 10, "2digit": 400, "4digit": 4000, "6digit": 11000}
services = spread_out_entries(services, service_starts, h)
services = get_hs_services()

# Append to main table and sort on combined spread out indices
parent_id_table = parent_id_table.append(services).sort_index()
Expand Down
31 changes: 22 additions & 9 deletions product/HS/IntlAtlas/download_sheets.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,22 @@
from google_docs_download import get_classification_from_gdrive

hierarchy, names = get_classification_from_gdrive(
"https://docs.google.com/spreadsheets/d/1y6UUixlfbW0jLnUtKycHF1ICUaD-kROQIvscwCGFzzE/edit#gid=0"
)
hierarchy.to_csv(
"./in/HS92_Atlas_Hierarchy.tsv", sep="\t", index=False, encoding="utf-8"
)
names.to_csv("./in/HS92_Atlas_Names.tsv", sep="\t", index=False, encoding="utf-8")
import sys

sys.path.append("../../..")
from google_docs_download import download_sheet

if __name__ == "__main__":
    # Both tabs live in the same spreadsheet; only the gid differs.
    spreadsheet_key = "1xPMiNhKf8DKMMU3ntUADQsouCWOLM2-2lY_2o58gSIQ"

    # (gid of the tab, output path without extension)
    targets = (
        ("0", "./in/HS92_Atlas_Hierarchy"),
        ("1029116973", "./in/HS92_Atlas_Names"),
    )

    for gid, destination in targets:
        download_sheet(key=spreadsheet_key, sheet_id=gid, output_path=destination)
18 changes: 9 additions & 9 deletions product/HS/IntlAtlas/in/HS92_Atlas_Names.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ code level name_en name_es name_short_es name_short_en
2941 4digit Antibiotics Antibióticos. Antibióticos Antibiotics
2942 4digit Other organic compounds Los demás compuestos orgánicos. Los demás compuestos orgánicos Other organic compounds
3001 4digit Glands and other organs for organotherapeutic uses, dried, whether or not powdered; extracts of glands or other organs or of their secretions for organotherapeutic uses; heparin and its salts; other human or animal substances prepared for therapeutic or p Glándulas y demás órganos para usos opoterápicos, desecados, incluso pulverizados; extractos de glándulas o de otros órganos o de sus secreciones, para usos opoterápicos; heparina y sus sales; las demás sustancias humanas o animales preparadas para usos terapéuticos o profilácticos, no expresadas ni comprendidas en otra parte. Órganos para usos opoterapicos Heparin for therapeutic use
3002 4digit Human blood; animal blood prepared for therapeutic, prophylactic or diagnostic uses; antisera and other blood fractions and modified immunological products, whether or not obtained by means of biotechnological processes; vaccines, toxins, cultures of micr Sangre humana; sangre animal preparada para usos terapéuticos, profilácticos o de diagnóstico; antisueros (sueros con anticuerpos), demás fracciones de la sangre y productos inmunológicos, incluso modificados u obtenidos por procesos biotecnológicos; vacunas, toxinas, cultivos de microorganismos (excepto las levaduras) y productos similares. Sangre Blood
3002 4digit Human blood; animal blood prepared for therapeutic, prophylactic or diagnostic uses; antisera and other blood fractions and modified immunological products, whether or not obtained by means of biotechnological processes; vaccines, toxins, cultures of micr Sangre humana; sangre animal preparada para usos terapéuticos, profilácticos o de diagnóstico; antisueros (sueros con anticuerpos), demás fracciones de la sangre y productos inmunológicos, incluso modificados u obtenidos por procesos biotecnológicos; vacunas, toxinas, cultivos de microorganismos (excepto las levaduras) y productos similares. Sangre Antisera, vaccines and blood
3003 4digit Medicaments (excluding goods of heading 3002, 3005 or 3006) consisting of two or more constituents which have been mixed together for therapeutic or prophylactic uses, not put up in measured doses or in forms or packings for retail sale Medicamentos (excepto los productos de las partidas 30.02, 30.05 ó 30.06) constituidos por productos mezclados entre sí, preparados para usos terapéuticos o profilácticos, sin dosificar ni acondicionar para la venta al por menor. Medicamentos, no envasados Medicaments, not packaged
3004 4digit Medicaments (excluding goods of heading 3002, 3005 or 3006) consisting of mixed or unmixed products for therapeutic or prophylactic uses, put up in measured doses (including those in the form of transdermal administration systems) or in forms or packings Medicamentos (excepto los productos de las partidas 30.02, 30.05 ó 30.06) constituidos por productos mezclados o sin mezclar, preparados para usos terapéuticos o profilácticos, dosificados (incluidos los administrados por vía transdérmica) o acondicionados para la venta al por menor. Medicamentos, envasados Medicaments, packaged
3005 4digit Wadding, gauze, bandages and similar articles (for example, dressings, adhesive plasters, poultices), impregnated or coated with pharmaceutical substances or put up in forms or packings for retail sale for medical, surgical, dental or veterinary purposes Guatas, gasas, vendas y artículos análogos (por ejemplo: apósitos, esparadrapos, sinapismos), impregnados o recubiertos de sustancias farmacéuticas o acondicionados para la venta al por menor con fines médicos, quirúrgicos, odontológicos o veterinarios. Guatas, gasas, y vendas Wadding, gauze and bandages
Expand Down Expand Up @@ -1341,14 +1341,14 @@ XXXX 4digit Trade data discrepancies Trade data discrepancies Trade data discrep
97 2digit Works of art, collectors' pieces and antiques. Obras de arte, piezas de colección y antigüedades Arte Art
98 2digit (Reserved for special uses by Contracting Parties) (Reservado para usos especiales de las Partes Contratantes) (Reservado para usos especiales de las Partes Contratantes) (Reserved for special uses by Contracting Parties)
99 2digit Other Other Other Other
0 section Textiles and furniture Textiles y muebles Textiles y muebles Textiles and furniture
1 section Vegetables, foodstuffs and wood Vegetales, alimentos y madera Vegetales, alimentos y madera Vegetables, foodstuffs and wood
2 section Stone and glass Piedra y vidrio Piedra y vidrio Stone and glass
3 section Minerals Minerales Minerales Minerals
0 section Textiles, garments, footwear and furniture Textiles y muebles Textiles y muebles Textiles
1 section Vegetables, animals, wood and paper Vegetales, alimentos y madera Vegetales, alimentos y madera Agriculture
2 section Stone, glass and ceramics Piedra y vidrio Piedra y vidrio Stone
3 section Minerals, fuels, ores and salts Minerales Minerales Minerals
4 section Metals Metales Metales Metals
5 section Chemicals and plastics Químicos y plásticos Químicos y plásticos Chemicals and plastics
6 section Transport vehicles Vehículos de transporte Vehículos de transporte Transport vehicles
7 section Machinery Maquinaria Maquinaria Machinery
5 section Chemicals and plastics Químicos y plásticos Químicos y plásticos Chemicals
6 section Transport vehicles Vehículos de transporte Vehículos de transporte Vehicles
7 section Machinery and instruments Maquinaria Maquinaria Machinery
8 section Electronics Electrónicos Electrónicos Electronics
9 section Other Other Other Other
010111 6digit Live horses, pure-bred Horses, pure-bred
Expand Down Expand Up @@ -4837,7 +4837,7 @@ XXXX 4digit Trade data discrepancies Trade data discrepancies Trade data discrep
740110 6digit Copper mattes Copper mattes
740120 6digit Cement copper (precipitated copper) Cement copper
740200 6digit Unrefined copper, copper anodes, electrolytic refinin Unrefined copper
740311 6digit Copper cathodes and sections of cathodes unwrought COpper cathodes
740311 6digit Copper cathodes and sections of cathodes unwrought Copper cathodes
740312 6digit Wire bars, copper, unwrought Wire bars, copper, unwrought
740313 6digit Billets, copper, unwrought Billets, copper, unwrought
740319 6digit Refined copper products, unwrought, nes Refined copper products, unwrought
Expand Down
18 changes: 9 additions & 9 deletions product/HS/IntlAtlas/out/hs92_atlas.csv
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
"","code","name","level","name_en","name_es","name_short_en","name_short_es","parent_id"
0,"0","Textiles and furniture","section","Textiles and furniture","Textiles y muebles","Textiles and furniture","Textiles y muebles",""
1,"1","Vegetables, foodstuffs and wood","section","Vegetables, foodstuffs and wood","Vegetales, alimentos y madera","Vegetables, foodstuffs and wood","Vegetales, alimentos y madera",""
2,"2","Stone and glass","section","Stone and glass","Piedra y vidrio","Stone and glass","Piedra y vidrio",""
3,"3","Minerals","section","Minerals","Minerales","Minerals","Minerales",""
0,"0","Textiles, garments, footwear and furniture","section","Textiles, garments, footwear and furniture","Textiles y muebles","Textiles","Textiles y muebles",""
1,"1","Vegetables, animals, wood and paper","section","Vegetables, animals, wood and paper","Vegetales, alimentos y madera","Agriculture","Vegetales, alimentos y madera",""
2,"2","Stone, glass and ceramics","section","Stone, glass and ceramics","Piedra y vidrio","Stone","Piedra y vidrio",""
3,"3","Minerals, fuels, ores and salts","section","Minerals, fuels, ores and salts","Minerales","Minerals","Minerales",""
4,"4","Metals","section","Metals","Metales","Metals","Metales",""
5,"5","Chemicals and plastics","section","Chemicals and plastics","Químicos y plásticos","Chemicals and plastics","Químicos y plásticos",""
6,"6","Transport vehicles","section","Transport vehicles","Vehículos de transporte","Transport vehicles","Vehículos de transporte",""
7,"7","Machinery","section","Machinery","Maquinaria","Machinery","Maquinaria",""
5,"5","Chemicals and plastics","section","Chemicals and plastics","Químicos y plásticos","Chemicals","Químicos y plásticos",""
6,"6","Transport vehicles","section","Transport vehicles","Vehículos de transporte","Vehicles","Vehículos de transporte",""
7,"7","Machinery and instruments","section","Machinery and instruments","Maquinaria","Machinery","Maquinaria",""
8,"8","Electronics","section","Electronics","Electrónicos","Electronics","Electrónicos",""
9,"9","Other","section","Other","Other","Other","Other",""
10,"services","Services","section","Services","Services","Services","Services",""
Expand Down Expand Up @@ -474,7 +474,7 @@
1009,"2941","Antibiotics","4digit","Antibiotics","Antibióticos.","Antibiotics","Antibióticos",128.0
1010,"2942","Other organic compounds","4digit","Other organic compounds","Los demás compuestos orgánicos.","Other organic compounds","Los demás compuestos orgánicos",128.0
1011,"3001","Glands and other organs for organotherapeutic uses, dried, whether or not powdered; extracts of glands or other organs or of their secretions for organotherapeutic uses; heparin and its salts; other human or animal substances prepared for therapeutic or p","4digit","Glands and other organs for organotherapeutic uses, dried, whether or not powdered; extracts of glands or other organs or of their secretions for organotherapeutic uses; heparin and its salts; other human or animal substances prepared for therapeutic or p","Glándulas y demás órganos para usos opoterápicos, desecados, incluso pulverizados; extractos de glándulas o de otros órganos o de sus secreciones, para usos opoterápicos; heparina y sus sales; las demás sustancias humanas o animales preparadas para usos terapéuticos o profilácticos, no expresadas ni comprendidas en otra parte.","Heparin for therapeutic use","Órganos para usos opoterapicos",129.0
1012,"3002","Human blood; animal blood prepared for therapeutic, prophylactic or diagnostic uses; antisera and other blood fractions and modified immunological products, whether or not obtained by means of biotechnological processes; vaccines, toxins, cultures of micr","4digit","Human blood; animal blood prepared for therapeutic, prophylactic or diagnostic uses; antisera and other blood fractions and modified immunological products, whether or not obtained by means of biotechnological processes; vaccines, toxins, cultures of micr","Sangre humana; sangre animal preparada para usos terapéuticos, profilácticos o de diagnóstico; antisueros (sueros con anticuerpos), demás fracciones de la sangre y productos inmunológicos, incluso modificados u obtenidos por procesos biotecnológicos; vacunas, toxinas, cultivos de microorganismos (excepto las levaduras) y productos similares.","Blood","Sangre",129.0
1012,"3002","Human blood; animal blood prepared for therapeutic, prophylactic or diagnostic uses; antisera and other blood fractions and modified immunological products, whether or not obtained by means of biotechnological processes; vaccines, toxins, cultures of micr","4digit","Human blood; animal blood prepared for therapeutic, prophylactic or diagnostic uses; antisera and other blood fractions and modified immunological products, whether or not obtained by means of biotechnological processes; vaccines, toxins, cultures of micr","Sangre humana; sangre animal preparada para usos terapéuticos, profilácticos o de diagnóstico; antisueros (sueros con anticuerpos), demás fracciones de la sangre y productos inmunológicos, incluso modificados u obtenidos por procesos biotecnológicos; vacunas, toxinas, cultivos de microorganismos (excepto las levaduras) y productos similares.","Antisera, vaccines and blood","Sangre",129.0
1013,"3003","Medicaments (excluding goods of heading 3002, 3005 or 3006) consisting of two or more constituents which have been mixed together for therapeutic or prophylactic uses, not put up in measured doses or in forms or packings for retail sale","4digit","Medicaments (excluding goods of heading 3002, 3005 or 3006) consisting of two or more constituents which have been mixed together for therapeutic or prophylactic uses, not put up in measured doses or in forms or packings for retail sale","Medicamentos (excepto los productos de las partidas 30.02, 30.05 ó 30.06) constituidos por productos mezclados entre sí, preparados para usos terapéuticos o profilácticos, sin dosificar ni acondicionar para la venta al por menor.","Medicaments, not packaged","Medicamentos, no envasados",129.0
1014,"3004","Medicaments (excluding goods of heading 3002, 3005 or 3006) consisting of mixed or unmixed products for therapeutic or prophylactic uses, put up in measured doses (including those in the form of transdermal administration systems) or in forms or packings","4digit","Medicaments (excluding goods of heading 3002, 3005 or 3006) consisting of mixed or unmixed products for therapeutic or prophylactic uses, put up in measured doses (including those in the form of transdermal administration systems) or in forms or packings","Medicamentos (excepto los productos de las partidas 30.02, 30.05 ó 30.06) constituidos por productos mezclados o sin mezclar, preparados para usos terapéuticos o profilácticos, dosificados (incluidos los administrados por vía transdérmica) o acondicionados para la venta al por menor.","Medicaments, packaged","Medicamentos, envasados",129.0
1015,"3005","Wadding, gauze, bandages and similar articles (for example, dressings, adhesive plasters, poultices), impregnated or coated with pharmaceutical substances or put up in forms or packings for retail sale for medical, surgical, dental or veterinary purposes","4digit","Wadding, gauze, bandages and similar articles (for example, dressings, adhesive plasters, poultices), impregnated or coated with pharmaceutical substances or put up in forms or packings for retail sale for medical, surgical, dental or veterinary purposes","Guatas, gasas, vendas y artículos análogos (por ejemplo: apósitos, esparadrapos, sinapismos), impregnados o recubiertos de sustancias farmacéuticas o acondicionados para la venta al por menor con fines médicos, quirúrgicos, odontológicos o veterinarios.","Wadding, gauze and bandages","Guatas, gasas, y vendas",129.0
Expand Down Expand Up @@ -4846,7 +4846,7 @@
8483,"740110","Copper mattes","6digit","Copper mattes","","Copper mattes","",1517.0
8484,"740120","Cement copper (precipitated copper)","6digit","Cement copper (precipitated copper)","","Cement copper","",1517.0
8485,"740200","Unrefined copper, copper anodes, electrolytic refinin","6digit","Unrefined copper, copper anodes, electrolytic refinin","","Unrefined copper","",1518.0
8486,"740311","Copper cathodes and sections of cathodes unwrought","6digit","Copper cathodes and sections of cathodes unwrought","","COpper cathodes","",1519.0
8486,"740311","Copper cathodes and sections of cathodes unwrought","6digit","Copper cathodes and sections of cathodes unwrought","","Copper cathodes","",1519.0
8487,"740312","Wire bars, copper, unwrought","6digit","Wire bars, copper, unwrought","","Wire bars, copper, unwrought","",1519.0
8488,"740313","Billets, copper, unwrought","6digit","Billets, copper, unwrought","","Billets, copper, unwrought","",1519.0
8489,"740319","Refined copper products, unwrought, nes","6digit","Refined copper products, unwrought, nes","","Refined copper products, unwrought","",1519.0
Expand Down
Binary file modified product/HS/IntlAtlas/out/hs92_atlas.dta
Binary file not shown.
2 changes: 2 additions & 0 deletions product/SITC/IntlAtlas/clean_sitc.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import pandas as pd
import sys

sys.path.append("../../..")
from classification import (
Hierarchy,
repeated_table_to_parent_id_table,
Expand Down
32 changes: 22 additions & 10 deletions product/SITC/IntlAtlas/download_sheets.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,22 @@
from google_docs_download import get_classification_from_gdrive

# SITC Rev2
hierarchy, names = get_classification_from_gdrive(
"https://docs.google.com/spreadsheets/d/1pgVamRALdpc7IlmzkvJrLk4Hkm4H4wvNyoIycQ6wr_E/edit#gid=1207195644"
)
hierarchy.to_csv(
"./in/SITC_Rev2_Hierarchy.tsv", sep="\t", index=False, encoding="utf-8"
)
names.to_csv("./in/SITC_Rev2_Names.tsv", sep="\t", index=False, encoding="utf-8")
import sys

sys.path.append("../../..")
from google_docs_download import download_sheet

if __name__ == "__main__":
    # Both tabs live in the same spreadsheet; only the gid differs.
    spreadsheet_key = "16Wpxs5SL4s1YUus29lqCkbgC2acXQ19iXZwWEPvvcpM"

    # (gid of the tab, output path without extension)
    targets = (
        ("0", "./in/SITC_Rev2_Hierarchy"),
        ("1207195644", "./in/SITC_Rev2_Names"),
    )

    for gid, destination in targets:
        download_sheet(key=spreadsheet_key, sheet_id=gid, output_path=destination)
Loading

0 comments on commit a2d8c3c

Please sign in to comment.