Merge branch 'main' of github.com:datakind/humanitarian_ai_assistant
Showing 11 changed files with 238 additions and 61 deletions.
...rkflows/main_ai-assistants-prototypes.yml → .github/workflows/test_deploy.yml (6 changes: 1 addition & 5 deletions)
@@ -0,0 +1,63 @@
import pandas as pd


def filter_hapi_df(df, admin0_code_field):
    """
    Filter a pandas DataFrame by removing columns where all values are null and
    removing rows where any value is null, per country. This is a workaround for
    the fact that HDX mixes total values in with disaggregated values in the API.

    Args:
        df (pandas.DataFrame): The DataFrame to be filtered.
        admin0_code_field (str): The name of the column containing the admin0 code.

    Returns:
        pandas.DataFrame: The filtered DataFrame.
    """
    df_orig = df.copy()

    if df.shape[0] == 0:
        return df_orig

    dfs = []
    if admin0_code_field in df.columns:
        for country in df[admin0_code_field].unique():
            df2 = df.copy()
            df2 = df2[df2[admin0_code_field] == country]

            # Remove any columns where all values are null
            df2 = df2.dropna(axis=1, how="all")

            # Remove any rows where one of the values is null
            df2 = df2.dropna(axis=0, how="any")

            # Select the surviving rows from the original frame by index label,
            # so that all original columns are retained
            dfs.append(df.loc[df2.index])

    # If there was nothing to filter on, return the unmodified copy
    if not dfs:
        return df_orig

    df = pd.concat(dfs)

    return df


def post_process_data(df, standard_names):
    """
    Post-processes the data by filtering and renaming columns.

    Args:
        df (pandas.DataFrame): The DataFrame to be post-processed.
        standard_names (dict): Standard field names; the "admin0_code_field"
            key gives the name of the admin0 code column.

    Returns:
        pandas.DataFrame: The post-processed DataFrame.
    """
    # HDX mixes aggregated and disaggregated data in the same tables, where the
    # hierarchy differs by country, so filter per country
    df = filter_hapi_df(df, standard_names["admin0_code_field"])

    # Add a flag to indicate the latest dataset by HDX ID, useful for LLM queries
    if "resource_hdx_id" in df.columns:
        df["latest"] = 0
        df["reference_period_start"] = pd.to_datetime(df["reference_period_start"])
        df["latest"] = df.groupby("dataset_hdx_stub")[
            "reference_period_start"
        ].transform(lambda x: x == x.max())

    return df
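
A minimal usage sketch of the helpers above, assuming they are in scope; the frame and column values are hypothetical stand-ins for real HAPI rows, with a null adm1_code mimicking the mixed-in national total described in the docstring:

```
import pandas as pd

# Hypothetical frame: one national total (null adm1_code) plus two
# disaggregated rows for the same country
df = pd.DataFrame(
    {
        "adm0_code": ["MLI", "MLI", "MLI"],
        "adm1_code": [None, "ML01", "ML02"],
        "population": [100, 60, 40],
    }
)

standard_names = {"admin0_code_field": "adm0_code"}
print(post_process_data(df, standard_names))  # only the two disaggregated rows survive
```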
@@ -0,0 +1,137 @@
Using the database table list below, and the columns provided in each table, generate
Python that summarizes the following:

"Count of Organizations which are active on the ground in Mali, by sector"

Coding tips ...

The shapefile in the database will need to be converted to a GeoSeries for plotting; here is an example:

```
# Convert the data into a DataFrame
df = pd.DataFrame(rows, columns=["adm1_code", "population", "geometry"])

# Convert the 'geometry' column into a GeoSeries
df['geometry'] = df['geometry'].apply(lambda x: wkb.loads(x, hex=True))

# Convert the DataFrame into a GeoDataFrame
gdf = gpd.GeoDataFrame(df, geometry='geometry')
```

Also, please always save any images to a .png file.

Always specify a clear title on any graphs or maps.
Always add annotations, labels, and units on any graphs/maps.
You can use any kind of visualization.

IMPORTANT: Generate reusable code by putting it in a function with arguments, and provide an example of how to call it.

Always print any SQL statements and the size of the results returned.

Database connection details are in the following environment variables (saved in the .env file) ...

POSTGRES_DATA_HOST
POSTGRES_DATA_PORT
POSTGRES_DATA_DB
POSTGRES_DATA_USER
POSTGRES_DATA_PASSWORD

Use the Python dotenv module to load these environment variables.
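
For illustration, a minimal sketch of what that loading step could look like, assuming python-dotenv and psycopg2 as the package choices (the prompt does not pin a driver):

```
import os

import psycopg2
from dotenv import load_dotenv

# Read the POSTGRES_DATA_* variables from the .env file into the environment
load_dotenv()

conn = psycopg2.connect(
    host=os.getenv("POSTGRES_DATA_HOST"),
    port=os.getenv("POSTGRES_DATA_PORT"),
    dbname=os.getenv("POSTGRES_DATA_DB"),
    user=os.getenv("POSTGRES_DATA_USER"),
    password=os.getenv("POSTGRES_DATA_PASSWORD"),
)
```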
In SQL queries with more than one table, always use table aliases to avoid ambiguous columns.

Make note of column types; if you are asked to plot the count of something, SUM will not work.

Always use country codes instead of names where possible.

Tables and their columns ...

{
  "select table_name, summary, columns from table_metadata\n": [
    {
      "table_name": "hapi_admin1",
      "summary": "['Locations and Administrative Divisions']",
      "columns": "code (text); name (text); adm0_code (text); location_name (text); "
    },
    {
      "table_name": "hapi_admin2",
      "summary": "['Locations and Administrative Divisions']",
      "columns": "code (text); name (text); adm1_code (text); adm1_name (text); adm0_code (text); location_name (text); "
    },
    {
      "table_name": "hapi_age_range",
      "summary": "['Age and Gender Disaggregations']",
      "columns": "age_min (bigint); age_max (double precision); code (text); "
    },
    {
      "table_name": "hapi_dataset",
      "summary": "['HDX Metadata']",
      "columns": "hdx_id (text); hdx_stub (text); title (text); hdx_provider_stub (text); hdx_provider_name (text); hdx_link (text); hdx_api_link (text); "
    },
    {
      "table_name": "hapi_3w",
      "summary": "['3W Operational Presence']",
      "columns": "reference_period_end (double precision); dataset_hdx_stub (text); resource_hdx_id (text); org_acronym (text); org_name (text); sector_name (text); adm0_code (text); location_name (text); reference_period_start (text); adm1_code (text); adm1_name (text); adm2_code (text); sector_code (text); adm2_name (text); "
    },
    {
      "table_name": "hapi_gender",
      "summary": "['Age and Gender Disaggregations']",
      "columns": "code (text); description (text); "
    },
    {
      "table_name": "hapi_location",
      "summary": "['Locations and Administrative Divisions']",
      "columns": "code (text); name (text); "
    },
    {
      "table_name": "hapi_org",
      "summary": "['Humanitarian Organizations and Sectors']",
      "columns": "org_type_code (double precision); acronym (text); name (text); org_type_description (text); "
    },
    {
      "table_name": "hapi_org_type",
      "summary": "['Humanitarian Organizations and Sectors']",
      "columns": "code (bigint); description (text); "
    },
    {
      "table_name": "hapi_population_group",
      "summary": "['Population Groups and Statuses']",
      "columns": "code (text); description (text); "
    },
    {
      "table_name": "hapi_population_status",
      "summary": "['Population Groups and Statuses']",
      "columns": "code (text); description (text); "
    },
    {
      "table_name": "hapi_resource",
      "summary": "['HDX Metadata']",
      "columns": "is_hxl (boolean); name (text); format (text); update_date (text); download_url (text); dataset_hdx_id (text); dataset_hdx_stub (text); dataset_title (text); dataset_hdx_provider_stub (text); dataset_hdx_provider_name (text); hdx_link (text); hdx_api_link (text); dataset_hdx_link (text); hdx_id (text); dataset_hdx_api_link (text); "
    },
    {
      "table_name": "hapi_food_security",
      "summary": "['Food Security']",
      "columns": "population_in_phase (bigint); population_fraction_in_phase (double precision); ipc_phase_code (text); ipc_phase_name (text); ipc_type_code (text); reference_period_start (text); reference_period_end (text); dataset_hdx_stub (text); dataset_hdx_provider_stub (text); resource_hdx_id (text); adm0_code (text); location_name (text); adm1_code (text); adm1_name (text); adm2_code (text); adm2_name (text); "
    },
    {
      "table_name": "hapi_humanitarian_needs",
      "summary": "['Humanitarian Needs']",
      "columns": "population (bigint); age_range_code (text); disabled_marker (text); sector_code (text); sector_name (text); population_status_code (text); population_group_code (text); reference_period_start (text); reference_period_end (text); dataset_hdx_stub (text); dataset_hdx_provider_stub (text); resource_hdx_id (text); adm0_code (text); location_name (text); adm1_code (text); adm1_name (text); adm2_code (text); gender_code (text); adm2_name (text); "
    },
    {
      "table_name": "hapi_national_risk",
      "summary": "['National Risk']",
      "columns": "risk_class (bigint); global_rank (bigint); overall_risk (double precision); hazard_exposure_risk (double precision); vulnerability_risk (double precision); coping_capacity_risk (double precision); meta_missing_indicators_pct (double precision); meta_avg_recentness_years (double precision); reference_period_start (text); reference_period_end (text); dataset_hdx_stub (text); dataset_hdx_provider_stub (text); resource_hdx_id (text); adm0_code (text); location_name (text); "
    },
    {
      "table_name": "hapi_population",
      "summary": "['Baseline Population']",
      "columns": "population (bigint); age_range_code (text); reference_period_start (text); reference_period_end (text); dataset_hdx_stub (text); resource_hdx_id (text); adm0_code (text); location_name (text); adm1_code (text); adm1_name (text); adm2_code (text); gender_code (text); adm2_name (text); "
    },
    {
      "table_name": "hdx_shape_files",
      "summary": "HDX Shape Files",
      "columns": "geometry (USER-DEFINED); OBJECTID (double precision); AREA_SQKM (double precision); Shape_Area (double precision); Shape_Leng (double precision); ADM1ALT2FR (text); ADM0_FR (text); adm0_code (text); date (text); validOn (text); validTo (text); ADM2_FR (text); adm2_code (text); ADM2_REF (text); ADM2ALT1FR (text); ADM2ALT2FR (text); ADM1_EN (text); ADM1ALT1EN (text); ADM1ALT2EN (text); ADM0_EN (text); ADM2_EN (text); ADM2ALT1EN (text); ADM2ALT2EN (text); ADM1_ES (text); ADM1ALT1ES (text); ADM1ALT2ES (text); ADM0_ES (text); ADM2_ES (text); ADM2ALT1ES (text); ADM2ALT2ES (text); ValidTo (text); ADM1_HT (text); ADM1ALT1HT (text); ADM1ALT2HT (text); ADM0_HT (text); ADM2_HT (text); ADM2ALT1HT (text); ADM2ALT2HT (text); ADM1_MY (text); ADM1_ALTPC (text); ADM0_MY (text); ADM2_MY (text); ADM1_PT (text); ADM1ALT1PT (text); ADM1ALT2PT (text); ADM0_PT (text); ADM2_PT (text); ADM2ALT1PT (text); ADM2ALT2PT (text); SD_EN (text); SD_PCODE (text); ADM1_AR (text); ADM1ALT1AR (text); ADM1ALT2AR (text); ADM0_AR (text); ADM2_AR (text); ADM2ALT1AR (text); ADM2ALT2AR (text); admin1Name (text); admin1RefN (text); admin1Na_1 (text); admin1AltN (text); admin1Al_1 (text); admin0Name (text); admin2Name (text); admin2RefN (text); admin2Na_1 (text); admin2AltN (text); admin2Al_1 (text); ADM1_UA (text); ADM1_RU (text); ADM0_UA (text); ADM0_RU (text); ADM2_UA (text); ADM2_RU (text); ADM1_FR (text); adm1_code (text); ADM1_REF (text); ADM1ALT1FR (text); "
    }
  ]
}
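
To close the loop on the task above, here is a hedged sketch of the kind of reusable answer the prompt asks for: a function with arguments plus an example call, counting distinct organizations in hapi_3w by sector. It assumes "MLI" is the adm0_code for Mali and reuses the connection pattern sketched earlier:

```
import os

import pandas as pd
import psycopg2
from dotenv import load_dotenv


def org_count_by_sector(adm0_code):
    """Count distinct active organizations per sector for one country."""
    load_dotenv()
    conn = psycopg2.connect(
        host=os.getenv("POSTGRES_DATA_HOST"),
        port=os.getenv("POSTGRES_DATA_PORT"),
        dbname=os.getenv("POSTGRES_DATA_DB"),
        user=os.getenv("POSTGRES_DATA_USER"),
        password=os.getenv("POSTGRES_DATA_PASSWORD"),
    )
    # Table alias per the tip above; COUNT(DISTINCT ...) because summing a
    # text column will not work
    sql = """
        SELECT t.sector_name, COUNT(DISTINCT t.org_name) AS org_count
        FROM hapi_3w t
        WHERE t.adm0_code = %s
        GROUP BY t.sector_name
        ORDER BY org_count DESC
    """
    print(sql)  # the prompt asks for SQL statements to be printed
    df = pd.read_sql_query(sql, conn, params=(adm0_code,))
    conn.close()
    print(f"Rows returned: {len(df)}")
    return df


# Example call, assuming "MLI" is the adm0_code for Mali
print(org_count_by_sector("MLI"))
```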