Skip to content

Commit

Permalink
add odpy conversion scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
Your Name committed Nov 29, 2023
1 parent a686f64 commit 0860e81
Show file tree
Hide file tree
Showing 10 changed files with 481 additions and 1 deletion.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -125,4 +125,5 @@ failed_files.txt
temp_scripts
projects/ios_data_transform/local
projects/ios_data_transform/**log.*
ios_nc_conversion.log*
ios_nc_conversion.log*
projects/ios_data_transform/*-conversion.csv
58 changes: 58 additions & 0 deletions projects/ios_data_transform/odpy-ios-ane-conversion.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Settings for converting IOS Shell ".ane" files (any letter case) found under
# /data/ios_raw_files/weather_data with the dfo.ios.shell parser.

# File input
input_path: "/data/ios_raw_files/weather_data/**/*.[aA][nN][eE]"  # file or glob expression
exclude: "HISTORY"  # glob expression of files to exclude
parser: dfo.ios.shell
overwrite: false
multiprocessing: 3  # n processes to run [int] or null for all
errors: "ignore"  # raise|ignore
registry:
  # NOTE(review): "opdy" looks like a transposition of the "odpy" prefix used in
  # this config's own file name. Value kept unchanged because renaming it would
  # point at a different registry file -- confirm before changing.
  path: opdy-ios-ane-conversion.csv  # file_registry(.csv | .parquet)

sentry:
  # dsn: https://[email protected]/4505529390137344
  level: INFO
  event_level: WARNING
  # Plain float. A trailing comma after the number would become part of the
  # scalar and turn the value into the string "1.0,".
  traces_sample_rate: 1.0

# Attributes and Metadata
variable_attributes: {}
global_attributes:
  # NOTE(review): the summary describes CTDs mounted on moorings while the title
  # says weather stations -- confirm the summary fits this dataset.
  summary: "This dataset contains observations made by the Institute of Ocean Sciences of Fisheries and Oceans (DFO) using CTDs mounted on moorings. The moorings were deployed by DFO and partner organizations between 1965 to present. Variables present in the dataset include Temperature, Salinity, Oxygen, Conductivity, Depth, and Pressure as a timeseries. For any questions please contact ([email protected])."
  title: "IOS Weather Stations Data"
  infoUrl: "http://www.pac.dfo-mpo.gc.ca/science/oceans/data-donnees/index-eng.html"
  institution: "Institute of Ocean Sciences, 9860 West Saanich Road, Sidney, B.C., Canada"
  keywords: "CTD, depth, pressure, temperature, salinity, oxygen, subSurfaceSalinity, subSurfaceTemperature"
  keywords_vocabulary: "GCMD Science Keywords"
  standard_name_vocabulary: "CF Standard Name Table v29"
  publisher_name: "Hakai Institute"
  publisher_email: "[email protected]"
  publisher_url: "http://www.hakai.org"
  comment: "Data converted from IOS Shell format"
  acknowledgement: "n/a"
  license: "canada open government license"
  Conventions: "COARDS, CF-1.7, ACDD-1.3"
  processing_level: "2"  # quoted so it stays a string rather than an integer
file_specific_attributes_path: null  # Path to csv file with one column called "file"
global_attribute_mapping:
  path: null  # Path to csv file (accept glob parameter for multiple files)
  mapping: null  # mapping dataframe
  by: []  # global attributes list
  log_level: WARNING  # [null, WARNING,ERROR] level log when no mapping exist

# Geospatial References
reference_stations:
  path: null
  maximum_distance_from_reference_station_km: null

# NOTE(review): key spelling ("geograhical") kept exactly as in the original;
# presumably the consuming tool reads this exact name -- verify before renaming.
reference_geograhical_areas:
  path: null

# Outputs
output:
  path: "/data/erddap_data/IOS_ANE_data"
  file_name: null
  # NOTE(review): "file_preffix" spelling kept as in the original; presumably it
  # matches the key the consuming tool expects -- verify before renaming.
  file_preffix: ""
  file_suffix: ""
  output_format: ".nc"
57 changes: 57 additions & 0 deletions projects/ios_data_transform/odpy-ios-bot-conversion.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Settings for converting IOS Shell bottle/chemistry files found under
# /data/ios_raw_files/cruise_data with the dfo.ios.shell parser.

# File input
# The three character classes match ".bot" and ".che" in any letter case; they
# also admit mixed combinations such as ".cot" or ".bhe".
input_path: "/data/ios_raw_files/cruise_data/**/*.[bBcC][oOhH][eEtT]"  # file or glob expression
exclude: "HISTORY"  # glob expression of files to exclude
parser: dfo.ios.shell
overwrite: false
multiprocessing: 3  # n processes to run [int] or null for all
errors: "ignore"  # raise|ignore
registry:
  # NOTE(review): "opdy" looks like a transposition of the "odpy" prefix used in
  # this config's own file name. Value kept unchanged because renaming it would
  # point at a different registry file -- confirm before changing.
  path: opdy-ios-bot-conversion.csv  # file_registry(.csv | .parquet)

sentry:
  # dsn: https://[email protected]/4505529390137344
  level: INFO
  event_level: WARNING
  # Plain float. A trailing comma after the number would become part of the
  # scalar and turn the value into the string "1.0,".
  traces_sample_rate: 1.0

# Attributes and Metadata
variable_attributes: {}
global_attributes:
  # NOTE(review): the summary describes CTDs mounted on moorings while the title
  # says Niskin bottle samples -- confirm the summary fits this dataset.
  summary: "This dataset contains observations made by the Institute of Ocean Sciences of Fisheries and Oceans (DFO) using CTDs mounted on moorings. The moorings were deployed by DFO and partner organizations between 1965 to present. Variables present in the dataset include Temperature, Salinity, Oxygen, Conductivity, Depth, and Pressure as a timeseries. For any questions please contact ([email protected])."
  title: "Data from Niskin bottle samples"
  infoUrl: "http://www.pac.dfo-mpo.gc.ca/science/oceans/data-donnees/index-eng.html"
  institution: "Institute of Ocean Sciences, 9860 West Saanich Road, Sidney, B.C., Canada"
  keywords: "CTD, depth, pressure, temperature, salinity, oxygen, subSurfaceSalinity, subSurfaceTemperature"
  keywords_vocabulary: "GCMD Science Keywords"
  standard_name_vocabulary: "CF Standard Name Table v29"
  publisher_name: "Hakai Institute"
  publisher_email: "[email protected]"
  publisher_url: "http://www.hakai.org"
  comment: "Data converted from IOS Shell format"
  acknowledgement: "n/a"
  license: "canada open government license"
  Conventions: "COARDS, CF-1.7, ACDD-1.3"
  processing_level: "2"  # quoted so it stays a string rather than an integer
file_specific_attributes_path: null  # Path to csv file with one column called "file"
global_attribute_mapping:
  path: null  # Path to csv file (accept glob parameter for multiple files)
  mapping: null  # mapping dataframe
  by: []  # global attributes list
  log_level: WARNING  # [null, WARNING,ERROR] level log when no mapping exist

# Geospatial References
reference_stations:
  path: null
  maximum_distance_from_reference_station_km: null

# NOTE(review): key spelling ("geograhical") kept exactly as in the original;
# presumably the consuming tool reads this exact name -- verify before renaming.
reference_geograhical_areas:
  path: "/home/cioos/cioos-siooc_data_transform/projects/ios_data_transform/ios_polygons.geojson"

# Outputs
output:
  path: "/data/erddap_data/IOS_BOT_profiles"
  file_name: null
  # NOTE(review): "file_preffix" spelling kept as in the original; presumably it
  # matches the key the consuming tool expects -- verify before renaming.
  file_preffix: ""
  file_suffix: ""
  output_format: ".nc"
58 changes: 58 additions & 0 deletions projects/ios_data_transform/odpy-ios-ctd-conversion.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Settings for converting IOS Shell ".ctd" files (any letter case) found under
# /data/ios_raw_files/cruise_data with the dfo.ios.shell parser.

# File input
input_path: "/data/ios_raw_files/cruise_data/**/*.[cC][tT][dD]"  # file or glob expression
exclude: "HISTORY"  # glob expression of files to exclude
parser: dfo.ios.shell
overwrite: false
multiprocessing: 3  # n processes to run [int] or null for all
errors: "ignore"  # raise|ignore
registry:
  # NOTE(review): "opdy" looks like a transposition of the "odpy" prefix used in
  # this config's own file name. Value kept unchanged because renaming it would
  # point at a different registry file -- confirm before changing.
  path: opdy-ios-ctd-conversion.csv  # file_registry(.csv | .parquet)

sentry:
  # dsn: https://[email protected]/4505529390137344
  level: INFO
  event_level: WARNING
  # Plain float. A trailing comma after the number would become part of the
  # scalar and turn the value into the string "1.0,".
  traces_sample_rate: 1.0

# Attributes and Metadata
variable_attributes: {}
global_attributes:
  # NOTE(review): the summary describes CTDs mounted on moorings and a
  # timeseries, while the title says CTD profile data and the input comes from
  # cruise_data -- confirm the summary fits this dataset.
  summary: "This dataset contains observations made by the Institute of Ocean Sciences of Fisheries and Oceans (DFO) using CTDs mounted on moorings. The moorings were deployed by DFO and partner organizations between 1965 to present. Variables present in the dataset include Temperature, Salinity, Oxygen, Conductivity, Depth, and Pressure as a timeseries. For any questions please contact ([email protected])."
  title: "IOS CTD profile data"
  infoUrl: "http://www.pac.dfo-mpo.gc.ca/science/oceans/data-donnees/index-eng.html"
  institution: "Institute of Ocean Sciences, 9860 West Saanich Road, Sidney, B.C., Canada"
  keywords: "CTD, depth, pressure, temperature, salinity, oxygen, subSurfaceSalinity, subSurfaceTemperature"
  keywords_vocabulary: "GCMD Science Keywords"
  standard_name_vocabulary: "CF Standard Name Table v29"
  publisher_name: "Hakai Institute"
  publisher_email: "[email protected]"
  publisher_url: "http://www.hakai.org"
  comment: "Data converted from IOS Shell format"
  acknowledgement: "n/a"
  license: "canada open government license"
  Conventions: "COARDS, CF-1.7, ACDD-1.3"
  processing_level: "2"  # quoted so it stays a string rather than an integer
file_specific_attributes_path: null  # Path to csv file with one column called "file"
global_attribute_mapping:
  path: null  # Path to csv file (accept glob parameter for multiple files)
  mapping: null  # mapping dataframe
  by: []  # global attributes list
  log_level: WARNING  # [null, WARNING,ERROR] level log when no mapping exist

# Geospatial References
reference_stations:
  path: null
  maximum_distance_from_reference_station_km: null

# NOTE(review): key spelling ("geograhical") kept exactly as in the original;
# presumably the consuming tool reads this exact name -- verify before renaming.
reference_geograhical_areas:
  path: "/home/cioos/cioos-siooc_data_transform/projects/ios_data_transform/ios_polygons.geojson"

# Outputs
output:
  path: "/data/erddap_data/IOS_CTD_profiles"
  file_name: null
  # NOTE(review): "file_preffix" spelling kept as in the original; presumably it
  # matches the key the consuming tool expects -- verify before renaming.
  file_preffix: ""
  file_suffix: ""
  output_format: ".nc"
58 changes: 58 additions & 0 deletions projects/ios_data_transform/odpy-ios-cur-conversion.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Settings for converting IOS Shell ".cur" files (any letter case) found under
# /data/ios_raw_files/mooring_data with the dfo.ios.shell parser.

# File input
input_path: "/data/ios_raw_files/mooring_data/**/*.[cC][uU][rR]"  # file or glob expression
exclude: "HISTORY"  # glob expression of files to exclude
parser: dfo.ios.shell
overwrite: false
multiprocessing: 3  # n processes to run [int] or null for all
errors: "ignore"  # raise|ignore
registry:
  # NOTE(review): "opdy" looks like a transposition of the "odpy" prefix used in
  # this config's own file name. Value kept unchanged because renaming it would
  # point at a different registry file -- confirm before changing.
  path: opdy-ios-cur-conversion.csv  # file_registry(.csv | .parquet)

sentry:
  # dsn: https://[email protected]/4505529390137344
  level: INFO
  event_level: WARNING
  # Plain float. A trailing comma after the number would become part of the
  # scalar and turn the value into the string "1.0,".
  traces_sample_rate: 1.0

# Attributes and Metadata
variable_attributes: {}
global_attributes:
  # NOTE(review): summary and title both describe moored CTD data and are
  # identical to the ones in the mctd config, while this config reads "*.cur"
  # files and writes to IOS_CUR_moorings -- confirm they fit this dataset.
  summary: "This dataset contains observations made by the Institute of Ocean Sciences of Fisheries and Oceans (DFO) using CTDs mounted on moorings. The moorings were deployed by DFO and partner organizations between 1965 to present. Variables present in the dataset include Temperature, Salinity, Oxygen, Conductivity, Depth, and Pressure as a timeseries. For any questions please contact ([email protected])."
  title: "Moored CTD data from IOS"
  infoUrl: "http://www.pac.dfo-mpo.gc.ca/science/oceans/data-donnees/index-eng.html"
  institution: "Institute of Ocean Sciences, 9860 West Saanich Road, Sidney, B.C., Canada"
  keywords: "CTD, depth, pressure, temperature, salinity, oxygen, subSurfaceSalinity, subSurfaceTemperature"
  keywords_vocabulary: "GCMD Science Keywords"
  standard_name_vocabulary: "CF Standard Name Table v29"
  publisher_name: "Hakai Institute"
  publisher_email: "[email protected]"
  publisher_url: "http://www.hakai.org"
  comment: "Data converted from IOS Shell format"
  acknowledgement: "n/a"
  license: "canada open government license"
  Conventions: "COARDS, CF-1.7, ACDD-1.3"
  processing_level: "2"  # quoted so it stays a string rather than an integer
file_specific_attributes_path: null  # Path to csv file with one column called "file"
global_attribute_mapping:
  path: null  # Path to csv file (accept glob parameter for multiple files)
  mapping: null  # mapping dataframe
  by: []  # global attributes list
  log_level: WARNING  # [null, WARNING,ERROR] level log when no mapping exist

# Geospatial References
reference_stations:
  path: null
  maximum_distance_from_reference_station_km: null

# NOTE(review): key spelling ("geograhical") kept exactly as in the original;
# presumably the consuming tool reads this exact name -- verify before renaming.
reference_geograhical_areas:
  path: "/home/cioos/cioos-siooc_data_transform/projects/ios_data_transform/ios_polygons.geojson"

# Outputs
output:
  path: "/data/erddap_data/IOS_CUR_moorings"
  file_name: null
  # NOTE(review): "file_preffix" spelling kept as in the original; presumably it
  # matches the key the consuming tool expects -- verify before renaming.
  file_preffix: ""
  file_suffix: ""
  output_format: ".nc"
57 changes: 57 additions & 0 deletions projects/ios_data_transform/odpy-ios-drf-conversion.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Settings for converting IOS Shell ".drf" files (any letter case) found under
# /data/ios_raw_files/drifter_data with the dfo.ios.shell parser.

# File input
input_path: "/data/ios_raw_files/drifter_data/**/*.[dD][rR][fF]"  # file or glob expression
exclude: "HISTORY"  # glob expression of files to exclude
parser: dfo.ios.shell
overwrite: false
# NOTE(review): this is a boolean, whereas the option is documented below as an
# int or null and the sibling configs use 3. Parsed value kept unchanged --
# confirm what the consumer does with a boolean here.
multiprocessing: true  # n processes to run [int] or null for all
errors: "ignore"  # raise|ignore
registry:
  # NOTE(review): "opdy" looks like a transposition of the "odpy" prefix used in
  # this config's own file name. Value kept unchanged because renaming it would
  # point at a different registry file -- confirm before changing.
  path: opdy-ios-drf-conversion.csv  # file_registry(.csv | .parquet)

sentry:
  # dsn: https://[email protected]/4505529390137344
  level: INFO
  event_level: WARNING
  # Plain float. A trailing comma after the number would become part of the
  # scalar and turn the value into the string "1.0,".
  traces_sample_rate: 1.0

# Attributes and Metadata
variable_attributes: {}
global_attributes:
  # NOTE(review): the summary describes CTDs mounted on moorings while the title
  # says drifter data -- confirm the summary fits this dataset.
  summary: "This dataset contains observations made by the Institute of Ocean Sciences of Fisheries and Oceans (DFO) using CTDs mounted on moorings. The moorings were deployed by DFO and partner organizations between 1965 to present. Variables present in the dataset include Temperature, Salinity, Oxygen, Conductivity, Depth, and Pressure as a timeseries. For any questions please contact ([email protected])."
  title: "IOS Drifter Data"
  infoUrl: "http://www.pac.dfo-mpo.gc.ca/science/oceans/data-donnees/index-eng.html"
  institution: "Institute of Ocean Sciences, 9860 West Saanich Road, Sidney, B.C., Canada"
  keywords: "CTD, depth, pressure, temperature, salinity, oxygen, subSurfaceSalinity, subSurfaceTemperature"
  keywords_vocabulary: "GCMD Science Keywords"
  standard_name_vocabulary: "CF Standard Name Table v29"
  publisher_name: "Hakai Institute"
  publisher_email: "[email protected]"
  publisher_url: "http://www.hakai.org"
  comment: "Data converted from IOS Shell format"
  acknowledgement: "n/a"
  license: "canada open government license"
  Conventions: "COARDS, CF-1.7, ACDD-1.3"
  processing_level: "2"  # quoted so it stays a string rather than an integer
file_specific_attributes_path: null  # Path to csv file with one column called "file"
global_attribute_mapping:
  path: null  # Path to csv file (accept glob parameter for multiple files)
  mapping: null  # mapping dataframe
  by: []  # global attributes list
  log_level: WARNING  # [null, WARNING,ERROR] level log when no mapping exist

# Geospatial References
reference_stations:
  path: null
  maximum_distance_from_reference_station_km: null

# NOTE(review): key spelling ("geograhical") kept exactly as in the original;
# presumably the consuming tool reads this exact name -- verify before renaming.
reference_geograhical_areas:
  path: "/home/cioos/cioos-siooc_data_transform/projects/ios_data_transform/ios_polygons.geojson"

# Outputs
output:
  path: "/data/erddap_data/IOS_DRF_data"
  file_name: null
  # NOTE(review): "file_preffix" spelling kept as in the original; presumably it
  # matches the key the consuming tool expects -- verify before renaming.
  file_preffix: ""
  file_suffix: ""
  output_format: ".nc"
58 changes: 58 additions & 0 deletions projects/ios_data_transform/odpy-ios-mctd-conversion.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Settings for converting IOS Shell ".ctd" files (any letter case) found under
# /data/ios_raw_files/mooring_data with the dfo.ios.shell parser.

# File input
input_path: "/data/ios_raw_files/mooring_data/**/*.[cC][tT][dD]"  # file or glob expression
exclude: "HISTORY"  # glob expression of files to exclude
parser: dfo.ios.shell
overwrite: false
multiprocessing: 3  # n processes to run [int] or null for all
errors: "ignore"  # raise|ignore
registry:
  # NOTE(review): "opdy" looks like a transposition of the "odpy" prefix used in
  # this config's own file name. Value kept unchanged because renaming it would
  # point at a different registry file -- confirm before changing.
  path: opdy-ios-mctd-conversion.csv  # file_registry(.csv | .parquet)

sentry:
  # dsn: https://[email protected]/4505529390137344
  level: INFO
  event_level: WARNING
  # Plain float. A trailing comma after the number would become part of the
  # scalar and turn the value into the string "1.0,".
  traces_sample_rate: 1.0

# Attributes and Metadata
variable_attributes: {}
global_attributes:
  summary: "This dataset contains observations made by the Institute of Ocean Sciences of Fisheries and Oceans (DFO) using CTDs mounted on moorings. The moorings were deployed by DFO and partner organizations between 1965 to present. Variables present in the dataset include Temperature, Salinity, Oxygen, Conductivity, Depth, and Pressure as a timeseries. For any questions please contact ([email protected])."
  title: "Moored CTD data from IOS"
  infoUrl: "http://www.pac.dfo-mpo.gc.ca/science/oceans/data-donnees/index-eng.html"
  institution: "Institute of Ocean Sciences, 9860 West Saanich Road, Sidney, B.C., Canada"
  keywords: "CTD, depth, pressure, temperature, salinity, oxygen, subSurfaceSalinity, subSurfaceTemperature"
  keywords_vocabulary: "GCMD Science Keywords"
  standard_name_vocabulary: "CF Standard Name Table v29"
  publisher_name: "Hakai Institute"
  publisher_email: "[email protected]"
  publisher_url: "http://www.hakai.org"
  comment: "Data converted from IOS Shell format"
  acknowledgement: "n/a"
  license: "canada open government license"
  Conventions: "COARDS, CF-1.7, ACDD-1.3"
  processing_level: "2"  # quoted so it stays a string rather than an integer
file_specific_attributes_path: null  # Path to csv file with one column called "file"
global_attribute_mapping:
  path: null  # Path to csv file (accept glob parameter for multiple files)
  mapping: null  # mapping dataframe
  by: []  # global attributes list
  log_level: WARNING  # [null, WARNING,ERROR] level log when no mapping exist

# Geospatial References
reference_stations:
  path: null
  maximum_distance_from_reference_station_km: null

# NOTE(review): key spelling ("geograhical") kept exactly as in the original;
# presumably the consuming tool reads this exact name -- verify before renaming.
reference_geograhical_areas:
  path: "/home/cioos/cioos-siooc_data_transform/projects/ios_data_transform/ios_polygons.geojson"

# Outputs
output:
  path: "/data/erddap_data/IOS_CTD_moorings"
  file_name: null
  # NOTE(review): "file_preffix" spelling kept as in the original; presumably it
  # matches the key the consuming tool expects -- verify before renaming.
  file_preffix: ""
  file_suffix: ""
  output_format: ".nc"
Loading

0 comments on commit 0860e81

Please sign in to comment.