Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Irods ssl connection #19014

Merged
merged 11 commits into from
Nov 12, 2024
14 changes: 14 additions & 0 deletions lib/galaxy/config/sample/object_store_conf.sample.yml
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,20 @@ connection:
timeout: 30
refresh_time: 300
connection_pool_monitor_interval: 3600
# The ssl section is optional.
ssl:
client_server_negotiation: request_server_negotiation
client_server_policy: CS_NEG_REQUIRE
encryption_algorithm: AES-256-CBC
encryption_key_size: 32
encryption_num_hash_rounds: 16
encryption_salt_size: 8
ssl_verify_server: cert
ssl_ca_certificate_file: /etc/irods/ssl/irods.crt
# The logical section is optional and defines the remote (logical) path.
# If it is not defined, the default path is /zone_name/home/username.
logical:
path: /tempZone/home/rods
cache:
path: database/object_store_cache_irods
size: 1000
Expand Down
8 changes: 8 additions & 0 deletions lib/galaxy/config/sample/object_store_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -195,13 +195,21 @@

<!--
Sample iRODS Object Store

The ssl section is optional.
The logical section is optional and defines the remote (logical) path.
If it is not defined, the default path is /zone_name/home/username.
-->
<!--
<object_store type="irods">
<auth username="rods" password="rods" />
<resource name="demoResc" />
<zone name="tempZone" />
<connection host="localhost" port="1247" timeout="30" refresh_time="300" connection_pool_monitor_interval="3600"/>
<ssl client_server_negotiation="request_server_negotiation" client_server_policy="CS_NEG_REQUIRE"
encryption_algorithm="AES-256-CBC" encryption_key_size="32" encryption_num_hash_rounds="16"
encryption_salt_size="8" ssl_verify_server="cert" ssl_ca_certificate_file="/etc/irods/ssl/irods.crt" />
<logical path="/tempZone/home/rods" />
<cache path="database/object_store_cache_irods" size="1000" cache_updated_data="True" />
<extra_dir type="job_work" path="database/job_working_directory_irods"/>
<extra_dir type="temp" path="database/tmp_irods"/>
Expand Down
115 changes: 97 additions & 18 deletions lib/galaxy/objectstore/irods.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging
import os
import shutil
import ssl
import threading
from datetime import datetime
from pathlib import Path
Expand Down Expand Up @@ -40,7 +41,7 @@ def _config_xml_error(tag):


def _config_dict_error(key):
msg = "No {key} key in config dictionary".forma(key=key)
msg = f"No {key} key in config dictionary"
raise Exception(msg)


Expand All @@ -52,6 +53,26 @@ def parse_config_xml(config_xml):
username = a_xml[0].get("username")
password = a_xml[0].get("password")

s_xml = config_xml.findall("ssl")
if s_xml:
client_server_negotiation = s_xml[0].get("client_server_negotiation", None)
client_server_policy = s_xml[0].get("client_server_policy", None)
encryption_algorithm = s_xml[0].get("encryption_algorithm", None)
encryption_key_size = int(s_xml[0].get("encryption_key_size", None))
encryption_num_hash_rounds = int(s_xml[0].get("encryption_num_hash_rounds", None))
encryption_salt_size = int(s_xml[0].get("encryption_salt_size", None))
ssl_verify_server = s_xml[0].get("ssl_verify_server", None)
ssl_ca_certificate_file = s_xml[0].get("ssl_ca_certificate_file", None)
else:
client_server_negotiation = None
client_server_policy = None
encryption_algorithm = None
encryption_key_size = None
encryption_num_hash_rounds = None
encryption_salt_size = None
ssl_verify_server = None
ssl_ca_certificate_file = None

r_xml = config_xml.findall("resource")
if not r_xml:
_config_xml_error("resource")
Expand All @@ -71,6 +92,12 @@ def parse_config_xml(config_xml):
refresh_time = int(c_xml[0].get("refresh_time", 300))
connection_pool_monitor_interval = int(c_xml[0].get("connection_pool_monitor_interval", -1))

l_xml = config_xml.findall("logical")
if l_xml:
logical_path = l_xml[0].get("path", None)
else:
logical_path = None

c_xml = config_xml.findall("cache")
if not c_xml:
_config_xml_error("cache")
Expand All @@ -89,6 +116,16 @@ def parse_config_xml(config_xml):
"username": username,
"password": password,
},
"ssl": {
"client_server_negotiation": client_server_negotiation,
"client_server_policy": client_server_policy,
"encryption_algorithm": encryption_algorithm,
"encryption_key_size": encryption_key_size,
"encryption_num_hash_rounds": encryption_num_hash_rounds,
"encryption_salt_size": encryption_salt_size,
"ssl_verify_server": ssl_verify_server,
"ssl_ca_certificate_file": ssl_ca_certificate_file,
},
"resource": {
"name": resource_name,
},
Expand All @@ -102,6 +139,9 @@ def parse_config_xml(config_xml):
"refresh_time": refresh_time,
"connection_pool_monitor_interval": connection_pool_monitor_interval,
},
"logical": {
"path": logical_path,
},
"cache": {
"size": cache_size,
"path": staging_path,
Expand Down Expand Up @@ -139,6 +179,17 @@ def __init__(self, config, config_dict):
if self.password is None:
_config_dict_error("auth->password")

ssl_dict = config_dict.get("ssl") or {}

self.client_server_negotiation = ssl_dict.get("client_server_negotiation")
self.client_server_policy = ssl_dict.get("client_server_policy")
self.encryption_algorithm = ssl_dict.get("encryption_algorithm")
self.encryption_key_size = ssl_dict.get("encryption_key_size")
self.encryption_num_hash_rounds = ssl_dict.get("encryption_num_hash_rounds")
self.encryption_salt_size = ssl_dict.get("encryption_salt_size")
self.ssl_verify_server = ssl_dict.get("ssl_verify_server")
self.ssl_ca_certificate_file = ssl_dict.get("ssl_ca_certificate_file")

resource_dict = config_dict["resource"]
if resource_dict is None:
_config_dict_error("resource")
Expand Down Expand Up @@ -172,6 +223,11 @@ def __init__(self, config, config_dict):
if self.connection_pool_monitor_interval is None:
_config_dict_error("connection->connection_pool_monitor_interval")

logical_dict = config_dict.get("logical") or {}
self.logical_path = logical_dict.get("path") or f"/{self.zone}/home/{self.username}"
Copy link
Contributor Author

@pauldg pauldg Nov 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mvdbeek I think I did cover backward-compatibility by setting the default for self.logical_path to f"/{self.zone}/home/{self.username}" (previously self.home)

if self.logical_path is None:
_config_dict_error("logical->path")

cache_dict = config_dict.get("cache") or {}
self.cache_size = cache_dict.get("size") or self.config.object_store_cache_path
if self.cache_size is None:
Expand All @@ -189,19 +245,29 @@ def __init__(self, config, config_dict):
if irods is None:
raise Exception(IRODS_IMPORT_MESSAGE)

self.home = f"/{self.zone}/home/{self.username}"

if irods is None:
raise Exception(IRODS_IMPORT_MESSAGE)

self.session = iRODSSession(
host=self.host,
port=self.port,
user=self.username,
password=self.password,
zone=self.zone,
refresh_time=self.refresh_time,
)
session_params = {
"host": self.host,
"port": self.port,
"user": self.username,
"password": self.password,
"zone": self.zone,
"refresh_time": self.refresh_time,
"client_server_negotiation": self.client_server_negotiation,
"client_server_policy": self.client_server_policy,
"encryption_algorithm": self.encryption_algorithm,
"encryption_key_size": self.encryption_key_size,
"encryption_num_hash_rounds": self.encryption_num_hash_rounds,
"encryption_salt_size": self.encryption_salt_size,
"ssl_verify_server": self.ssl_verify_server,
"ssl_ca_certificate_file": self.ssl_ca_certificate_file,
"ssl_context": ssl.create_default_context(purpose=ssl.Purpose.SERVER_AUTH),
}

self.session = iRODSSession(**session_params)

# Set connection timeout
self.session.connection_timeout = self.timeout

Expand Down Expand Up @@ -285,6 +351,16 @@ def _config_to_dict(self):
"username": self.username,
"password": self.password,
},
"ssl": {
"client_server_negotiation": self.client_server_negotiation,
"client_server_policy": self.client_server_policy,
"encryption_algorithm": self.encryption_algorithm,
"encryption_key_size": self.encryption_key_size,
"encryption_num_hash_rounds": self.encryption_num_hash_rounds,
"encryption_salt_size": self.encryption_salt_size,
"ssl_verify_server": self.ssl_verify_server,
"ssl_ca_certificate_file": self.ssl_ca_certificate_file,
},
"resource": {
"name": self.resource,
},
Expand All @@ -298,6 +374,9 @@ def _config_to_dict(self):
"refresh_time": self.refresh_time,
"connection_pool_monitor_interval": self.connection_pool_monitor_interval,
},
"logical": {
"path": self.logical_path,
},
"cache": {
"size": self.cache_size,
"path": self.staging_path,
Expand All @@ -312,7 +391,7 @@ def _get_remote_size(self, rel_path):
data_object_name = p.stem + p.suffix
subcollection_name = p.parent

collection_path = f"{self.home}/{subcollection_name}"
collection_path = f"{self.logical_path}/{subcollection_name}"
data_object_path = f"{collection_path}/{data_object_name}"
options = {kw.DEST_RESC_NAME_KW: self.resource}

Expand All @@ -332,7 +411,7 @@ def _exists_remotely(self, rel_path):
data_object_name = p.stem + p.suffix
subcollection_name = p.parent

collection_path = f"{self.home}/{subcollection_name}"
collection_path = f"{self.logical_path}/{subcollection_name}"
data_object_path = f"{collection_path}/{data_object_name}"
options = {kw.DEST_RESC_NAME_KW: self.resource}

Expand All @@ -354,7 +433,7 @@ def _download(self, rel_path):
data_object_name = p.stem + p.suffix
subcollection_name = p.parent

collection_path = f"{self.home}/{subcollection_name}"
collection_path = f"{self.logical_path}/{subcollection_name}"
data_object_path = f"{collection_path}/{data_object_name}"
# we need to allow irods to override already existing zero-size output files created
# in object store cache during job setup (see also https://github.com/galaxyproject/galaxy/pull/17025#discussion_r1394517033)
Expand Down Expand Up @@ -395,7 +474,7 @@ def _push_to_storage(self, rel_path, source_file=None, from_string=None):
return False

# Check if the data object exists in iRODS
collection_path = f"{self.home}/{subcollection_name}"
collection_path = f"{self.logical_path}/{subcollection_name}"
data_object_path = f"{collection_path}/{data_object_name}"
exists = False

Expand Down Expand Up @@ -473,7 +552,7 @@ def _delete(self, obj, entire_dir: bool = False, **kwargs) -> bool:
if entire_dir and extra_dir:
shutil.rmtree(self._get_cache_path(rel_path), ignore_errors=True)

col_path = f"{self.home}/{rel_path}"
col_path = f"{self.logical_path}/{rel_path}"
col = None
try:
col = self.session.collections.get(col_path)
Expand Down Expand Up @@ -501,7 +580,7 @@ def _delete(self, obj, entire_dir: bool = False, **kwargs) -> bool:
data_object_name = p.stem + p.suffix
subcollection_name = p.parent

collection_path = f"{self.home}/{subcollection_name}"
collection_path = f"{self.logical_path}/{subcollection_name}"
data_object_path = f"{collection_path}/{data_object_name}"

try:
Expand All @@ -527,7 +606,7 @@ def _get_object_url(self, obj, **kwargs):
data_object_name = p.stem + p.suffix
subcollection_name = p.parent

collection_path = f"{self.home}/{subcollection_name}"
collection_path = f"{self.logical_path}/{subcollection_name}"
data_object_path = f"{collection_path}/{data_object_name}"

return data_object_path
Expand Down
11 changes: 11 additions & 0 deletions test/unit/objectstore/irods_object_store_conf_logical_path.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?xml version="1.0"?>
<object_store type="irods">
<auth username="rods" password="rods" />
<resource name="demoResc" />
<zone name="tempZone" />
<connection host="localhost" port="1247" timeout="30" refresh_time="300" connection_pool_monitor_interval="3600"/>
<logical path="/tempZone/home/rods" />
<cache path="database/object_store_cache" size="1000" />
<extra_dir type="job_work" path="database/job_working_directory_irods"/>
<extra_dir type="temp" path="database/tmp_irods"/>
</object_store>
13 changes: 13 additions & 0 deletions test/unit/objectstore/irods_object_store_conf_ssl.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?xml version="1.0"?>
<object_store type="irods">
<auth username="rods" password="rods" />
<resource name="demoResc" />
<zone name="tempZone" />
<connection host="localhost" port="1247" timeout="30" refresh_time="300" connection_pool_monitor_interval="3600"/>
<ssl client_server_negotiation="request_server_negotiation" client_server_policy="CS_NEG_REQUIRE"
encryption_algorithm="AES-256-CBC" encryption_key_size="32" encryption_num_hash_rounds="16"
encryption_salt_size="8" ssl_verify_server="cert" ssl_ca_certificate_file="/etc/irods/ssl/irods.crt" />
<cache path="database/object_store_cache" size="1000" />
<extra_dir type="job_work" path="database/job_working_directory_irods"/>
<extra_dir type="temp" path="database/tmp_irods"/>
</object_store>
29 changes: 29 additions & 0 deletions test/unit/objectstore/test_irods.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@
CONFIG_FILE_NAME_NO_AUTH = "irods_object_store_conf_no_auth.xml"
CONFIG_FILE_NO_AUTH = os.path.join(SCRIPT_DIRECTORY, CONFIG_FILE_NAME_NO_AUTH)

CONFIG_FILE_NAME_SSL = "irods_object_store_conf_ssl.xml"
CONFIG_FILE_SSL = os.path.join(SCRIPT_DIRECTORY, CONFIG_FILE_NAME_SSL)

CONFIG_FILE_NAME_LOGICAL_PATH = "irods_object_store_conf_logical_path.xml"
CONFIG_FILE_LOGICAL_PATH = os.path.join(SCRIPT_DIRECTORY, CONFIG_FILE_NAME_LOGICAL_PATH)


def test_parse_valid_config_xml():
tree = parse_xml(CONFIG_FILE)
Expand All @@ -39,6 +45,29 @@ def test_parse_valid_config_xml():
assert config["extra_dirs"][1]["path"] == "database/tmp_irods"


def test_parse_config_xml_ssl():
    """Every ``<ssl>`` attribute of the SSL sample config is parsed.

    String attributes are returned verbatim, while the key size, hash rounds
    and salt size are expected as integers.
    """
    ssl_section = parse_config_xml(parse_xml(CONFIG_FILE_SSL).getroot())["ssl"]

    expected_values = (
        ("client_server_negotiation", "request_server_negotiation"),
        ("client_server_policy", "CS_NEG_REQUIRE"),
        ("encryption_algorithm", "AES-256-CBC"),
        ("encryption_key_size", 32),
        ("encryption_num_hash_rounds", 16),
        ("encryption_salt_size", 8),
        ("ssl_verify_server", "cert"),
        ("ssl_ca_certificate_file", "/etc/irods/ssl/irods.crt"),
    )
    for option, expected in expected_values:
        assert ssl_section[option] == expected


def test_parse_config_xml_logical_path():
    """The ``path`` of the optional ``<logical>`` element is exposed under ``config["logical"]``."""
    root_element = parse_xml(CONFIG_FILE_LOGICAL_PATH).getroot()
    logical_section = parse_config_xml(root_element)["logical"]

    assert logical_section["path"] == "/tempZone/home/rods"


def test_parse_config_xml_no_extra_dir():
tree = parse_xml(CONFIG_FILE_NO_EXTRA_DIR)
root = tree.getroot()
Expand Down
Loading