From 0c61ee963bf886cd187d5ee2ca47a950086fc732 Mon Sep 17 00:00:00 2001 From: Alex Leach Date: Sun, 18 Jul 2021 10:15:55 +0000 Subject: [PATCH] Fix TLS implementation and consolidate the jupyterhub public_url with the traefik entrypoint. * When using an external traefik proxy, the TraefikProxy classes will now query traefik's API for the entrypoint that matches the assigned public_url. See `TraefikProxy._get_traefik_entrypoint()`. * Only a single entrypoint is allowed by this manner (reverting suggested change in previous commit 3501d085a200dcbb2f9ca12f61b146890f4b5a57) to have multiple, configurable entrypoints. * Done away with configurable option `traefik_tls`, instead determining it from the scheme of `public_url`, i.e. http or https. * Fixed docstrings in kv_proxy.py, if sphinx complained. * Implemented various suggested changes by @GeorgianaElena in https://github.com/jupyterhub/traefik-proxy/pull/133 * Removed the standalone configuration loading and dumping functions from traefik_utils, leaving them only in TraefikConfigFileHandler. * Updated test suite so works when testing https URLs (without validation, as traefik will self-sign certificates) as well as http URLs. Currently, proxytest.py will do either http or https. Should it run both? --- docs/source/index.rst | 2 +- docs/source/yaml.md | 2 +- jupyterhub_traefik_proxy/fileprovider.py | 16 +++-- jupyterhub_traefik_proxy/proxy.py | 84 ++++++++++++++++++------ tests/conftest.py | 2 + tests/proxytest.py | 13 +++- tests/utils.py | 3 +- 7 files changed, 94 insertions(+), 28 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 0f73a31e..1d3a705e 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -39,7 +39,7 @@ Getting Started .. toctree:: :maxdepth: 1 - fileprovider + file etcd consul diff --git a/docs/source/yaml.md b/docs/source/yaml.md index 0f4178d9..e422b0fb 100644 --- a/docs/source/yaml.md +++ b/docs/source/yaml.md @@ -52,7 +52,7 @@ By **default**, Traefik will search for `traefik.toml` and `rules.toml` in the f * $HOME/.traefik/ * . the working directory -You can override this in TraefikFileProviderProxy, by modifying the **toml_static_config_file** argument: +You can override this in TraefikFileProviderProxy, by modifying the **static_config_file** argument: ```python c.TraefikFileProviderProxy.static_config_file="/path/to/static_config_filename.toml" diff --git a/jupyterhub_traefik_proxy/fileprovider.py b/jupyterhub_traefik_proxy/fileprovider.py index 42acb013..38558cdd 100644 --- a/jupyterhub_traefik_proxy/fileprovider.py +++ b/jupyterhub_traefik_proxy/fileprovider.py @@ -27,8 +27,7 @@ from traitlets import Any, default, Unicode, observe from . import traefik_utils -from jupyterhub.proxy import Proxy -from jupyterhub_traefik_proxy import TraefikProxy +from .proxy import TraefikProxy class TraefikFileProviderProxy(TraefikProxy): @@ -180,8 +179,6 @@ async def add_route(self, routespec, target, data): target (str): A full URL that will be the target of this route. data (dict): A JSONable dict that will be associated with this route, and will be returned when retrieving information about this route. - FIXME: Why do we need to pass data back and forth to traefik? - Traefik v2 doesn't seem to allow a data key... Will raise an appropriate Exception (FIXME: find what?) if the route could not be added. @@ -195,6 +192,9 @@ async def add_route(self, routespec, target, data): router_alias = traefik_utils.generate_alias(traefik_routespec, "router") rule = traefik_utils.generate_rule(traefik_routespec) + if not self.traefik_entrypoint: + self.traefik_entrypoint = await self._get_traefik_entrypoint() + async with self.mutex: # If we've emptied the http and/or routers section, create it. if "http" not in self.dynamic_config: @@ -214,7 +214,15 @@ async def add_route(self, routespec, target, data): self.dynamic_config["http"]["routers"][router_alias] = { "service": service_alias, "rule": rule, + "entryPoints": [self.traefik_entrypoint] } + + # Enable TLS on this router if globally enabled + if self.is_https: + self.dynamic_config["http"]["routers"][router_alias].update({ + "tls": {} + }) + # Add the data node to a separate top-level node, so traefik doesn't complain. self.dynamic_config["jupyter"]["routers"][router_alias] = { "data": data diff --git a/jupyterhub_traefik_proxy/proxy.py b/jupyterhub_traefik_proxy/proxy.py index 0a785e61..b56411ba 100644 --- a/jupyterhub_traefik_proxy/proxy.py +++ b/jupyterhub_traefik_proxy/proxy.py @@ -21,7 +21,6 @@ import json from os.path import abspath, dirname, join from subprocess import Popen, TimeoutExpired -import asyncio.subprocess from urllib.parse import urlparse from traitlets import Any, Bool, Dict, Integer, Unicode, default @@ -65,6 +64,20 @@ class TraefikProxy(Proxy): config=True, help="""The provider name that Traefik expects, e.g. file, consul, etcd""" ) + is_https = Bool( + help="""Whether :attr:`.public_url` specifies an https entrypoint""" + ) + + @default("is_https") + def get_is_https(self): + # Check if we set https + return urlparse(self.public_url).scheme == "https" + + traefik_entrypoint = Unicode( + help="""The traefik entrypoint names, to which each """ + """jupyterhub-configred Traefik router is assigned""" + ) + def __init__(self, **kwargs): super().__init__(**kwargs) if kwargs.get('debug', self.debug) == True: @@ -81,7 +94,41 @@ def __init__(self, **kwargs): self.log.addHandler(handler) self.log.debug(f"Initialising {type(self).__name__}") - #if kwargs.get('debug', self.debug) is True: + + async def _get_traefik_entrypoint(self): + """Find the traefik entrypoint that matches our :attrib:`self.public_url`""" + if self.should_start: + if self.is_https: + return "websecure" + else: + return "web" + import re + # FIXME: Adding '_wait_for_static_config' to get through 'external' + # tests. Would this be required in the 'real world'? + # Adding _wait_for_static_config to the 'external' conftests instead... + #await self._wait_for_static_config() + resp = await self._traefik_api_request("/api/entrypoints") + json_data = json.loads(resp.body) + public_url = urlparse(self.public_url) + hub_port = public_url.port + if not hub_port: + # If the port is not specified, then use the default port + # according to the scheme (http, or https) + if public_url.scheme == 'http': + hub_port = 80 + elif public_url.scheme == 'https': + hub_port = 443 + else: + raise ValueError(f"Cannot discern public_url port from {self.public_url}!") + # Traefik entrypoint format described at:- + # https://doc.traefik.io/traefik/routing/entrypoints/#address + entrypoint_re = re.compile('([^:]+)?:([0-9]+)/?(tcp|udp)?') + for entrypoint in json_data: + host, port, prot = entrypoint_re.match(entrypoint["address"]).groups() + if int(port) == hub_port: + return entrypoint["name"] + entrypoints = [entrypoint["address"] for entrypoint in json_data] + raise ValueError(f"No traefik entrypoint ports ({entrypoints}) match public_url: {self.public_url}!") @default("traefik_api_password") def _warn_empty_password(self): @@ -268,20 +315,22 @@ async def _setup_traefik_static_config(self): self.static_config["log"] = { "level": self.traefik_log_level } - entryPoints = {} - - if self.ssl_cert and self.ssl_key: - entryPoints["websecure"] = { - "address": ":" + str(urlparse(self.public_url).port), - "tls": {}, + # FIXME: Do we only create a single entrypoint for jupyterhub? + # Why not have an http and https entrypoint? + if not self.traefik_entrypoint: + self.traefik_entrypoint = await self._get_traefik_entrypoint() + + entrypoints = { + # The entrypoint jupyterhub will listen on + self.traefik_entrypoint : { + "address": f":{urlparse(self.public_url).port}", + }, + "enter_api" : { + "address": f":{urlparse(self.traefik_api_url).port}", } - else: - entryPoints["web"] = {"address": ":" + str(urlparse(self.public_url).port)} - - entryPoints["enter_api"] = { - "address": ":" + str(urlparse(self.traefik_api_url).port), } - self.static_config["entryPoints"] = entryPoints + + self.static_config["entryPoints"] = entrypoints self.static_config["api"] = {"dashboard": True} handler = traefik_utils.TraefikConfigFileHandler(self.static_config_file) @@ -439,12 +488,9 @@ async def get_route(self, routespec): raise NotImplementedError() async def persist_dynamic_config(self): - """Update the Traefik dynamic configuration, depending on the backend + """Save the Traefik dynamic configuration, depending on the backend provider in use. This is used to e.g. set up the api endpoint's authentication (username and password), as well as default tls - certificates to use. - - :arg:`settings` is a Dict containing the traefik settings, which will - be updated on the Traefik provider depending on the subclass in use. + certificates to use, when should_start is True. """ raise NotImplementedError() diff --git a/tests/conftest.py b/tests/conftest.py index 61b65eb3..9eec1cbb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -91,6 +91,7 @@ async def external_toml_proxy(launch_traefik_file): dynamic_config_file=dynamic_config_file, ) + await proxy._wait_for_static_config() yield proxy os.remove(dynamic_config_file) @@ -109,6 +110,7 @@ async def external_yaml_proxy(launch_traefik_file): dynamic_config_file=dynamic_config_file, ) + await proxy._wait_for_static_config() yield proxy os.remove(dynamic_config_file) diff --git a/tests/proxytest.py b/tests/proxytest.py index 3e389704..f63a920d 100644 --- a/tests/proxytest.py +++ b/tests/proxytest.py @@ -366,6 +366,7 @@ async def test_host_origin_headers(proxy, launch_backend): req_url, method="GET", headers={"Host": expected_host_header, "Origin": expected_origin_header}, + validate_cert=False ) resp = await AsyncHTTPClient().fetch(req) @@ -416,6 +417,7 @@ async def test_check_routes(proxy, username): async def test_websockets(proxy, launch_backend): + import ssl routespec = "/user/username/" target = "http://127.0.0.1:9000" data = {} @@ -443,9 +445,16 @@ async def test_websockets(proxy, launch_backend): if proxy.public_url.endswith("/"): public_url = proxy.public_url[:-1] - req_url = "ws://" + urlparse(proxy.public_url).netloc + routespec + if proxy.is_https: + kwargs = {'ssl': ssl._create_unverified_context()} + scheme = "wss://" + else: + kwargs = {} + scheme = "ws://" + req_url = scheme + urlparse(proxy.public_url).netloc + routespec - async with websockets.connect(req_url) as websocket: + # Don't validate the ssl certificate, it's self-signed by traefik + async with websockets.connect(req_url, **kwargs) as websocket: port = await websocket.recv() assert port == str(default_backend_port) diff --git a/tests/utils.py b/tests/utils.py index c8d0f148..26420e57 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -43,9 +43,10 @@ async def get_responding_backend_port(traefik_url, path): traefik_url + "".join("/" + path.split("/", 1)[1]), method="GET", headers={"Host": path.split("/")[0]}, + validate_cert=False, ) else: - req = traefik_url + path + req = HTTPRequest(traefik_url + path, validate_cert=False) try: resp = await AsyncHTTPClient().fetch(req)