From 340bea5609f581bb31d792ee2afd30dac4b02a9b Mon Sep 17 00:00:00 2001
From: Alex Carney <alcarneyme@gmail.com>
Date: Sun, 25 Feb 2024 19:44:13 +0000
Subject: [PATCH] lsp: Refactor `SphinxManager`

This commit builds on the changes introduced to the
`SubprocessSphinxClient` to greatly simplify and improve the
robustness of managing the set of `SphinxClients` in use by the
language server.

There were a few drawbacks to the previous approach

- While await on the `_client_creating` task during client creation
  appeared to work, it still often led to the creation of duplicated
  clients.

- By not recognizing when a given configuration was broken, the
  manager would continually attempt to spawn client instances.

- It was not uncommon to enter an infinite loop of spawning clients!

The new system introduced in this commit *shouldn't* suffer from any
of the above issues.

- Instead of indexing clients by `src_uri`, they are instead indexed
  by the scope of the configuration governing the instance. While
  still not perfect, we should be able to accurately determine the
  relevant client for a given uri in more scenarios.

- To indicate that we are unable to create a client for a given
  config we now store a `None` in the clients dictionary - preventing
  any further attempts at spawning a client.

- Taking advantage of the fact clients are indexed by scope and that
  the client itself is `await`-able, we can have multiple consumers of
  the client awaiting on the client - preventing the issue of
  duplicated clients.

Currently, there is a quirk to the system where the first batch of
callers to `get_client` in a given scope will be told there is no
client. However, they have in fact "primed" the system by creating a
config subscription which will asynchronously populate the `clients`
dict with an unstarted client.

The next batch of callers will then discover that there is a client
instance available and `await` it, starting the underlying Sphinx
process.

Based on some initial testing this "quirk" may actually be desirable,
the `get_client` method is called often enough that I wasn't able to
notice a real delay in starting the Sphinx process up. It also has the
nice property that when restarting the server with multiple projects
open, the flood of `didOpen` notifications from the client is enough
to "prime" the system for each project, but the project itself is only
activated when working on the relevant file in the editor.

One final note, there are plenty of improvements still to be made -
error handling, config changes etc, but hopefully this is a solid base
we can start building on.
---
 lib/esbonio/esbonio/server/__init__.py        |   2 +
 .../server/features/sphinx_manager/manager.py | 151 ++++-----
 .../server/features/test_sphinx_manager.py    | 291 ++++++++++++++----
 3 files changed, 284 insertions(+), 160 deletions(-)

diff --git a/lib/esbonio/esbonio/server/__init__.py b/lib/esbonio/esbonio/server/__init__.py
index ae67baca5..91ccd317d 100644
--- a/lib/esbonio/esbonio/server/__init__.py
+++ b/lib/esbonio/esbonio/server/__init__.py
@@ -9,6 +9,7 @@
 from .feature import LanguageFeature
 from .server import EsbonioLanguageServer
 from .server import EsbonioWorkspace
+from .setup import create_language_server
 
 __all__ = (
     "LOG_NAMESPACE",
@@ -21,4 +22,5 @@
     "LanguageFeature",
     "MemoryHandler",
     "Uri",
+    "create_language_server",
 )
diff --git a/lib/esbonio/esbonio/server/features/sphinx_manager/manager.py b/lib/esbonio/esbonio/server/features/sphinx_manager/manager.py
index 2dbf27e0e..f1a3204bb 100644
--- a/lib/esbonio/esbonio/server/features/sphinx_manager/manager.py
+++ b/lib/esbonio/esbonio/server/features/sphinx_manager/manager.py
@@ -1,30 +1,30 @@
 from __future__ import annotations
 
 import asyncio
-import sys
+import inspect
 import typing
 import uuid
-from typing import Callable
-from typing import Dict
-from typing import Optional
 
 import lsprotocol.types as lsp
 
 import esbonio.sphinx_agent.types as types
-from esbonio.server import EventSource
-from esbonio.server import LanguageFeature
+from esbonio import server
 from esbonio.server import Uri
 
+from .client import ClientState
 from .config import SphinxConfig
 
 if typing.TYPE_CHECKING:
-    from .client import SphinxClient
+    from typing import Callable
+    from typing import Dict
+    from typing import Optional
 
+    from .client import SphinxClient
 
-SphinxClientFactory = Callable[["SphinxManager"], "SphinxClient"]
+    SphinxClientFactory = Callable[["SphinxManager", "SphinxConfig"], "SphinxClient"]
 
 
-class SphinxManager(LanguageFeature):
+class SphinxManager(server.LanguageFeature):
     """Responsible for managing Sphinx application instances."""
 
     def __init__(self, client_factory: SphinxClientFactory, *args, **kwargs):
@@ -33,18 +33,18 @@ def __init__(self, client_factory: SphinxClientFactory, *args, **kwargs):
         self.client_factory = client_factory
         """Used to create new Sphinx client instances."""
 
-        self.clients: Dict[Uri, SphinxClient] = {}
+        self.clients: Dict[str, Optional[SphinxClient]] = {
+            # Prevent any clients from being created in the global scope.
+            "": None,
+        }
         """Holds currently active Sphinx clients."""
 
-        self._events = EventSource(self.logger)
+        self._events = server.EventSource(self.logger)
         """The SphinxManager can emit events."""
 
         self._pending_builds: Dict[str, asyncio.Task] = {}
         """Holds tasks that will trigger a build after a given delay if not cancelled."""
 
-        self._client_creating: Optional[asyncio.Task] = None
-        """If set, indicates we're in the process of setting up a new client."""
-
         self._progress_tokens: Dict[str, str] = {}
         """Holds work done progress tokens."""
 
@@ -56,7 +56,7 @@ async def document_change(self, params: lsp.DidChangeTextDocumentParams):
             return
 
         client = await self.get_client(uri)
-        if client is None or client.id is None:
+        if client is None:
             return
 
         # Cancel any existing pending builds
@@ -78,7 +78,7 @@ async def document_save(self, params: lsp.DidSaveTextDocumentParams):
             return
 
         client = await self.get_client(uri)
-        if client is None or client.id is None:
+        if client is None:
             return
 
         # Cancel any existing pending builds
@@ -90,20 +90,12 @@ async def document_save(self, params: lsp.DidSaveTextDocumentParams):
     async def shutdown(self, params: None):
         """Called when the server is instructed to ``shutdown``."""
 
-        # Stop creating any new clients.
-        if self._client_creating and not self._client_creating.done():
-            args = {}
-            if sys.version_info.minor > 8:
-                args["msg"] = "Server is shutting down"
-
-            self.logger.debug("Aborting client creation")
-            self._client_creating.cancel(**args)
-
         # Stop any existing clients.
         tasks = []
         for client in self.clients.values():
-            self.logger.debug("Stopping SphinxClient: %s", client)
-            tasks.append(asyncio.create_task(client.stop()))
+            if client:
+                self.logger.debug("Stopping SphinxClient: %s", client)
+                tasks.append(asyncio.create_task(client.stop()))
 
         await asyncio.gather(*tasks)
 
@@ -118,7 +110,7 @@ async def trigger_build_after(self, uri: Uri, app_id: str, delay: float):
     async def trigger_build(self, uri: Uri):
         """Trigger a build for the relevant Sphinx application for the given uri."""
         client = await self.get_client(uri)
-        if client is None or client.building:
+        if client is None or client.state != ClientState.Running:
             return
 
         # Pass through any unsaved content to the Sphinx agent.
@@ -149,86 +141,50 @@ async def trigger_build(self, uri: Uri):
     async def get_client(self, uri: Uri) -> Optional[SphinxClient]:
         """Given a uri, return the relevant sphinx client instance for it."""
 
-        # Wait until the new client is created - it might be the one we're looking for!
-        if self._client_creating:
-            await self._client_creating
-
-        # Always check the fully resolved uri.
-        resolved_uri = uri.resolve()
-
-        for src_uri, client in self.clients.items():
-            if resolved_uri in (await client.get_src_uris()):
-                return client
-
-            # For now assume a single client instance per srcdir.
-            # This *should* prevent us from spwaning endless client instances
-            # when given a file located near a valid Sphinx project - but not actually
-            # part of it.
-            in_src_dir = str(resolved_uri).startswith(str(src_uri))
-            if in_src_dir:
-                # Of course, we can only tell if a uri truly is not in a project
-                # when the build file map is populated!
-                # if len(client.build_file_map) == 0:
-                return client
-
-        # Create a new client instance.
-        self._client_creating = asyncio.create_task(self._create_client(uri))
-        try:
-            return await self._client_creating
-        finally:
-            # Be sure to unset the task when it resolves
-            self._client_creating = None
-
-    async def _create_client(self, uri: Uri) -> Optional[SphinxClient]:
-        """Create a new sphinx client instance."""
-        # TODO: Replace with config subscription
-        await self.server.ready
-        config = self.server.configuration.get(
-            "esbonio.sphinx", SphinxConfig, scope=uri
-        )
-        if config is None:
+        scope = self.server.configuration.scope_for(uri)
+        if scope not in self.clients:
+            self.logger.debug("No client found, creating new subscription")
+            self.server.configuration.subscribe(
+                "esbonio.sphinx",
+                SphinxConfig,
+                self._create_or_replace_client,
+                scope=uri,
+            )
+            # The first few callers in a given scope will miss out, but that shouldn't matter
+            # too much
             return None
 
-        resolved = config.resolve(uri, self.server.workspace, self.logger)
-        if resolved is None:
+        if (client := self.clients[scope]) is None:
+            self.logger.debug("No applicable client for uri: %s", uri)
             return None
 
-        client = self.client_factory(self)
+        return await client
 
-        try:
-            await client.start(resolved)
-        except Exception as exc:
-            message = "Unable to start sphinx-agent"
-            self.logger.error(message, exc_info=True)
-            self.server.show_message(f"{message}: {exc}", lsp.MessageType.Error)
+    async def _create_or_replace_client(
+        self, event: server.ConfigChangeEvent[SphinxConfig]
+    ):
+        """Create or replace thesphinx client instance for the given config."""
 
-            return None
+        # TODO: Handle replacement!
+        config = event.value
 
-        try:
-            sphinx_info = await client.create_application(resolved)
-        except Exception as exc:
-            message = "Unable to create sphinx application"
-            self.logger.error(message, exc_info=True)
-            self.server.show_message(f"{message}: {exc}", lsp.MessageType.Error)
-
-            await client.stop()
-            return None
+        # Do not try and create clients in the global scope
+        if event.scope == "":
+            return
 
-        if client.src_uri is None:
-            self.logger.error("No src uri!")
-            await client.stop()
-            return None
+        resolved = config.resolve(
+            Uri.parse(event.scope), self.server.workspace, self.logger
+        )
+        if resolved is None:
+            self.clients[event.scope] = None
+            return
 
-        self.server.lsp.notify("sphinx/appCreated", sphinx_info)
-        self.clients[client.src_uri] = client
-        return client
+        self.clients[event.scope] = self.client_factory(self, resolved)
+        self.logger.debug("Client created for scope %s", event.scope)
 
     async def start_progress(self, client: SphinxClient):
         """Start reporting work done progress for the given client."""
 
-        if client.id is None:
-            return
-
         token = str(uuid.uuid4())
         self.logger.debug("Starting progress: '%s'", token)
 
@@ -245,9 +201,6 @@ async def start_progress(self, client: SphinxClient):
         )
 
     def stop_progress(self, client: SphinxClient):
-        if client.id is None:
-            return
-
         if (token := self._progress_tokens.pop(client.id, None)) is None:
             return
 
@@ -256,7 +209,7 @@ def stop_progress(self, client: SphinxClient):
     def report_progress(self, client: SphinxClient, progress: types.ProgressParams):
         """Report progress done for the given client."""
 
-        if client.id is None:
+        if client.state not in {ClientState.Running, ClientState.Building}:
             return
 
         if (token := self._progress_tokens.get(client.id, None)) is None:
diff --git a/lib/esbonio/tests/server/features/test_sphinx_manager.py b/lib/esbonio/tests/server/features/test_sphinx_manager.py
index 6067c0633..36f1fdaf4 100644
--- a/lib/esbonio/tests/server/features/test_sphinx_manager.py
+++ b/lib/esbonio/tests/server/features/test_sphinx_manager.py
@@ -1,97 +1,266 @@
+from __future__ import annotations
+
 import asyncio
-from unittest.mock import AsyncMock
-from unittest.mock import Mock
+import io
+import pathlib
+import sys
+import typing
 
 import pytest
+import pytest_asyncio
 from lsprotocol import types as lsp
 from pygls.exceptions import JsonRpcInternalError
+from pygls.server import StdOutTransportAdapter
 
 from esbonio.server import EsbonioLanguageServer
-from esbonio.server import EsbonioWorkspace
 from esbonio.server import Uri
-from esbonio.server.features.sphinx_manager import MockSphinxClient
-from esbonio.server.features.sphinx_manager import SphinxConfig
+from esbonio.server import create_language_server
+from esbonio.server.features.sphinx_manager import ClientState
 from esbonio.server.features.sphinx_manager import SphinxManager
-from esbonio.server.features.sphinx_manager import mock_sphinx_client_factory
-from esbonio.sphinx_agent import types
+from esbonio.server.features.sphinx_manager import make_subprocess_sphinx_client
+
+if typing.TYPE_CHECKING:
+    from typing import Any
+    from typing import Callable
+    from typing import Tuple
+
+    ServerManager = Callable[[Any], Tuple[EsbonioLanguageServer, SphinxManager]]
 
 
 @pytest.fixture()
-def workspace(uri_for):
+def demo_workspace(uri_for):
     return uri_for("workspaces", "demo")
 
 
 @pytest.fixture()
-def progress():
-    p = Mock()
-    p.create_async = AsyncMock()
-    return p
+def docs_workspace(uri_for):
+    return uri_for("..", "..", "..", "docs")
 
 
-@pytest.fixture()
-def server(event_loop, progress, workspace: Uri):
-    """A mock instance of the language"""
-    ready = asyncio.Future()
-    ready.set_result(True)
-
-    esbonio = EsbonioLanguageServer(loop=event_loop)
-    esbonio._ready = ready
-    esbonio.lsp.progress = progress
-    esbonio.show_message = Mock()
-    esbonio.configuration.get = Mock(return_value=SphinxConfig())
-    esbonio.lsp._workspace = EsbonioWorkspace(
-        None,
-        workspace_folders=[lsp.WorkspaceFolder(str(workspace), "workspace")],
-    )
-    return esbonio
+@pytest_asyncio.fixture()
+async def server_manager(demo_workspace: Uri, docs_workspace):
+    """An instance of the language server and sphinx manager to use for the test."""
 
+    loop = asyncio.get_running_loop()
 
-@pytest.fixture()
-def sphinx_info(workspace: Uri):
-    return types.SphinxInfo(
-        id="123",
-        version="6.0",
-        conf_dir=workspace.fs_path,
-        build_dir=(workspace / "_build" / "html").fs_path,
-        builder_name="html",
-        src_dir=workspace.fs_path,
-        dbpath=(workspace / "_build" / "html" / "esbonio.db").fs_path,
-    )
+    esbonio = create_language_server(EsbonioLanguageServer, [], loop=loop)
+    esbonio.lsp.transport = StdOutTransportAdapter(io.BytesIO(), sys.stderr.buffer)
+
+    manager = SphinxManager(make_subprocess_sphinx_client, esbonio)
+    esbonio.add_feature(manager)
+
+    def initialize(init_options):
+        # Initialize the server.
+        esbonio.lsp._procedure_handler(
+            lsp.InitializeRequest(
+                id=1,
+                params=lsp.InitializeParams(
+                    capabilities=lsp.ClientCapabilities(),
+                    initialization_options=init_options,
+                    workspace_folders=[
+                        lsp.WorkspaceFolder(uri=str(demo_workspace), name="demo"),
+                        lsp.WorkspaceFolder(uri=str(docs_workspace), name="docs"),
+                    ],
+                ),
+            )
+        )
+
+        esbonio.lsp._procedure_handler(
+            lsp.InitializedNotification(params=lsp.InitializedParams())
+        )
+        return esbonio, manager
+
+    yield initialize
+
+    await manager.shutdown(None)
 
 
 @pytest.mark.asyncio
-async def test_create_application_error(server, workspace: Uri):
-    """Ensure that we can handle errors during application creation correctly."""
+async def test_get_client(
+    server_manager: ServerManager, demo_workspace: Uri, tmp_path: pathlib.Path
+):
+    """Ensure that we can create a SphinxClient correctly."""
 
-    client = MockSphinxClient(JsonRpcInternalError("create sphinx application failed."))
-    client_factory = mock_sphinx_client_factory(client)
+    server, manager = server_manager(
+        dict(
+            esbonio=dict(
+                sphinx=dict(
+                    pythonCommand=[sys.executable],
+                    buildCommand=["sphinx-build", "-M", "dirhtml", ".", str(tmp_path)],
+                ),
+            ),
+        ),
+    )
+    # Ensure that the server is ready
+    await server.ready
 
-    manager = SphinxManager(client_factory, server)
+    result = await manager.get_client(demo_workspace / "index.rst")
+    # At least for now, the first call to get_client will not return a client
+    # but will instead "prime the system" ready to return a client the next
+    # time it is called.
+    assert result is None
+
+    # Give the async tasks chance to complete.
+    await asyncio.sleep(0.5)
+
+    # The system should be "primed" meaning there is a client created, but not
+    # running.
+    print(manager.clients)
+    client = manager.clients[str(demo_workspace)]
+
+    assert client is not None
+    assert client.state is None
+
+    # Now when we ask for the client, the client should be started and we should
+    # get back the same instance
+    result = await manager.get_client(demo_workspace / "index.rst")
+
+    assert result is client
+    assert client.state == ClientState.Running
 
-    result = await manager.get_client(workspace / "index.rst")
+    # And that the client initialized correctly
+    assert client.builder == "dirhtml"
+    assert client.src_uri == demo_workspace
+    assert client.conf_uri == demo_workspace
+    assert client.build_uri == Uri.for_file(tmp_path / "dirhtml")
+
+
+@pytest.mark.asyncio
+async def test_get_client_with_error(
+    server_manager: ServerManager, demo_workspace: Uri
+):
+    """Ensure that we correctly handle the case where there is an error with a client."""
+
+    server, manager = server_manager(None)
+    # Ensure that the server is ready
+    await server.ready
+
+    result = await manager.get_client(demo_workspace / "index.rst")
+    # At least for now, the first call to get_client will not return a client
+    # but will instead "prime the system" ready to return a client the next
+    # time it is called.
     assert result is None
 
-    server.show_message.assert_called_with(
-        "Unable to create sphinx application: create sphinx application failed.",
-        lsp.MessageType.Error,
-    )
+    # Give the async tasks chance to complete.
+    await asyncio.sleep(0.5)
+
+    # The system should be "primed" meaning there is a client created, but not
+    # running.
+    print(manager.clients)
+    client = manager.clients[str(demo_workspace)]
+
+    assert client is not None
+    assert client.state is None
+
+    # Now when we ask for the client, the client should be started and we should
+    # get back the same instance
+    result = await manager.get_client(demo_workspace / "index.rst")
+
+    assert result is client
+    assert client.state == ClientState.Errored
+    assert "No python environment configured" in str(client.exception)
+
+    # Finally, if we request another uri from the same project we should get back
+    # the same client instance - even though it failed to start.
+    result = await manager.get_client(demo_workspace / "demo_myst.md")
+    assert result is client
 
 
 @pytest.mark.asyncio
-async def test_trigger_build_error(sphinx_info, server, workspace):
-    """Ensure that we can handle build errors correctly."""
+async def test_get_client_with_many_uris(
+    server_manager: ServerManager, demo_workspace: Uri, tmp_path: pathlib.Path
+):
+    """Ensure that when called in rapid succession, with many uris we only create a
+    single client instance."""
 
-    client = MockSphinxClient(
-        sphinx_info,
-        build_result=JsonRpcInternalError("sphinx-build failed:"),
+    server, manager = server_manager(
+        dict(
+            esbonio=dict(
+                sphinx=dict(
+                    pythonCommand=[sys.executable],
+                    buildCommand=["sphinx-build", "-M", "dirhtml", ".", str(tmp_path)],
+                ),
+            ),
+        ),
     )
-    client_factory = mock_sphinx_client_factory()
 
-    manager = SphinxManager(client_factory, server)
-    manager.clients = {workspace: client}
+    # Ensure that the server is ready
+    await server.ready
 
-    await manager.trigger_build(workspace / "index.rst")
+    src_uris = [Uri.for_file(f) for f in pathlib.Path(demo_workspace).glob("**/*.rst")]
+    coros = [manager.get_client(s) for s in src_uris]
 
-    server.show_message.assert_called_with(
-        "sphinx-build failed:", lsp.MessageType.Error
-    )
+    # As with the other cases, this should only "prime" the system
+    result = await asyncio.gather(*coros)
+    assert all([r is None for r in result])
+
+    # There should only have been one client created (in addition to the 'dummy' global
+    # scoped client)
+    assert len(manager.clients) == 2
+
+    client = manager.clients[str(demo_workspace)]
+    assert client is not None
+    assert client.state == None
+
+    # Now if we do the same again we should get the same client instance for each case.
+    coros = [manager.get_client(s) for s in src_uris]
+    result = await asyncio.gather(*coros)
+
+    assert all([r is client for r in result])
+
+    client = result[0]
+    assert client.state == ClientState.Running
+
+    # And that the client initialized correctly
+    assert client.builder == "dirhtml"
+    assert client.src_uri == demo_workspace
+    assert client.conf_uri == demo_workspace
+    assert client.build_uri == Uri.for_file(tmp_path / "dirhtml")
+
+
+@pytest.mark.asyncio
+async def test_get_client_with_many_uris_in_many_projects(
+    server_manager: ServerManager,
+    demo_workspace: Uri,
+    docs_workspace: Uri,
+):
+    """Ensure that when called in rapid succession, with many uris we only create a
+    single client instance for each project."""
+
+    server, manager = server_manager(
+        dict(
+            esbonio=dict(
+                sphinx=dict(pythonCommand=[sys.executable]),
+            ),
+        ),
+    )  # Ensure that the server is ready
+    await server.ready
+
+    src_uris = [Uri.for_file(f) for f in pathlib.Path(demo_workspace).glob("**/*.rst")]
+    src_uris += [Uri.for_file(f) for f in pathlib.Path(docs_workspace).glob("**/*.rst")]
+    coros = [manager.get_client(s) for s in src_uris]
+
+    # As with the other cases, this should only "prime" the system
+    result = await asyncio.gather(*coros)
+    assert all([r is None for r in result])
+
+    # There should only have been one client created for each project (in addition to
+    # the 'dummy' global scoped client)
+    assert len(manager.clients) == 3
+
+    demo_client = manager.clients[str(demo_workspace)]
+    assert demo_client is not None
+    assert demo_client.state == None
+
+    docs_client = manager.clients[str(docs_workspace)]
+    assert docs_client is not None
+    assert docs_client.state == None
+
+    # Now if we do the same again we should get the same client instance for each case.
+    coros = [manager.get_client(s) for s in src_uris]
+    result = await asyncio.gather(*coros)
+
+    assert all([(r is demo_client) or (r is docs_client) for r in result])
+
+    assert demo_client.state == ClientState.Running
+    assert docs_client.state == ClientState.Running