diff --git a/Cargo.lock b/Cargo.lock index 160c8aacf1..4548d0a3d7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5822,6 +5822,7 @@ dependencies = [ "num-traits", "once_cell", "openapiv3", + "peg-runtime", "pem-rfc7468", "petgraph", "postgres-types", @@ -6189,22 +6190,26 @@ name = "oximeter-db" version = "0.1.0" dependencies = [ "anyhow", + "async-recursion", "async-trait", "bcs", "bytes", "camino", "chrono", "clap 4.5.1", + "crossterm", "dropshot", "expectorate", "futures", "highway", "indexmap 2.2.5", "itertools 0.12.1", + "num", "omicron-common", "omicron-test-utils", "omicron-workspace-hack", "oximeter", + "peg", "reedline", "regex", "reqwest", @@ -6511,6 +6516,33 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" +[[package]] +name = "peg" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "400bcab7d219c38abf8bd7cc2054eb9bbbd4312d66f6a5557d572a203f646f61" +dependencies = [ + "peg-macros", + "peg-runtime", +] + +[[package]] +name = "peg-macros" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46e61cce859b76d19090f62da50a9fe92bab7c2a5f09e183763559a2ac392c90" +dependencies = [ + "peg-runtime", + "proc-macro2", + "quote", +] + +[[package]] +name = "peg-runtime" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36bae92c60fa2398ce4678b98b2c4b5a7c61099961ca1fa305aec04a9ad28922" + [[package]] name = "pem" version = "3.0.2" diff --git a/Cargo.toml b/Cargo.toml index 3237cc79bd..0d66583a82 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -257,7 +257,6 @@ ipcc = { path = "ipcc" } ipnet = "2.9" itertools = "0.12.1" internet-checksum = "0.2" -ipcc-key-value = { path = "ipcc-key-value" } ipnetwork = { version = "0.20", features = ["schemars"] } ispf = { git = "https://github.com/oxidecomputer/ispf" } key-manager = { path = "key-manager" } @@ -313,7 +312,6 @@ openapiv3 = "2.0.0" # must match samael's crate! 
openssl = "0.10" openssl-sys = "0.9" -openssl-probe = "0.1.5" opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "7ee353a470ea59529ee1b34729681da887aa88ce" } oso = "0.27" owo-colors = "4.0.0" @@ -330,6 +328,7 @@ partial-io = { version = "0.5.4", features = ["proptest1", "tokio1"] } parse-size = "1.0.0" paste = "1.0.14" percent-encoding = "2.3.1" +peg = "0.8.2" pem = "3.0" petgraph = "0.6.4" postgres-protocol = "0.6.6" @@ -368,7 +367,6 @@ schemars = "0.8.16" secrecy = "0.8.0" semver = { version = "1.0.22", features = ["std", "serde"] } serde = { version = "1.0", default-features = false, features = [ "derive", "rc" ] } -serde_derive = "1.0" serde_human_bytes = { git = "http://github.com/oxidecomputer/serde_human_bytes", branch = "main" } serde_json = "1.0.114" serde_path_to_error = "0.1.16" @@ -394,12 +392,12 @@ slog-envlogger = "2.2" slog-error-chain = { git = "https://github.com/oxidecomputer/slog-error-chain", branch = "main", features = ["derive"] } slog-term = "2.9" smf = "0.2" -snafu = "0.7" socket2 = { version = "0.5", features = ["all"] } sp-sim = { path = "sp-sim" } sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } sprockets-rot = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } +sqlformat = "0.2.3" sqlparser = { version = "0.43.1", features = [ "visitor" ] } static_assertions = "1.1.0" # Please do not change the Steno version to a Git dependency. It makes it diff --git a/nexus/src/app/metrics.rs b/nexus/src/app/metrics.rs index 94fb232892..3728a3bdc1 100644 --- a/nexus/src/app/metrics.rs +++ b/nexus/src/app/metrics.rs @@ -13,7 +13,9 @@ use nexus_db_queries::{ db::{fixed_data::FLEET_ID, lookup}, }; use omicron_common::api::external::{Error, InternalContext}; -use oximeter_db::Measurement; +use oximeter_db::{ + oxql, Measurement, TimeseriesSchema, TimeseriesSchemaPaginationParams, +}; use std::num::NonZeroU32; impl super::Nexus { @@ -96,4 +98,85 @@ impl super::Nexus { ) .await } + + /// List available timeseries schema. + pub(crate) async fn timeseries_schema_list( + &self, + opctx: &OpContext, + pagination: &TimeseriesSchemaPaginationParams, + limit: NonZeroU32, + ) -> Result, Error> { + // Must be a fleet user to list timeseries schema. + // + // TODO-security: We need to figure out how to implement proper security + // checks here, letting less-privileged users fetch data for the + // resources they have access to. + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + self.timeseries_client + .get() + .await + .map_err(|e| { + Error::internal_error(&format!( + "Cannot access timeseries DB: {}", + e + )) + })? + .timeseries_schema_list(&pagination.page, limit) + .await + .map_err(|e| match e { + oximeter_db::Error::DatabaseUnavailable(_) => { + Error::ServiceUnavailable { + internal_message: e.to_string(), + } + } + _ => Error::InternalError { internal_message: e.to_string() }, + }) + } + + /// Run an OxQL query against the timeseries database. + pub(crate) async fn timeseries_query( + &self, + opctx: &OpContext, + query: impl AsRef, + ) -> Result, Error> { + // Must be a fleet user to list timeseries schema. + // + // TODO-security: We need to figure out how to implement proper security + // checks here, letting less-privileged users fetch data for the + // resources they have access to. 
+ opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + self.timeseries_client + .get() + .await + .map_err(|e| { + Error::internal_error(&format!( + "Cannot access timeseries DB: {}", + e + )) + })? + .oxql_query(query) + .await + .map(|result| { + // TODO-observability: The query method returns information + // about the duration of the OxQL query and the database + // resource usage for each contained SQL query. We should + // publish this as a timeseries itself, so that we can track + // improvements to query processing. + // + // For now, simply return the tables alone. + result.tables + }) + .map_err(|e| match e { + oximeter_db::Error::DatabaseUnavailable(_) => { + Error::ServiceUnavailable { + internal_message: e.to_string(), + } + } + oximeter_db::Error::Oxql(_) + | oximeter_db::Error::TimeseriesNotFound(_) => { + Error::invalid_request(e.to_string()) + } + _ => Error::InternalError { internal_message: e.to_string() }, + }) + } } diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 6fa530b49d..a570cd60c4 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -321,6 +321,8 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(system_metric)?; api.register(silo_metric)?; + api.register(timeseries_schema_list)?; + api.register(timeseries_query)?; api.register(system_update_put_repository)?; api.register(system_update_get_repository)?; @@ -5626,6 +5628,56 @@ async fn silo_metric( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } +/// List available timeseries schema. +#[endpoint { + method = GET, + path = "/v1/timeseries/schema", + tags = ["metrics"], +}] +async fn timeseries_schema_list( + rqctx: RequestContext>, + pag_params: Query, +) -> Result>, HttpError> +{ + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.nexus; + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let pagination = pag_params.into_inner(); + let limit = rqctx.page_limit(&pagination)?; + nexus + .timeseries_schema_list(&opctx, &pagination, limit) + .await + .map(HttpResponseOk) + .map_err(HttpError::from) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Run a timeseries query, written OxQL. 
+#[endpoint { + method = POST, + path = "/v1/timeseries/query", + tags = ["metrics"], +}] +async fn timeseries_query( + rqctx: RequestContext>, + body: TypedBody, +) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.nexus; + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let query = body.into_inner().query; + nexus + .timeseries_query(&opctx, &query) + .await + .map(HttpResponseOk) + .map_err(HttpError::from) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + // Updates /// Upload TUF repository diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 1003722723..02ab1385e3 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -848,6 +848,17 @@ pub static DEMO_SILO_METRICS_URL: Lazy = Lazy::new(|| { ) }); +pub static TIMESERIES_LIST_URL: Lazy = + Lazy::new(|| String::from("/v1/timeseries/schema")); + +pub static TIMESERIES_QUERY_URL: Lazy = + Lazy::new(|| String::from("/v1/timeseries/query")); + +pub static DEMO_TIMESERIES_QUERY: Lazy = + Lazy::new(|| params::TimeseriesQuery { + query: String::from("get http_service:request_latency_histogram"), + }); + // Users pub static DEMO_USER_CREATE: Lazy = Lazy::new(|| params::UserCreate { @@ -2023,6 +2034,26 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { ], }, + VerifyEndpoint { + url: &TIMESERIES_LIST_URL, + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Get, + ], + }, + + VerifyEndpoint { + url: &TIMESERIES_QUERY_URL, + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Post( + serde_json::to_value(&*DEMO_TIMESERIES_QUERY).unwrap() + ), + ], + }, + /* Silo identity providers */ VerifyEndpoint { diff --git a/nexus/tests/integration_tests/metrics.rs b/nexus/tests/integration_tests/metrics.rs index 73f11ce49a..c96cf9b0fb 100644 --- a/nexus/tests/integration_tests/metrics.rs +++ b/nexus/tests/integration_tests/metrics.rs @@ -16,6 +16,7 @@ use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; use oximeter::types::Datum; use oximeter::types::Measurement; +use oximeter::TimeseriesSchema; use uuid::Uuid; pub async fn query_for_metrics( @@ -238,3 +239,27 @@ async fn test_metrics( // project 1 unaffected by project 2's resources assert_silo_metrics(&cptestctx, Some(project1_id), GIB, 4, GIB).await; } + +/// Test that we can correctly list some timeseries schema. +#[nexus_test] +async fn test_timeseries_schema_list( + cptestctx: &ControlPlaneTestContext, +) { + // We should be able to fetch the list of timeseries, and it should include + // Nexus's HTTP latency distribution. This is defined in Nexus itself, and + // should always exist after we've registered as a producer and start + // producing data. Force a collection to ensure that happens. 
+ cptestctx.server.register_as_producer().await; + cptestctx.oximeter.force_collect().await; + let client = &cptestctx.external_client; + let url = "/v1/timeseries/schema"; + let schema = + objects_list_page_authz::(client, &url).await; + schema + .items + .iter() + .find(|sc| { + sc.timeseries_name == "http_service:request_latency_histogram" + }) + .expect("Failed to find HTTP request latency histogram schema"); +} diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index 91d2504a57..3e40e8293d 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -73,6 +73,8 @@ login_saml POST /login/{silo_name}/saml/{provi API operations found with tag "metrics" OPERATION ID METHOD URL PATH silo_metric GET /v1/metrics/{metric_name} +timeseries_query POST /v1/timeseries/query +timeseries_schema_list GET /v1/timeseries/schema API operations found with tag "policy" OPERATION ID METHOD URL PATH diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 1ba373ff56..3829484a27 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -2055,3 +2055,10 @@ pub struct ProbeListSelector { /// A name or id to use when selecting a probe. pub name_or_id: Option, } + +/// A timeseries query string, written in the Oximeter query language. +#[derive(Deserialize, JsonSchema, Serialize)] +pub struct TimeseriesQuery { + /// A timeseries query string, written in the Oximeter query language. + pub query: String, +} diff --git a/openapi/nexus.json b/openapi/nexus.json index 3cc991126d..e7e4c1d31c 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -7929,6 +7929,99 @@ } } }, + "/v1/timeseries/query": { + "post": { + "tags": [ + "metrics" + ], + "summary": "Run a timeseries query, written OxQL.", + "operationId": "timeseries_query", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/TimeseriesQuery" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_Table", + "type": "array", + "items": { + "$ref": "#/components/schemas/Table" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v1/timeseries/schema": { + "get": { + "tags": [ + "metrics" + ], + "summary": "List available timeseries schema.", + "operationId": "timeseries_schema_list", + "parameters": [ + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/TimeseriesSchemaResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [] + } + } + }, "/v1/users": { "get": { "tags": [ @@ -11917,6 +12010,56 @@ } ] }, + "Distributiondouble": { + "description": "A distribution is a sequence of bins and counts in those bins.", + 
"type": "object", + "properties": { + "bins": { + "type": "array", + "items": { + "type": "number", + "format": "double" + } + }, + "counts": { + "type": "array", + "items": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + } + }, + "required": [ + "bins", + "counts" + ] + }, + "Distributionint64": { + "description": "A distribution is a sequence of bins and counts in those bins.", + "type": "object", + "properties": { + "bins": { + "type": "array", + "items": { + "type": "integer", + "format": "int64" + } + }, + "counts": { + "type": "array", + "items": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + } + }, + "required": [ + "bins", + "counts" + ] + }, "EphemeralIpCreate": { "description": "Parameters for creating an ephemeral IP address for an instance.", "type": "object", @@ -12080,33 +12223,314 @@ } }, "required": [ - "floating_ip", - "type" + "floating_ip", + "type" + ] + } + ] + }, + "ExternalIpResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/ExternalIp" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, + "FieldSchema": { + "description": "The name and type information for a field of a timeseries schema.", + "type": "object", + "properties": { + "field_type": { + "$ref": "#/components/schemas/FieldType" + }, + "name": { + "type": "string" + }, + "source": { + "$ref": "#/components/schemas/FieldSource" + } + }, + "required": [ + "field_type", + "name", + "source" + ] + }, + "FieldSource": { + "description": "The source from which a field is derived, the target or metric.", + "type": "string", + "enum": [ + "target", + "metric" + ] + }, + "FieldType": { + "description": "The `FieldType` identifies the data type of a target or metric field.", + "type": "string", + "enum": [ + "string", + "i8", + "u8", + "i16", + "u16", + "i32", + "u32", + "i64", + "u64", + "ip_addr", + "uuid", + "bool" + ] + }, + "FieldValue": { + "description": "The `FieldValue` contains the value of a target or metric field.", + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "string" + ] + }, + "value": { + "type": "string" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "i8" + ] + }, + "value": { + "type": "integer", + "format": "int8" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "u8" + ] + }, + "value": { + "type": "integer", + "format": "uint8", + "minimum": 0 + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "i16" + ] + }, + "value": { + "type": "integer", + "format": "int16" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "u16" + ] + }, + "value": { + "type": "integer", + "format": "uint16", + "minimum": 0 + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "i32" + ] + }, + "value": { + "type": "integer", + "format": "int32" + } + }, + "required": [ + "type", 
+ "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "u32" + ] + }, + "value": { + "type": "integer", + "format": "uint32", + "minimum": 0 + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "i64" + ] + }, + "value": { + "type": "integer", + "format": "int64" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "u64" + ] + }, + "value": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "ip_addr" + ] + }, + "value": { + "type": "string", + "format": "ip" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "uuid" + ] + }, + "value": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "bool" + ] + }, + "value": { + "type": "boolean" + } + }, + "required": [ + "type", + "value" ] } ] }, - "ExternalIpResultsPage": { - "description": "A single page of results", - "type": "object", - "properties": { - "items": { - "description": "list of items on this page of results", - "type": "array", - "items": { - "$ref": "#/components/schemas/ExternalIp" - } - }, - "next_page": { - "nullable": true, - "description": "token used to fetch the next page of results (if any)", - "type": "string" - } - }, - "required": [ - "items" - ] - }, "FinalizeDisk": { "description": "Parameters for finalizing a disk", "type": "object", @@ -14279,6 +14703,32 @@ "items" ] }, + "MetricType": { + "description": "The type of the metric itself, indicating what its values represent.", + "oneOf": [ + { + "description": "The value represents an instantaneous measurement in time.", + "type": "string", + "enum": [ + "gauge" + ] + }, + { + "description": "The value represents a difference between two points in time.", + "type": "string", + "enum": [ + "delta" + ] + }, + { + "description": "The value represents an accumulation between two points in time.", + "type": "string", + "enum": [ + "cumulative" + ] + } + ] + }, "MissingDatum": { "type": "object", "properties": { @@ -14614,6 +15064,37 @@ "ok" ] }, + "Points": { + "description": "Timepoints and values for one timeseries.", + "type": "object", + "properties": { + "start_times": { + "nullable": true, + "type": "array", + "items": { + "type": "string", + "format": "date-time" + } + }, + "timestamps": { + "type": "array", + "items": { + "type": "string", + "format": "date-time" + } + }, + "values": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Values" + } + } + }, + "required": [ + "timestamps", + "values" + ] + }, "Probe": { "description": "Identity-related metadata that's included in nearly all public API objects", "type": "object", @@ -16965,6 +17446,113 @@ "vlan_id" ] }, + "Table": { + "description": "A table represents one or more timeseries with the same schema.\n\nA table is the result of an OxQL query. 
It contains a name, usually the name of the timeseries schema from which the data is derived, and any number of timeseries, which contain the actual data.", + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "timeseries": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/Timeseries" + } + } + }, + "required": [ + "name", + "timeseries" + ] + }, + "Timeseries": { + "description": "A timeseries contains a timestamped set of values from one source.\n\nThis includes the typed key-value pairs that uniquely identify it, and the set of timestamps and data values from it.", + "type": "object", + "properties": { + "fields": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/FieldValue" + } + }, + "points": { + "$ref": "#/components/schemas/Points" + } + }, + "required": [ + "fields", + "points" + ] + }, + "TimeseriesName": { + "title": "The name of a timeseries", + "description": "Names are constructed by concatenating the target and metric names with ':'. Target and metric names must be lowercase alphanumeric characters with '_' separating words.", + "type": "string", + "pattern": "^(([a-z]+[a-z0-9]*)(_([a-z0-9]+))*):(([a-z]+[a-z0-9]*)(_([a-z0-9]+))*)$" + }, + "TimeseriesQuery": { + "description": "A timeseries query string, written in the Oximeter query language.", + "type": "object", + "properties": { + "query": { + "description": "A timeseries query string, written in the Oximeter query language.", + "type": "string" + } + }, + "required": [ + "query" + ] + }, + "TimeseriesSchema": { + "description": "The schema for a timeseries.\n\nThis includes the name of the timeseries, as well as the datum type of its metric and the schema for each field.", + "type": "object", + "properties": { + "created": { + "type": "string", + "format": "date-time" + }, + "datum_type": { + "$ref": "#/components/schemas/DatumType" + }, + "field_schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/FieldSchema" + }, + "uniqueItems": true + }, + "timeseries_name": { + "$ref": "#/components/schemas/TimeseriesName" + } + }, + "required": [ + "created", + "datum_type", + "field_schema", + "timeseries_name" + ] + }, + "TimeseriesSchemaResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/TimeseriesSchema" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, "UninitializedSled": { "description": "A sled that has not been added to an initialized rack yet", "type": "object", @@ -17246,6 +17834,169 @@ "provisioned" ] }, + "ValueArray": { + "description": "List of data values for one timeseries.\n\nEach element is an option, where `None` represents a missing sample.", + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "integer" + ] + }, + "values": { + "type": "array", + "items": { + "nullable": true, + "type": "integer", + "format": "int64" + } + } + }, + "required": [ + "type", + "values" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "double" + ] + }, + "values": { + "type": "array", + "items": { + "nullable": true, + "type": "number", + "format": "double" + } + } + }, + "required": [ + "type", + "values" + ] + }, + { 
+ "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "boolean" + ] + }, + "values": { + "type": "array", + "items": { + "nullable": true, + "type": "boolean" + } + } + }, + "required": [ + "type", + "values" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "string" + ] + }, + "values": { + "type": "array", + "items": { + "nullable": true, + "type": "string" + } + } + }, + "required": [ + "type", + "values" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "integer_distribution" + ] + }, + "values": { + "type": "array", + "items": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/Distributionint64" + } + ] + } + } + }, + "required": [ + "type", + "values" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "double_distribution" + ] + }, + "values": { + "type": "array", + "items": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/Distributiondouble" + } + ] + } + } + }, + "required": [ + "type", + "values" + ] + } + ] + }, + "Values": { + "description": "A single list of values, for one dimension of a timeseries.", + "type": "object", + "properties": { + "metric_type": { + "$ref": "#/components/schemas/MetricType" + }, + "values": { + "$ref": "#/components/schemas/ValueArray" + } + }, + "required": [ + "metric_type", + "values" + ] + }, "VirtualResourceCounts": { "description": "A collection of resource counts used to describe capacity and utilization", "type": "object", diff --git a/oximeter/db/Cargo.toml b/oximeter/db/Cargo.toml index c4ee44acb6..88a2ab8a89 100644 --- a/oximeter/db/Cargo.toml +++ b/oximeter/db/Cargo.toml @@ -7,6 +7,7 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true +async-recursion = "1.0.5" async-trait.workspace = true bcs.workspace = true camino.workspace = true @@ -15,21 +16,16 @@ clap.workspace = true dropshot.workspace = true futures.workspace = true highway.workspace = true -indexmap.workspace = true omicron-common.workspace = true omicron-workspace-hack.workspace = true oximeter.workspace = true -reedline.workspace = true regex.workspace = true -rustyline.workspace = true serde.workspace = true serde_json.workspace = true slog.workspace = true slog-async.workspace = true +slog-dtrace.workspace = true slog-term.workspace = true -sqlparser.workspace = true -sqlformat = "0.2.3" -tabled.workspace = true thiserror.workspace = true usdt.workspace = true uuid.workspace = true @@ -38,26 +34,82 @@ uuid.workspace = true workspace = true features = [ "serde" ] +[dependencies.crossterm] +workspace = true +optional = true + +[dependencies.indexmap] +workspace = true +optional = true + +[dependencies.num] +workspace = true +optional = true + +[dependencies.peg] +workspace = true +optional = true + +[dependencies.reedline] +workspace = true +optional = true + [dependencies.reqwest] workspace = true features = [ "json" ] +[dependencies.rustyline] +workspace = true +optional = true + [dependencies.schemars] workspace = true features = [ "uuid1", "bytes", "chrono" ] +[dependencies.sqlformat] +workspace = true +optional = true + +[dependencies.sqlparser] +workspace = true +optional = true + [dependencies.tokio] workspace = true features = [ "rt-multi-thread", "macros" ] +[dependencies.tabled] +workspace = true +optional = true + [dev-dependencies] expectorate.workspace = true +indexmap.workspace = true itertools.workspace = true omicron-test-utils.workspace = 
true slog-dtrace.workspace = true +sqlparser.workspace = true strum.workspace = true tempfile.workspace = true +[features] +default = [ "oxql", "sql" ] +sql = [ + "dep:indexmap", + "dep:reedline", + "dep:rustyline", + "dep:sqlformat", + "dep:sqlparser", + "dep:tabled" +] +oxql = [ + "dep:crossterm", + "dep:num", + "dep:peg", + "dep:reedline", + "dep:tabled", +] + [[bin]] name = "oxdb" doc = false diff --git a/oximeter/db/src/bin/oxdb.rs b/oximeter/db/src/bin/oxdb/main.rs similarity index 50% rename from oximeter/db/src/bin/oxdb.rs rename to oximeter/db/src/bin/oxdb/main.rs index 02a8054da0..ca11dd18a3 100644 --- a/oximeter/db/src/bin/oxdb.rs +++ b/oximeter/db/src/bin/oxdb/main.rs @@ -4,31 +4,27 @@ //! Tool for developing against the Oximeter timeseries database, populating data and querying. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company use anyhow::{bail, Context}; use chrono::{DateTime, Utc}; use clap::{Args, Parser}; -use dropshot::EmptyScanParams; -use dropshot::WhichPage; use oximeter::{ types::{Cumulative, Sample}, Metric, Target, }; -use oximeter_db::sql::function_allow_list; -use oximeter_db::QueryMetadata; -use oximeter_db::QueryResult; -use oximeter_db::Table; use oximeter_db::{query, Client, DbWrite}; -use reedline::DefaultPrompt; -use reedline::DefaultPromptSegment; -use reedline::Reedline; -use reedline::Signal; use slog::{debug, info, o, Drain, Level, Logger}; use std::net::IpAddr; use std::net::SocketAddr; use uuid::Uuid; +#[cfg(feature = "sql")] +mod sql; + +#[cfg(feature = "oxql")] +mod oxql; + // Samples are inserted in chunks of this size, to avoid large allocations when inserting huge // numbers of timeseries. const INSERT_CHUNK_SIZE: usize = 100_000; @@ -151,9 +147,17 @@ enum Subcommand { }, /// Enter a SQL shell for interactive querying. + #[cfg(feature = "sql")] Sql { #[clap(flatten)] - opts: ShellOptions, + opts: crate::sql::ShellOptions, + }, + + /// Enter the Oximeter Query Language shell for interactive querying. + #[cfg(feature = "oxql")] + Oxql { + #[clap(flatten)] + opts: crate::oxql::ShellOptions, }, } @@ -312,281 +316,6 @@ async fn query( Ok(()) } -fn print_basic_commands() { - println!("Basic commands:"); - println!(" \\?, \\h, help - Print this help"); - println!(" \\q, quit, exit, ^D - Exit the shell"); - println!(" \\l - List tables"); - println!(" \\d - Describe a table"); - println!( - " \\f - List or describe ClickHouse SQL functions" - ); - println!(); - println!("Or try entering a SQL `SELECT` statement"); -} - -async fn list_virtual_tables(client: &Client) -> anyhow::Result<()> { - let mut page = WhichPage::First(EmptyScanParams {}); - let limit = 100.try_into().unwrap(); - loop { - let results = client.timeseries_schema_list(&page, limit).await?; - for schema in results.items.iter() { - println!("{}", schema.timeseries_name); - } - if results.next_page.is_some() { - if let Some(last) = results.items.last() { - page = WhichPage::Next(last.timeseries_name.clone()); - } else { - return Ok(()); - } - } else { - return Ok(()); - } - } -} - -async fn describe_virtual_table( - client: &Client, - table: &str, -) -> anyhow::Result<()> { - match table.parse() { - Err(_) => println!("Invalid timeseries name: {table}"), - Ok(name) => { - if let Some(schema) = client.schema_for_timeseries(&name).await? 
{ - let mut cols = - Vec::with_capacity(schema.field_schema.len() + 2); - let mut types = cols.clone(); - for field in schema.field_schema.iter() { - cols.push(field.name.clone()); - types.push(field.field_type.to_string()); - } - cols.push("timestamp".into()); - types.push("DateTime64".into()); - - if schema.datum_type.is_histogram() { - cols.push("start_time".into()); - types.push("DateTime64".into()); - - cols.push("bins".into()); - types.push(format!( - "Array[{}]", - schema - .datum_type - .to_string() - .strip_prefix("Histogram") - .unwrap() - .to_lowercase(), - )); - - cols.push("counts".into()); - types.push("Array[u64]".into()); - } else if schema.datum_type.is_cumulative() { - cols.push("start_time".into()); - types.push("DateTime64".into()); - cols.push("datum".into()); - types.push(schema.datum_type.to_string()); - } else { - cols.push("datum".into()); - types.push(schema.datum_type.to_string()); - } - - let mut builder = tabled::builder::Builder::default(); - builder.push_record(cols); // first record is the header - builder.push_record(types); - println!( - "{}", - builder.build().with(tabled::settings::Style::psql()) - ); - } else { - println!("No such timeseries: {table}"); - } - } - } - Ok(()) -} - -#[derive(Clone, Debug, Args)] -struct ShellOptions { - /// Print query metadata. - #[clap(long = "metadata")] - print_metadata: bool, - /// Print the original SQL query. - #[clap(long = "original")] - print_original_query: bool, - /// Print the rewritten SQL query that is actually run on the DB. - #[clap(long = "rewritten")] - print_rewritten_query: bool, - /// Print the transformed query, but do not run it. - #[clap(long)] - transform: Option, -} - -impl Default for ShellOptions { - fn default() -> Self { - Self { - print_metadata: true, - print_original_query: false, - print_rewritten_query: false, - transform: None, - } - } -} - -fn list_supported_functions() { - println!("Subset of ClickHouse SQL functions currently supported"); - println!( - "See https://clickhouse.com/docs/en/sql-reference/functions for more" - ); - println!(); - for func in function_allow_list().iter() { - println!(" {func}"); - } -} - -fn show_supported_function(name: &str) { - if let Some(func) = function_allow_list().iter().find(|f| f.name == name) { - println!("{}", func.name); - println!(" {}", func.usage); - println!(" {}", func.description); - } else { - println!("No supported function '{name}'"); - } -} - -fn print_sql_query(query: &str) { - println!( - "{}", - sqlformat::format( - &query, - &sqlformat::QueryParams::None, - sqlformat::FormatOptions { uppercase: true, ..Default::default() } - ) - ); - println!(); -} - -fn print_query_metadata(table: &Table, metadata: &QueryMetadata) { - println!("Metadata"); - println!(" Query ID: {}", metadata.id); - println!(" Result rows: {}", table.rows.len()); - println!(" Time: {:?}", metadata.elapsed); - println!(" Read: {}\n", metadata.summary.read); -} - -async fn sql_shell( - address: IpAddr, - port: u16, - log: Logger, - opts: ShellOptions, -) -> anyhow::Result<()> { - let client = make_client(address, port, &log).await?; - - // A workaround to ensure the client has all available timeseries when the - // shell starts. - let dummy = "foo:bar".parse().unwrap(); - let _ = client.schema_for_timeseries(&dummy).await; - - // Possibly just transform the query, but do not execute it. 
- if let Some(query) = &opts.transform { - let transformed = client.transform_query(query).await?; - println!( - "{}", - sqlformat::format( - &transformed, - &sqlformat::QueryParams::None, - sqlformat::FormatOptions { - uppercase: true, - ..Default::default() - } - ) - ); - return Ok(()); - } - - let mut ed = Reedline::create(); - let prompt = DefaultPrompt::new( - DefaultPromptSegment::Basic("0x".to_string()), - DefaultPromptSegment::Empty, - ); - println!("Oximeter SQL shell"); - println!(); - print_basic_commands(); - loop { - let sig = ed.read_line(&prompt); - match sig { - Ok(Signal::Success(buf)) => { - let cmd = buf.as_str().trim(); - match cmd { - "" => continue, - "\\?" | "\\h" | "help" => print_basic_commands(), - "\\q" | "quit" | "exit" => return Ok(()), - "\\l" | "\\d" => list_virtual_tables(&client).await?, - _ => { - if let Some(table_name) = cmd.strip_prefix("\\d") { - if table_name.is_empty() { - list_virtual_tables(&client).await?; - } else { - describe_virtual_table( - &client, - table_name.trim().trim_end_matches(';'), - ) - .await?; - } - } else if let Some(func_name) = cmd.strip_prefix("\\f") - { - if func_name.is_empty() { - list_supported_functions(); - } else { - show_supported_function( - func_name.trim().trim_end_matches(';'), - ); - } - } else { - match client.query(&buf).await { - Err(e) => println!("Query failed: {e:#?}"), - Ok(QueryResult { - original_query, - rewritten_query, - metadata, - table, - }) => { - println!(); - let mut builder = - tabled::builder::Builder::default(); - builder.push_record(&table.column_names); // first record is the header - for row in table.rows.iter() { - builder.push_record( - row.iter().map(ToString::to_string), - ); - } - if opts.print_original_query { - print_sql_query(&original_query); - } - if opts.print_rewritten_query { - print_sql_query(&rewritten_query); - } - println!( - "{}\n", - builder.build().with( - tabled::settings::Style::psql() - ) - ); - if opts.print_metadata { - print_query_metadata(&table, &metadata); - } - } - } - } - } - } - } - Ok(Signal::CtrlD) => return Ok(()), - Ok(Signal::CtrlC) => continue, - err => println!("err: {err:?}"), - } - } -} - #[tokio::main] async fn main() -> anyhow::Result<()> { usdt::register_probes().context("Failed to register USDT probes")?; @@ -598,6 +327,7 @@ async fn main() -> anyhow::Result<()> { .filter_level(args.log_level) .fuse(); let drain = slog_async::Async::new(drain).build().fuse(); + let drain = slog_dtrace::with_drain(drain).0.fuse(); let log = Logger::root(drain, o!("component" => "oxdb")); match args.cmd { Subcommand::Describe => describe_data(), @@ -636,8 +366,13 @@ async fn main() -> anyhow::Result<()> { ) .await?; } + #[cfg(feature = "sql")] Subcommand::Sql { opts } => { - sql_shell(args.address, args.port, log, opts).await? + crate::sql::sql_shell(args.address, args.port, log, opts).await? + } + #[cfg(feature = "oxql")] + Subcommand::Oxql { opts } => { + crate::oxql::oxql_shell(args.address, args.port, log, opts).await? } } Ok(()) diff --git a/oximeter/db/src/bin/oxdb/oxql.rs b/oximeter/db/src/bin/oxdb/oxql.rs new file mode 100644 index 0000000000..54e40afa15 --- /dev/null +++ b/oximeter/db/src/bin/oxdb/oxql.rs @@ -0,0 +1,333 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! OxQL shell. 
+
+// Copyright 2024 Oxide Computer
+
+use crate::make_client;
+use clap::Args;
+use crossterm::style::Stylize;
+use dropshot::EmptyScanParams;
+use dropshot::WhichPage;
+use oximeter_db::oxql::query::special_idents;
+use oximeter_db::oxql::Table;
+use oximeter_db::Client;
+use oximeter_db::OxqlResult;
+use reedline::DefaultPrompt;
+use reedline::DefaultPromptSegment;
+use reedline::Reedline;
+use reedline::Signal;
+use slog::Logger;
+use std::net::IpAddr;
+
+#[derive(Clone, Debug, Args)]
+pub struct ShellOptions {
+    /// Print summaries of each SQL query run against the database.
+    #[clap(long = "summaries")]
+    print_summaries: bool,
+    /// Print the total elapsed query duration.
+    #[clap(long = "elapsed")]
+    print_elapsed: bool,
+}
+
+// Print help for the basic OxQL commands.
+fn print_basic_commands() {
+    println!("Basic commands:");
+    println!("  \\?, \\h, help       - Print this help");
+    println!("  \\q, quit, exit, ^D - Exit the shell");
+    println!("  \\l                 - List timeseries");
+    println!("  \\d <timeseries>    - Describe a timeseries");
+    println!("  \\ql [<operation>]  - Get OxQL help about an operation");
+    println!();
+    println!("Or try entering an OxQL `get` query");
+}
+
+// Print high-level information about OxQL.
+fn print_general_oxql_help() {
+    const HELP: &str = r#"Oximeter Query Language
+
+The Oximeter Query Language (OxQL) implements queries as
+a sequence of operations. Each of these takes zero or more
+timeseries as inputs, and produces zero or more timeseries
+as outputs. Operations are chained together with the pipe
+operator, "|".
+
+All queries start with a `get` operation, which selects a
+timeseries from the database, by name. For example:
+
+`get physical_data_link:bytes_received`
+
+The supported timeseries operations are:
+
+- get: Select a timeseries by name
+- filter: Filter timeseries by field or sample values
+- group_by: Group timeseries by fields, applying a reducer.
+- join: Join two or more timeseries together
+
+Run `\ql <operation>` to get specific help about that operation.
+    "#;
+    println!("{HELP}");
+}
+
+// Print help for a specific OxQL operation.
+fn print_oxql_operation_help(op: &str) {
+    match op {
+        "get" => {
+            const HELP: &str = r#"get <timeseries name>
+
+Get instances of a timeseries by name"#;
+            println!("{HELP}");
+        }
+        "filter" => {
+            const HELP: &str = r#"filter <expr>
+
+Filter timeseries based on their attributes.
+<expr> can be a logical combination of filtering
+\"atoms\", such as `field_foo > 0`. Expressions
+may use any of the usual comparison operators, and
+can be nested and combined with && or ||.
+
+Expressions must refer to the name of a field
+for a timeseries at this time, and must compare
+against literals. For example, `some_field > 0`
+is supported, but `some_field > other_field` is not."#;
+            println!("{HELP}");
+        }
+        "group_by" => {
+            const HELP: &str = r#"group_by [<field name>, ... ]
+group_by [<field name>, ... ], <reducer>
+
+Group timeseries by the named fields, optionally
+specifying a reducer to use when aggregating the
+timeseries within each group. If no reducer is
+specified, `mean` is used, averaging the values
+within each group.
+
+Current supported reducers:
+ - mean
+ - sum"#;
+            println!("{HELP}");
+        }
+        "join" => {
+            const HELP: &str = r#"join
+
+Combine 2 or more tables by performing a natural
+inner join, matching up those with fields of the
+same value. Currently, joining does not take into
+account the timestamps, and does not align the outputs
+directly."#;
+            println!("{HELP}");
+        }
+        _ => eprintln!("unrecognized OxQL operation: '{op}'"),
+    }
+}
+
+// List the known timeseries.
+async fn list_timeseries(client: &Client) -> anyhow::Result<()> { + let mut page = WhichPage::First(EmptyScanParams {}); + let limit = 100.try_into().unwrap(); + loop { + let results = client.timeseries_schema_list(&page, limit).await?; + for schema in results.items.iter() { + println!("{}", schema.timeseries_name); + } + if results.next_page.is_some() { + if let Some(last) = results.items.last() { + page = WhichPage::Next(last.timeseries_name.clone()); + } else { + return Ok(()); + } + } else { + return Ok(()); + } + } +} + +// Describe a single timeseries. +async fn describe_timeseries( + client: &Client, + timeseries: &str, +) -> anyhow::Result<()> { + match timeseries.parse() { + Err(_) => eprintln!( + "Invalid timeseries name '{timeseries}, \ + use \\l to list available timeseries by name + " + ), + Ok(name) => { + if let Some(schema) = client.schema_for_timeseries(&name).await? { + let mut cols = + Vec::with_capacity(schema.field_schema.len() + 2); + let mut types = cols.clone(); + for field in schema.field_schema.iter() { + cols.push(field.name.clone()); + types.push(field.field_type.to_string()); + } + cols.push(special_idents::TIMESTAMP.into()); + types.push(special_idents::DATETIME64.into()); + + if schema.datum_type.is_histogram() { + cols.push(special_idents::START_TIME.into()); + types.push(special_idents::DATETIME64.into()); + + cols.push(special_idents::BINS.into()); + types.push( + special_idents::array_type_name_from_histogram_type( + schema.datum_type, + ) + .unwrap(), + ); + + cols.push(special_idents::COUNTS.into()); + types.push(special_idents::ARRAYU64.into()); + } else if schema.datum_type.is_cumulative() { + cols.push(special_idents::START_TIME.into()); + types.push(special_idents::DATETIME64.into()); + cols.push(special_idents::DATUM.into()); + types.push(schema.datum_type.to_string()); + } else { + cols.push(special_idents::DATUM.into()); + types.push(schema.datum_type.to_string()); + } + + let mut builder = tabled::builder::Builder::default(); + builder.push_record(cols); // first record is the header + builder.push_record(types); + println!( + "{}", + builder.build().with(tabled::settings::Style::psql()) + ); + } else { + eprintln!("No such timeseries: {timeseries}"); + } + } + } + Ok(()) +} + +/// Run the OxQL shell. +pub async fn oxql_shell( + address: IpAddr, + port: u16, + log: Logger, + opts: ShellOptions, +) -> anyhow::Result<()> { + let client = make_client(address, port, &log).await?; + + // A workaround to ensure the client has all available timeseries when the + // shell starts. + let dummy = "foo:bar".parse().unwrap(); + let _ = client.schema_for_timeseries(&dummy).await; + + // Create the line-editor. + let mut ed = Reedline::create(); + let prompt = DefaultPrompt::new( + DefaultPromptSegment::Basic("0x".to_string()), + DefaultPromptSegment::Empty, + ); + println!("Oximeter Query Language shell"); + println!(); + print_basic_commands(); + loop { + let sig = ed.read_line(&prompt); + match sig { + Ok(Signal::Success(buf)) => { + let cmd = buf.as_str().trim(); + match cmd { + "" => continue, + "\\?" 
| "\\h" | "help" => print_basic_commands(), + "\\q" | "quit" | "exit" => return Ok(()), + "\\l" | "\\d" => list_timeseries(&client).await?, + _ => { + if let Some(timeseries_name) = cmd.strip_prefix("\\d") { + if timeseries_name.is_empty() { + list_timeseries(&client).await?; + } else { + describe_timeseries( + &client, + timeseries_name + .trim() + .trim_end_matches(';'), + ) + .await?; + } + } else if let Some(stmt) = cmd.strip_prefix("\\ql") { + let stmt = stmt.trim(); + if stmt.is_empty() { + print_general_oxql_help(); + } else { + print_oxql_operation_help(stmt); + } + } else { + match client + .oxql_query(cmd.trim().trim_end_matches(';')) + .await + { + Ok(result) => { + print_query_summary( + &result, + opts.print_elapsed, + opts.print_summaries, + ); + print_tables(&result.tables); + } + Err(e) => { + eprintln!("{}", "Error".underlined().red()); + eprintln!("{e}"); + } + } + } + } + } + } + Ok(Signal::CtrlD) => return Ok(()), + Ok(Signal::CtrlC) => continue, + err => eprintln!("err: {err:?}"), + } + } +} + +fn print_query_summary( + result: &OxqlResult, + print_elapsed: bool, + print_summaries: bool, +) { + if !print_elapsed && !print_summaries { + return; + } + println!("{}", "Query summary".underlined().bold()); + println!(" {}: {}", "ID".bold(), result.query_id); + if print_elapsed { + println!(" {}: {:?}\n", "Total duration".bold(), result.total_duration); + } + if print_summaries { + println!(" {}:", "SQL queries".bold()); + for summary in result.query_summaries.iter() { + println!(" {}: {}", "ID".bold(), summary.id); + println!(" {}: {:?}", "Duration".bold(), summary.elapsed); + println!(" {}: {}", "Read".bold(), summary.io_summary.read); + println!(); + } + } +} + +fn print_tables(tables: &[Table]) { + for table in tables.iter() { + println!(); + println!("{}", table.name().underlined().bold()); + for timeseries in table.iter() { + if timeseries.points.is_empty() { + continue; + } + println!(); + for (name, value) in timeseries.fields.iter() { + println!(" {}: {}", name.as_str().bold(), value); + } + for point in timeseries.points.iter_points() { + println!(" {point}"); + } + } + } +} diff --git a/oximeter/db/src/bin/oxdb/sql.rs b/oximeter/db/src/bin/oxdb/sql.rs new file mode 100644 index 0000000000..d50a60f4d7 --- /dev/null +++ b/oximeter/db/src/bin/oxdb/sql.rs @@ -0,0 +1,298 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! SQL shell subcommand for `oxdb`. + +// Copyright 2024 Oxide Computer Company + +use crate::make_client; +use clap::Args; +use dropshot::EmptyScanParams; +use dropshot::WhichPage; +use oximeter_db::sql::function_allow_list; +use oximeter_db::sql::QueryResult; +use oximeter_db::sql::Table; +use oximeter_db::Client; +use oximeter_db::QuerySummary; +use reedline::DefaultPrompt; +use reedline::DefaultPromptSegment; +use reedline::Reedline; +use reedline::Signal; +use slog::Logger; +use std::net::IpAddr; + +fn print_basic_commands() { + println!("Basic commands:"); + println!(" \\?, \\h, help - Print this help"); + println!(" \\q, quit, exit, ^D - Exit the shell"); + println!(" \\l - List tables"); + println!(" \\d
- Describe a table"); + println!( + " \\f - List or describe ClickHouse SQL functions" + ); + println!(); + println!("Or try entering a SQL `SELECT` statement"); +} + +async fn list_virtual_tables(client: &Client) -> anyhow::Result<()> { + let mut page = WhichPage::First(EmptyScanParams {}); + let limit = 100.try_into().unwrap(); + loop { + let results = client.timeseries_schema_list(&page, limit).await?; + for schema in results.items.iter() { + println!("{}", schema.timeseries_name); + } + if results.next_page.is_some() { + if let Some(last) = results.items.last() { + page = WhichPage::Next(last.timeseries_name.clone()); + } else { + return Ok(()); + } + } else { + return Ok(()); + } + } +} + +async fn describe_virtual_table( + client: &Client, + table: &str, +) -> anyhow::Result<()> { + match table.parse() { + Err(_) => println!("Invalid timeseries name: {table}"), + Ok(name) => { + if let Some(schema) = client.schema_for_timeseries(&name).await? { + let mut cols = + Vec::with_capacity(schema.field_schema.len() + 2); + let mut types = cols.clone(); + for field in schema.field_schema.iter() { + cols.push(field.name.clone()); + types.push(field.field_type.to_string()); + } + cols.push("timestamp".into()); + types.push("DateTime64".into()); + + if schema.datum_type.is_histogram() { + cols.push("start_time".into()); + types.push("DateTime64".into()); + + cols.push("bins".into()); + types.push(format!( + "Array[{}]", + schema + .datum_type + .to_string() + .strip_prefix("Histogram") + .unwrap() + .to_lowercase(), + )); + + cols.push("counts".into()); + types.push("Array[u64]".into()); + } else if schema.datum_type.is_cumulative() { + cols.push("start_time".into()); + types.push("DateTime64".into()); + cols.push("datum".into()); + types.push(schema.datum_type.to_string()); + } else { + cols.push("datum".into()); + types.push(schema.datum_type.to_string()); + } + + let mut builder = tabled::builder::Builder::default(); + builder.push_record(cols); // first record is the header + builder.push_record(types); + println!( + "{}", + builder.build().with(tabled::settings::Style::psql()) + ); + } else { + println!("No such timeseries: {table}"); + } + } + } + Ok(()) +} + +#[derive(Clone, Debug, Args)] +pub struct ShellOptions { + /// Print query metadata. + #[clap(long = "metadata")] + print_metadata: bool, + /// Print the original SQL query. + #[clap(long = "original")] + print_original_query: bool, + /// Print the rewritten SQL query that is actually run on the DB. + #[clap(long = "rewritten")] + print_rewritten_query: bool, + /// Print the transformed query, but do not run it. 
+ #[clap(long)] + transform: Option, +} + +impl Default for ShellOptions { + fn default() -> Self { + Self { + print_metadata: true, + print_original_query: false, + print_rewritten_query: false, + transform: None, + } + } +} + +fn list_supported_functions() { + println!("Subset of ClickHouse SQL functions currently supported"); + println!( + "See https://clickhouse.com/docs/en/sql-reference/functions for more" + ); + println!(); + for func in function_allow_list().iter() { + println!(" {func}"); + } +} + +fn show_supported_function(name: &str) { + if let Some(func) = function_allow_list().iter().find(|f| f.name == name) { + println!("{}", func.name); + println!(" {}", func.usage); + println!(" {}", func.description); + } else { + println!("No supported function '{name}'"); + } +} + +fn print_sql_query(query: &str) { + println!( + "{}", + sqlformat::format( + &query, + &sqlformat::QueryParams::None, + sqlformat::FormatOptions { uppercase: true, ..Default::default() } + ) + ); + println!(); +} + +fn print_query_summary(table: &Table, summary: &QuerySummary) { + println!("Summary"); + println!(" Query ID: {}", summary.id); + println!(" Result rows: {}", table.rows.len()); + println!(" Time: {:?}", summary.elapsed); + println!(" Read: {}\n", summary.io_summary.read); +} + +pub async fn sql_shell( + address: IpAddr, + port: u16, + log: Logger, + opts: ShellOptions, +) -> anyhow::Result<()> { + let client = make_client(address, port, &log).await?; + + // A workaround to ensure the client has all available timeseries when the + // shell starts. + let dummy = "foo:bar".parse().unwrap(); + let _ = client.schema_for_timeseries(&dummy).await; + + // Possibly just transform the query, but do not execute it. + if let Some(query) = &opts.transform { + let transformed = client.transform_query(query).await?; + println!( + "{}", + sqlformat::format( + &transformed, + &sqlformat::QueryParams::None, + sqlformat::FormatOptions { + uppercase: true, + ..Default::default() + } + ) + ); + return Ok(()); + } + + let mut ed = Reedline::create(); + let prompt = DefaultPrompt::new( + DefaultPromptSegment::Basic("0x".to_string()), + DefaultPromptSegment::Empty, + ); + println!("Oximeter SQL shell"); + println!(); + print_basic_commands(); + loop { + let sig = ed.read_line(&prompt); + match sig { + Ok(Signal::Success(buf)) => { + let cmd = buf.as_str().trim(); + match cmd { + "" => continue, + "\\?" 
| "\\h" | "help" => print_basic_commands(), + "\\q" | "quit" | "exit" => return Ok(()), + "\\l" | "\\d" => list_virtual_tables(&client).await?, + _ => { + if let Some(table_name) = cmd.strip_prefix("\\d") { + if table_name.is_empty() { + list_virtual_tables(&client).await?; + } else { + describe_virtual_table( + &client, + table_name.trim().trim_end_matches(';'), + ) + .await?; + } + } else if let Some(func_name) = cmd.strip_prefix("\\f") + { + if func_name.is_empty() { + list_supported_functions(); + } else { + show_supported_function( + func_name.trim().trim_end_matches(';'), + ); + } + } else { + match client.query(&buf).await { + Err(e) => println!("Query failed: {e:#?}"), + Ok(QueryResult { + original_query, + rewritten_query, + summary, + table, + }) => { + println!(); + let mut builder = + tabled::builder::Builder::default(); + builder.push_record(&table.column_names); // first record is the header + for row in table.rows.iter() { + builder.push_record( + row.iter().map(ToString::to_string), + ); + } + if opts.print_original_query { + print_sql_query(&original_query); + } + if opts.print_rewritten_query { + print_sql_query(&rewritten_query); + } + println!( + "{}\n", + builder.build().with( + tabled::settings::Style::psql() + ) + ); + if opts.print_metadata { + print_query_summary(&table, &summary); + } + } + } + } + } + } + } + Ok(Signal::CtrlD) => return Ok(()), + Ok(Signal::CtrlC) => continue, + err => eprintln!("err: {err:?}"), + } + } +} diff --git a/oximeter/db/src/client/dbwrite.rs b/oximeter/db/src/client/dbwrite.rs new file mode 100644 index 0000000000..f21880f314 --- /dev/null +++ b/oximeter/db/src/client/dbwrite.rs @@ -0,0 +1,266 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Implementation of client methods that write to the ClickHouse database. + +// Copyright 2024 Oxide Computer Company + +use crate::client::Client; +use crate::model; +use crate::Error; +use oximeter::Sample; +use oximeter::TimeseriesName; +use slog::debug; +use std::collections::BTreeMap; +use std::collections::BTreeSet; + +#[derive(Debug)] +pub(super) struct UnrolledSampleRows { + /// The timeseries schema rows, keyed by timeseries name. + pub new_schema: BTreeMap, + /// The rows to insert in all the other tables, keyed by the table name. + pub rows: BTreeMap>, +} + +/// A trait allowing a [`Client`] to write data into the timeseries database. +/// +/// The vanilla [`Client`] object allows users to query the timeseries database, returning +/// timeseries samples corresponding to various filtering criteria. This trait segregates the +/// methods required for _writing_ new data into the database, and is intended only for use by the +/// `oximeter-collector` crate. +#[async_trait::async_trait] +pub trait DbWrite { + /// Insert the given samples into the database. + async fn insert_samples(&self, samples: &[Sample]) -> Result<(), Error>; + + /// Initialize the replicated telemetry database, creating tables as needed. + async fn init_replicated_db(&self) -> Result<(), Error>; + + /// Initialize a single node telemetry database, creating tables as needed. + async fn init_single_node_db(&self) -> Result<(), Error>; + + /// Wipe the ClickHouse database entirely from a single node set up. + async fn wipe_single_node_db(&self) -> Result<(), Error>; + + /// Wipe the ClickHouse database entirely from a replicated set up. 
+ async fn wipe_replicated_db(&self) -> Result<(), Error>; +} + +#[async_trait::async_trait] +impl DbWrite for Client { + /// Insert the given samples into the database. + async fn insert_samples(&self, samples: &[Sample]) -> Result<(), Error> { + debug!(self.log, "unrolling {} total samples", samples.len()); + let UnrolledSampleRows { new_schema, rows } = + self.unroll_samples(samples).await; + self.save_new_schema_or_remove(new_schema).await?; + self.insert_unrolled_samples(rows).await + } + + /// Initialize the replicated telemetry database, creating tables as needed. + async fn init_replicated_db(&self) -> Result<(), Error> { + debug!(self.log, "initializing ClickHouse database"); + self.run_many_sql_statements(include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/schema/replicated/db-init.sql" + ))) + .await + } + + /// Wipe the ClickHouse database entirely from a replicated set up. + async fn wipe_replicated_db(&self) -> Result<(), Error> { + debug!(self.log, "wiping ClickHouse database"); + self.run_many_sql_statements(include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/schema/replicated/db-wipe.sql" + ))) + .await + } + + /// Initialize a single node telemetry database, creating tables as needed. + async fn init_single_node_db(&self) -> Result<(), Error> { + debug!(self.log, "initializing ClickHouse database"); + self.run_many_sql_statements(include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/schema/single-node/db-init.sql" + ))) + .await + } + + /// Wipe the ClickHouse database entirely from a single node set up. + async fn wipe_single_node_db(&self) -> Result<(), Error> { + debug!(self.log, "wiping ClickHouse database"); + self.run_many_sql_statements(include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/schema/single-node/db-wipe.sql" + ))) + .await + } +} + +impl Client { + // Unroll each sample into its consituent rows, after verifying the schema. + // + // Note that this also inserts the schema into the internal cache, if it + // does not already exist there. + pub(super) async fn unroll_samples( + &self, + samples: &[Sample], + ) -> UnrolledSampleRows { + let mut seen_timeseries = BTreeSet::new(); + let mut rows = BTreeMap::new(); + let mut new_schema = BTreeMap::new(); + + for sample in samples.iter() { + match self.verify_or_cache_sample_schema(sample).await { + Err(_) => { + // Skip the sample, but otherwise do nothing. The error is logged in the above + // call. + continue; + } + Ok(None) => {} + Ok(Some((name, schema))) => { + debug!( + self.log, + "new timeseries schema"; + "timeseries_name" => %name, + "schema" => %schema + ); + new_schema.insert(name, schema); + } + } + + // Key on both the timeseries name and key, as timeseries may actually share keys. + let key = ( + sample.timeseries_name.as_str(), + crate::timeseries_key(sample), + ); + if !seen_timeseries.contains(&key) { + for (table_name, table_rows) in model::unroll_field_rows(sample) + { + rows.entry(table_name) + .or_insert_with(Vec::new) + .extend(table_rows); + } + } + + let (table_name, measurement_row) = + model::unroll_measurement_row(sample); + + rows.entry(table_name) + .or_insert_with(Vec::new) + .push(measurement_row); + + seen_timeseries.insert(key); + } + + UnrolledSampleRows { new_schema, rows } + } + + // Insert unrolled sample rows into the corresponding tables. 
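    //
    // For illustration only (the table name and row contents here are
    // hypothetical, not taken from this change), each per-table body sent to
    // ClickHouse has the shape:
    //
    //   INSERT INTO <table_name> FORMAT JSONEachRow
    //   {"timeseries_name":"a:b","timeseries_key":1,...}
    //   {"timeseries_name":"a:b","timeseries_key":2,...}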
+ async fn insert_unrolled_samples( + &self, + rows: BTreeMap>, + ) -> Result<(), Error> { + for (table_name, rows) in rows { + let body = format!( + "INSERT INTO {table_name} FORMAT JSONEachRow\n{row_data}\n", + table_name = table_name, + row_data = rows.join("\n") + ); + // TODO-robustness We've verified the schema, so this is likely a transient failure. + // But we may want to check the actual error condition, and, if possible, continue + // inserting any remaining data. + self.execute(body).await?; + debug!( + self.log, + "inserted rows into table"; + "n_rows" => rows.len(), + "table_name" => table_name, + ); + } + + // TODO-correctness We'd like to return all errors to clients here, and there may be as + // many as one per sample. It's not clear how to structure this in a way that's useful. + Ok(()) + } + + // Save new schema to the database, or remove them from the cache on + // failure. + // + // This attempts to insert the provided schema into the timeseries schema + // table. If that fails, those schema are _also_ removed from the internal + // cache. + // + // TODO-robustness There's still a race possible here. If two distinct clients receive new + // but conflicting schema, they will both try to insert those at some point into the schema + // tables. It's not clear how to handle this, since ClickHouse provides no transactions. + // This is unlikely to happen at this point, because the design is such that there will be + // a single `oximeter` instance, which has one client object, connected to a single + // ClickHouse server. But once we start replicating data, the window within which the race + // can occur is much larger, since it includes the time it takes ClickHouse to replicate + // data between nodes. + // + // NOTE: This is an issue even in the case where the schema don't conflict. Two clients may + // receive a sample with a new schema, and both would then try to insert that schema. + pub(super) async fn save_new_schema_or_remove( + &self, + new_schema: BTreeMap, + ) -> Result<(), Error> { + if !new_schema.is_empty() { + debug!( + self.log, + "inserting {} new timeseries schema", + new_schema.len() + ); + const APPROX_ROW_SIZE: usize = 64; + let mut body = String::with_capacity( + APPROX_ROW_SIZE + APPROX_ROW_SIZE * new_schema.len(), + ); + body.push_str("INSERT INTO "); + body.push_str(crate::DATABASE_NAME); + body.push_str(".timeseries_schema FORMAT JSONEachRow\n"); + for row_data in new_schema.values() { + body.push_str(row_data); + body.push('\n'); + } + + // Try to insert the schema. + // + // If this fails, be sure to remove the schema we've added from the + // internal cache. Since we check the internal cache first for + // schema, if we fail here but _don't_ remove the schema, we'll + // never end up inserting the schema, but we will insert samples. + if let Err(e) = self.execute(body).await { + debug!( + self.log, + "failed to insert new schema, removing from cache"; + "error" => ?e, + ); + let mut schema = self.schema.lock().await; + for name in new_schema.keys() { + schema + .remove(name) + .expect("New schema should have been cached"); + } + return Err(e); + } + } + Ok(()) + } + + // Run one or more SQL statements. + // + // This is intended to be used for the methods which run SQL from one of the + // SQL files in the crate, e.g., the DB initialization or update files. 
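    //
    // Note that the input is split naively on ';' and each non-empty piece is
    // run as its own statement, so a statement embedding ';' inside a string
    // literal would be mis-split. As a hypothetical example (not from this
    // change), the input
    //
    //   "CREATE DATABASE IF NOT EXISTS oximeter; SHOW DATABASES;"
    //
    // is executed as two separate statements.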
+ async fn run_many_sql_statements( + &self, + sql: impl AsRef, + ) -> Result<(), Error> { + for stmt in sql.as_ref().split(';').filter(|s| !s.trim().is_empty()) { + self.execute(stmt).await?; + } + Ok(()) + } +} diff --git a/oximeter/db/src/client.rs b/oximeter/db/src/client/mod.rs similarity index 88% rename from oximeter/db/src/client.rs rename to oximeter/db/src/client/mod.rs index abea11aa06..e92518ae08 100644 --- a/oximeter/db/src/client.rs +++ b/oximeter/db/src/client/mod.rs @@ -4,11 +4,19 @@ //! Rust client to ClickHouse database -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company +pub(crate) mod dbwrite; +#[cfg(any(feature = "oxql", test))] +pub(crate) mod oxql; +pub(crate) mod query_summary; +#[cfg(any(feature = "sql", test))] +mod sql; + +pub use self::dbwrite::DbWrite; +use crate::client::query_summary::QuerySummary; use crate::model; use crate::query; -use crate::sql::RestrictedQuery; use crate::Error; use crate::Metric; use crate::Target; @@ -18,16 +26,13 @@ use crate::TimeseriesName; use crate::TimeseriesPageSelector; use crate::TimeseriesScanParams; use crate::TimeseriesSchema; -use async_trait::async_trait; use dropshot::EmptyScanParams; use dropshot::PaginationOrder; use dropshot::ResultsPage; use dropshot::WhichPage; -use indexmap::IndexMap; use oximeter::types::Sample; use regex::Regex; use regex::RegexBuilder; -use reqwest::header::HeaderMap; use slog::debug; use slog::error; use slog::info; @@ -44,7 +49,6 @@ use std::ops::Bound; use std::path::Path; use std::path::PathBuf; use std::sync::OnceLock; -use std::time::Duration; use std::time::Instant; use tokio::fs; use tokio::sync::Mutex; @@ -56,139 +60,11 @@ const CLICKHOUSE_DB_VERSION_MISSING: &'static str = #[usdt::provider(provider = "clickhouse_client")] mod probes { - fn query__start(_: &usdt::UniqueId, sql: &str) {} - fn query__done(_: &usdt::UniqueId) {} -} - -/// A count of bytes / rows accessed during a query. -#[derive(Clone, Copy, Debug)] -pub struct IoCount { - pub bytes: u64, - pub rows: u64, -} - -impl std::fmt::Display for IoCount { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{} rows ({} bytes)", self.rows, self.bytes) - } -} - -/// Summary of the I/O and duration of a query. -#[derive(Clone, Copy, Debug, serde::Deserialize)] -#[serde(try_from = "serde_json::Value")] -pub struct QuerySummary { - /// The bytes and rows read by the query. - pub read: IoCount, - /// The bytes and rows written by the query. - pub written: IoCount, -} - -impl TryFrom for QuerySummary { - type Error = Error; - - fn try_from(j: serde_json::Value) -> Result { - use serde_json::Map; - use serde_json::Value; - use std::str::FromStr; - - let Value::Object(map) = j else { - return Err(Error::Database(String::from( - "Expected a JSON object for a metadata summary", - ))); - }; + /// Fires when a SQL query begins, with the query string. 
+ fn sql__query__start(_: &usdt::UniqueId, sql: &str) {} - fn unpack_summary_value( - map: &Map, - key: &str, - ) -> Result - where - T: FromStr, - ::Err: std::error::Error, - { - let value = map.get(key).ok_or_else(|| { - Error::MissingHeaderKey { key: key.to_string() } - })?; - let Value::String(v) = value else { - return Err(Error::BadMetadata { - key: key.to_string(), - msg: String::from("Expected a string value"), - }); - }; - v.parse::().map_err(|e| Error::BadMetadata { - key: key.to_string(), - msg: e.to_string(), - }) - } - let rows_read: u64 = unpack_summary_value(&map, "read_rows")?; - let bytes_read: u64 = unpack_summary_value(&map, "read_bytes")?; - let rows_written: u64 = unpack_summary_value(&map, "written_rows")?; - let bytes_written: u64 = unpack_summary_value(&map, "written_bytes")?; - Ok(Self { - read: IoCount { bytes: bytes_read, rows: rows_read }, - written: IoCount { bytes: bytes_written, rows: rows_written }, - }) - } -} - -/// Basic metadata about the resource usage of a single SQL query. -#[derive(Clone, Copy, Debug)] -pub struct QueryMetadata { - /// The database-assigned query ID. - pub id: Uuid, - /// The total duration of the query (network plus execution). - pub elapsed: Duration, - /// Summary of the data read and written. - pub summary: QuerySummary, -} - -impl QueryMetadata { - fn from_headers( - elapsed: Duration, - headers: &HeaderMap, - ) -> Result { - fn get_header<'a>( - map: &'a HeaderMap, - key: &'a str, - ) -> Result<&'a str, Error> { - let hdr = map.get(key).ok_or_else(|| Error::MissingHeaderKey { - key: key.to_string(), - })?; - std::str::from_utf8(hdr.as_bytes()) - .map_err(|err| Error::Database(err.to_string())) - } - let summary = - serde_json::from_str(get_header(headers, "X-ClickHouse-Summary")?) - .map_err(|err| Error::Database(err.to_string()))?; - let id = get_header(headers, "X-ClickHouse-Query-Id")? - .parse() - .map_err(|err: uuid::Error| Error::Database(err.to_string()))?; - Ok(Self { id, elapsed, summary }) - } -} - -/// A tabular result from a SQL query against a timeseries. -#[derive(Clone, Debug, Default, serde::Serialize)] -pub struct Table { - /// The name of each column in the result set. - pub column_names: Vec, - /// The rows of the result set, one per column. - pub rows: Vec>, -} - -/// The full result of running a SQL query against a timeseries. -#[derive(Clone, Debug)] -pub struct QueryResult { - /// The query as written by the client. - pub original_query: String, - /// The rewritten query, run against the JOINed representation of the - /// timeseries. - /// - /// This is the query that is actually run in the database itself. - pub rewritten_query: String, - /// Metadata about the resource usage of the query. - pub metadata: QueryMetadata, - /// The result of the query, with column names and rows. - pub table: Table, + /// Fires when a SQL query ends, either in success or failure. + fn sql__query__done(_: &usdt::UniqueId) {} } /// A `Client` to the ClickHouse metrics database. @@ -229,76 +105,6 @@ impl Client { Ok(()) } - /// Transform a SQL query against a timeseries, but do not execute it. - pub async fn transform_query( - &self, - query: impl AsRef, - ) -> Result { - let restricted = RestrictedQuery::new(query.as_ref())?; - restricted.to_oximeter_sql(&*self.schema.lock().await) - } - - /// Run a SQL query against a timeseries. 
- pub async fn query( - &self, - query: impl AsRef, - ) -> Result { - let original_query = query.as_ref().trim_end_matches(';'); - let ox_sql = self.transform_query(original_query).await?; - let rewritten = format!("{ox_sql} FORMAT JSONEachRow"); - debug!( - self.log, - "rewrote restricted query"; - "original_sql" => &original_query, - "rewritten_sql" => &rewritten, - ); - let request = self - .client - .post(&self.url) - .query(&[ - ("output_format_json_quote_64bit_integers", "0"), - ("database", crate::DATABASE_NAME), - ]) - .body(rewritten.clone()); - let query_start = Instant::now(); - let response = handle_db_response( - request - .send() - .await - .map_err(|err| Error::DatabaseUnavailable(err.to_string()))?, - ) - .await?; - let metadata = QueryMetadata::from_headers( - query_start.elapsed(), - response.headers(), - )?; - let text = response.text().await.unwrap(); - let mut table = Table::default(); - for line in text.lines() { - let row = - serde_json::from_str::>( - line.trim(), - ) - .unwrap(); - if table.column_names.is_empty() { - table.column_names.extend(row.keys().cloned()) - } else { - assert!(table - .column_names - .iter() - .zip(row.keys()) - .all(|(k1, k2)| k1 == k2)); - } - table.rows.push(row.into_values().collect()); - } - Ok(QueryResult { - original_query: original_query.to_string(), - rewritten_query: rewritten, - metadata, - table, - }) - } - /// Select timeseries from criteria on the fields and start/end timestamps. pub async fn select_timeseries_with( &self, @@ -348,6 +154,7 @@ impl Client { Some(field_query) => { self.select_matching_timeseries_info(&field_query, &schema) .await? + .1 } None => BTreeMap::new(), }; @@ -367,6 +174,7 @@ impl Client { } } + /// Return a page of timeseries schema from the database. pub async fn list_timeseries( &self, page: &WhichPage, @@ -401,6 +209,7 @@ impl Client { Some(field_query) => { self.select_matching_timeseries_info(&field_query, &schema) .await? 
+ .1 } None => BTreeMap::new(), }; @@ -445,6 +254,7 @@ impl Client { concat!( "SELECT * ", "FROM {}.timeseries_schema ", + "ORDER BY timeseries_name ", "LIMIT {} ", "FORMAT JSONEachRow;", ), @@ -457,6 +267,7 @@ impl Client { concat!( "SELECT * FROM {}.timeseries_schema ", "WHERE timeseries_name > '{}' ", + "ORDER BY timeseries_name ", "LIMIT {} ", "FORMAT JSONEachRow;", ), @@ -466,7 +277,7 @@ impl Client { ) } }; - let body = self.execute_with_body(sql).await?; + let body = self.execute_with_body(sql).await?.1; let schema = body .lines() .map(|line| { @@ -848,14 +659,14 @@ impl Client { ); let version = match self.execute_with_body(sql).await { - Ok(body) if body.is_empty() => { + Ok((_, body)) if body.is_empty() => { warn!( self.log, "no version in database (treated as 'version 0')" ); 0 } - Ok(body) => body.trim().parse::().map_err(|err| { + Ok((_, body)) => body.trim().parse::().map_err(|err| { Error::Database(format!("Cannot read version: {err}")) })?, Err(Error::Database(err)) @@ -895,14 +706,13 @@ impl Client { "INSERT INTO {db_name}.version (*) VALUES ({version}, now());", db_name = crate::DATABASE_NAME, ); - self.execute_with_body(sql).await?; - Ok(()) + self.execute(sql).await } /// Verifies if instance is part of oximeter_cluster pub async fn is_oximeter_cluster(&self) -> Result { let sql = "SHOW CLUSTERS FORMAT JSONEachRow;"; - let res = self.execute_with_body(sql).await?; + let res = self.execute_with_body(sql).await?.1; Ok(res.contains("oximeter_cluster")) } @@ -972,8 +782,9 @@ impl Client { &self, field_query: &str, schema: &TimeseriesSchema, - ) -> Result, Error> { - let body = self.execute_with_body(field_query).await?; + ) -> Result<(QuerySummary, BTreeMap), Error> + { + let (summary, body) = self.execute_with_body(field_query).await?; let mut results = BTreeMap::new(); for line in body.lines() { let row: model::FieldSelectRow = serde_json::from_str(line) @@ -982,7 +793,7 @@ impl Client { model::parse_field_select_row(&row, schema); results.insert(id, (target, metric)); } - Ok(results) + Ok((summary, results)) } // Given information returned from `select_matching_timeseries_info`, select the actual @@ -996,7 +807,8 @@ impl Client { let mut timeseries_by_key = BTreeMap::new(); let keys = info.keys().copied().collect::>(); let measurement_query = query.measurement_query(&keys); - for line in self.execute_with_body(&measurement_query).await?.lines() { + for line in self.execute_with_body(&measurement_query).await?.1.lines() + { let (key, measurement) = model::parse_measurement_from_row(line, schema.datum_type); let timeseries = timeseries_by_key.entry(key).or_insert_with( @@ -1032,7 +844,10 @@ impl Client { // Execute a generic SQL statement, awaiting the response as text // // TODO-robustness This currently does no validation of the statement. - async fn execute_with_body(&self, sql: S) -> Result + async fn execute_with_body( + &self, + sql: S, + ) -> Result<(QuerySummary, String), Error> where S: AsRef, { @@ -1042,24 +857,50 @@ impl Client { "executing SQL query"; "sql" => &sql, ); + + // Run the SQL query itself. + // + // This code gets a bit convoluted, so that we can fire the USDT probe + // in all situations, even when the various fallible operations + // complete. let id = usdt::UniqueId::new(); - probes::query__start!(|| (&id, &sql)); - let response = handle_db_response( - self.client - .post(&self.url) - // See regression test `test_unquoted_64bit_integers` for details. 
- .query(&[("output_format_json_quote_64bit_integers", "0")]) - .body(sql) - .send() - .await - .map_err(|err| Error::DatabaseUnavailable(err.to_string()))?, - ) - .await? - .text() - .await - .map_err(|err| Error::Database(err.to_string())); - probes::query__done!(|| (&id)); - response + probes::sql__query__start!(|| (&id, &sql)); + let start = Instant::now(); + + // Submit the SQL request itself. + let response = self + .client + .post(&self.url) + .query(&[("output_format_json_quote_64bit_integers", "0")]) + .body(sql) + .send() + .await + .map_err(|err| { + probes::sql__query__done!(|| (&id)); + Error::DatabaseUnavailable(err.to_string()) + })?; + + // Convert the HTTP response into a database response. + let response = handle_db_response(response).await.map_err(|err| { + probes::sql__query__done!(|| (&id)); + err + })?; + + // Extract the query summary, measuring resource usage and duration. + let summary = + QuerySummary::from_headers(start.elapsed(), response.headers()) + .map_err(|err| { + probes::sql__query__done!(|| (&id)); + err + })?; + + // Extract the actual text of the response. + let text = response.text().await.map_err(|err| { + probes::sql__query__done!(|| (&id)); + Error::Database(err.to_string()) + })?; + probes::sql__query__done!(|| (&id)); + Ok((summary, text)) } // Get timeseries schema from the database. @@ -1095,7 +936,7 @@ impl Client { ) } }; - let body = self.execute_with_body(sql).await?; + let body = self.execute_with_body(sql).await?.1; if body.is_empty() { trace!(self.log, "no new timeseries schema in database"); } else { @@ -1113,167 +954,6 @@ impl Client { } Ok(()) } - - // Unroll each sample into its consituent rows, after verifying the schema. - // - // Note that this also inserts the schema into the internal cache, if it - // does not already exist there. - async fn unroll_samples(&self, samples: &[Sample]) -> UnrolledSampleRows { - let mut seen_timeseries = BTreeSet::new(); - let mut rows = BTreeMap::new(); - let mut new_schema = BTreeMap::new(); - - for sample in samples.iter() { - match self.verify_or_cache_sample_schema(sample).await { - Err(_) => { - // Skip the sample, but otherwise do nothing. The error is logged in the above - // call. - continue; - } - Ok(None) => {} - Ok(Some((name, schema))) => { - debug!( - self.log, - "new timeseries schema"; - "timeseries_name" => %name, - "schema" => %schema - ); - new_schema.insert(name, schema); - } - } - - // Key on both the timeseries name and key, as timeseries may actually share keys. - let key = ( - sample.timeseries_name.as_str(), - crate::timeseries_key(&sample), - ); - if !seen_timeseries.contains(&key) { - for (table_name, table_rows) in model::unroll_field_rows(sample) - { - rows.entry(table_name) - .or_insert_with(Vec::new) - .extend(table_rows); - } - } - - let (table_name, measurement_row) = - model::unroll_measurement_row(sample); - - rows.entry(table_name) - .or_insert_with(Vec::new) - .push(measurement_row); - - seen_timeseries.insert(key); - } - - UnrolledSampleRows { new_schema, rows } - } - - // Save new schema to the database, or remove them from the cache on - // failure. - // - // This attempts to insert the provided schema into the timeseries schema - // table. If that fails, those schema are _also_ removed from the internal - // cache. - // - // TODO-robustness There's still a race possible here. If two distinct clients receive new - // but conflicting schema, they will both try to insert those at some point into the schema - // tables. 
It's not clear how to handle this, since ClickHouse provides no transactions. - // This is unlikely to happen at this point, because the design is such that there will be - // a single `oximeter` instance, which has one client object, connected to a single - // ClickHouse server. But once we start replicating data, the window within which the race - // can occur is much larger, since it includes the time it takes ClickHouse to replicate - // data between nodes. - // - // NOTE: This is an issue even in the case where the schema don't conflict. Two clients may - // receive a sample with a new schema, and both would then try to insert that schema. - async fn save_new_schema_or_remove( - &self, - new_schema: BTreeMap, - ) -> Result<(), Error> { - if !new_schema.is_empty() { - debug!( - self.log, - "inserting {} new timeseries schema", - new_schema.len() - ); - const APPROX_ROW_SIZE: usize = 64; - let mut body = String::with_capacity( - APPROX_ROW_SIZE + APPROX_ROW_SIZE * new_schema.len(), - ); - body.push_str("INSERT INTO "); - body.push_str(crate::DATABASE_NAME); - body.push_str(".timeseries_schema FORMAT JSONEachRow\n"); - for row_data in new_schema.values() { - body.push_str(row_data); - body.push_str("\n"); - } - - // Try to insert the schema. - // - // If this fails, be sure to remove the schema we've added from the - // internal cache. Since we check the internal cache first for - // schema, if we fail here but _don't_ remove the schema, we'll - // never end up inserting the schema, but we will insert samples. - if let Err(e) = self.execute(body).await { - debug!( - self.log, - "failed to insert new schema, removing from cache"; - "error" => ?e, - ); - let mut schema = self.schema.lock().await; - for name in new_schema.keys() { - schema - .remove(name) - .expect("New schema should have been cached"); - } - return Err(e); - } - } - Ok(()) - } - - // Insert unrolled sample rows into the corresponding tables. - async fn insert_unrolled_samples( - &self, - rows: BTreeMap>, - ) -> Result<(), Error> { - for (table_name, rows) in rows { - let body = format!( - "INSERT INTO {table_name} FORMAT JSONEachRow\n{row_data}\n", - table_name = table_name, - row_data = rows.join("\n") - ); - // TODO-robustness We've verified the schema, so this is likely a transient failure. - // But we may want to check the actual error condition, and, if possible, continue - // inserting any remaining data. - self.execute(body).await?; - debug!( - self.log, - "inserted rows into table"; - "n_rows" => rows.len(), - "table_name" => table_name, - ); - } - - // TODO-correctness We'd like to return all errors to clients here, and there may be as - // many as one per sample. It's not clear how to structure this in a way that's useful. - Ok(()) - } - - // Run one or more SQL statements. - // - // This is intended to be used for the methods which run SQL from one of the - // SQL files in the crate, e.g., the DB initialization or update files. - async fn run_many_sql_statements( - &self, - sql: impl AsRef, - ) -> Result<(), Error> { - for stmt in sql.as_ref().split(';').filter(|s| !s.trim().is_empty()) { - self.execute(stmt).await?; - } - Ok(()) - } } // A regex used to validate supported schema updates. @@ -1297,87 +977,6 @@ fn schema_validation_regex() -> &'static Regex { .expect("Invalid regex") }) } - -#[derive(Debug)] -struct UnrolledSampleRows { - // The timeseries schema rows, keyed by timeseries name. - new_schema: BTreeMap, - // The rows to insert in all the other tables, keyed by the table name. 
- rows: BTreeMap>, -} - -/// A trait allowing a [`Client`] to write data into the timeseries database. -/// -/// The vanilla [`Client`] object allows users to query the timeseries database, returning -/// timeseries samples corresponding to various filtering criteria. This trait segregates the -/// methods required for _writing_ new data into the database, and is intended only for use by the -/// `oximeter-collector` crate. -#[async_trait] -pub trait DbWrite { - /// Insert the given samples into the database. - async fn insert_samples(&self, samples: &[Sample]) -> Result<(), Error>; - - /// Initialize the replicated telemetry database, creating tables as needed. - async fn init_replicated_db(&self) -> Result<(), Error>; - - /// Initialize a single node telemetry database, creating tables as needed. - async fn init_single_node_db(&self) -> Result<(), Error>; - - /// Wipe the ClickHouse database entirely from a single node set up. - async fn wipe_single_node_db(&self) -> Result<(), Error>; - - /// Wipe the ClickHouse database entirely from a replicated set up. - async fn wipe_replicated_db(&self) -> Result<(), Error>; -} - -#[async_trait] -impl DbWrite for Client { - /// Insert the given samples into the database. - async fn insert_samples(&self, samples: &[Sample]) -> Result<(), Error> { - debug!(self.log, "unrolling {} total samples", samples.len()); - let UnrolledSampleRows { new_schema, rows } = - self.unroll_samples(samples).await; - self.save_new_schema_or_remove(new_schema).await?; - self.insert_unrolled_samples(rows).await - } - - /// Initialize the replicated telemetry database, creating tables as needed. - async fn init_replicated_db(&self) -> Result<(), Error> { - debug!(self.log, "initializing ClickHouse database"); - self.run_many_sql_statements(include_str!( - "../schema/replicated/db-init.sql" - )) - .await - } - - /// Wipe the ClickHouse database entirely from a replicated set up. - async fn wipe_replicated_db(&self) -> Result<(), Error> { - debug!(self.log, "wiping ClickHouse database"); - self.run_many_sql_statements(include_str!( - "../schema/replicated/db-wipe.sql" - )) - .await - } - - /// Initialize a single node telemetry database, creating tables as needed. - async fn init_single_node_db(&self) -> Result<(), Error> { - debug!(self.log, "initializing ClickHouse database"); - self.run_many_sql_statements(include_str!( - "../schema/single-node/db-init.sql" - )) - .await - } - - /// Wipe the ClickHouse database entirely from a single node set up. - async fn wipe_single_node_db(&self) -> Result<(), Error> { - debug!(self.log, "wiping ClickHouse database"); - self.run_many_sql_statements(include_str!( - "../schema/single-node/db-wipe.sql" - )) - .await - } -} - // Return Ok if the response indicates success, otherwise return either the reqwest::Error, if this // is a client-side error, or the body of the actual error retrieved from ClickHouse if the error // was generated there. 
@@ -1397,6 +996,7 @@ async fn handle_db_response( #[cfg(test)] mod tests { + use super::dbwrite::UnrolledSampleRows; use super::*; use crate::model::OXIMETER_VERSION; use crate::query; @@ -1933,7 +1533,7 @@ mod tests { let mut result = String::from(""); let tries = 5; for _ in 0..tries { - result = client_2.execute_with_body(sql.clone()).await.unwrap(); + result = client_2.execute_with_body(sql.clone()).await.unwrap().1; if !result.contains("oximeter") { sleep(Duration::from_secs(1)).await; continue; @@ -1948,21 +1548,21 @@ mod tests { let sql = String::from( "INSERT INTO oximeter.measurements_string (datum) VALUES ('hiya');", ); - let result = client_2.execute_with_body(sql.clone()).await.unwrap(); + let result = client_2.execute_with_body(sql.clone()).await.unwrap().1; info!(log, "Inserted datum to client #2"; "sql" => sql, "result" => result); // Make sure replicas are synched let sql = String::from( "SYSTEM SYNC REPLICA oximeter.measurements_string_local;", ); - let result = client_1.execute_with_body(sql.clone()).await.unwrap(); + let result = client_1.execute_with_body(sql.clone()).await.unwrap().1; info!(log, "Synced replicas via client #1"; "sql" => sql, "result" => result); // Make sure data exists in the other replica let sql = String::from( "SELECT * FROM oximeter.measurements_string FORMAT JSONEachRow;", ); - let result = client_1.execute_with_body(sql.clone()).await.unwrap(); + let result = client_1.execute_with_body(sql.clone()).await.unwrap().1; info!(log, "Retrieved values via client #1"; "sql" => sql, "result" => result.clone()); assert!(result.contains("hiya")); @@ -2124,7 +1724,7 @@ mod tests { let sql = String::from( "SELECT * FROM oximeter.timeseries_schema FORMAT JSONEachRow;", ); - let result = client.execute_with_body(sql).await.unwrap(); + let result = client.execute_with_body(sql).await.unwrap().1; let schema = result .lines() .map(|line| { @@ -2253,7 +1853,8 @@ mod tests { table )) .await - .unwrap(); + .unwrap() + .1; let actual_count = body.lines().next().unwrap().trim().parse::().expect( "Expected a count of the number of rows from ClickHouse", @@ -2301,7 +1902,8 @@ mod tests { "SELECT toUInt64(1) AS foo FORMAT JSONEachRow;".to_string(), ) .await - .unwrap(); + .unwrap() + .1; let json: Value = serde_json::from_str(&output).unwrap(); assert_eq!(json["foo"], Value::Number(1u64.into())); @@ -3167,7 +2769,8 @@ mod tests { let body = client .execute_with_body(select_sql) .await - .expect("Failed to select field row"); + .expect("Failed to select field row") + .1; let actual_row: serde_json::Value = serde_json::from_str(&body) .expect("Failed to parse field row JSON"); println!("{actual_row:?}"); @@ -3507,7 +3110,8 @@ mod tests { let body = client .execute_with_body(select_sql) .await - .expect("Failed to select measurement row"); + .expect("Failed to select measurement row") + .1; let (_, actual_row) = crate::model::parse_measurement_from_row( &body, measurement.datum_type(), @@ -3528,6 +3132,7 @@ mod tests { ) .await .expect("Failed to SELECT from database") + .1 .lines() .count() } @@ -3749,7 +3354,7 @@ mod tests { // one. let response = client.execute_with_body( "SELECT COUNT() FROM oximeter.timeseries_schema FORMAT JSONEachRow; - ").await.unwrap(); + ").await.unwrap().1; assert_eq!(response.lines().count(), 1, "Expected exactly 1 schema"); assert_eq!(client.schema.lock().await.len(), 1); @@ -3766,7 +3371,7 @@ mod tests { // only the one schema. 
let response = client.execute_with_body( "SELECT COUNT() FROM oximeter.timeseries_schema FORMAT JSONEachRow; - ").await.unwrap(); + ").await.unwrap().1; assert_eq!( response.lines().count(), 1, @@ -3804,7 +3409,7 @@ mod tests { crate::DATABASE_NAME, crate::model::DbDatumType::from(ty), ); - let res = client.execute_with_body(sql).await.unwrap(); + let res = client.execute_with_body(sql).await.unwrap().1; let count = res.trim().parse::().unwrap(); assert_eq!(count, 0); } @@ -4099,7 +3704,8 @@ mod tests { " )) .await - .unwrap(); + .unwrap() + .1; let mut lines = body.lines(); assert_eq!(lines.next().unwrap(), "\"col0\",\"UInt8\""); assert_eq!(lines.next().unwrap(), "\"col1\",\"UInt16\""); @@ -4319,7 +3925,8 @@ mod tests { " )) .await - .unwrap(); + .unwrap() + .1; let mut lines = body.lines(); assert_eq!(lines.next().unwrap(), "\"col0\",\"UInt8\""); assert_eq!(lines.next().unwrap(), "\"col1\",\"UInt16\""); @@ -4480,7 +4087,7 @@ mod tests { crate::DATABASE_NAME, crate::model::DbFieldType::from(ty), ); - let res = client.execute_with_body(sql).await.unwrap(); + let res = client.execute_with_body(sql).await.unwrap().1; let count = res.trim().parse::().unwrap(); assert_eq!(count, 0); } @@ -4488,6 +4095,7 @@ mod tests { logctx.cleanup_successful(); } + #[cfg(any(feature = "sql", test))] #[tokio::test] async fn test_sql_query_output() { let logctx = test_setup_log("test_sql_query_output"); diff --git a/oximeter/db/src/client/oxql.rs b/oximeter/db/src/client/oxql.rs new file mode 100644 index 0000000000..9da4abd007 --- /dev/null +++ b/oximeter/db/src/client/oxql.rs @@ -0,0 +1,1281 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Client methods for running OxQL queries against the timeseries database. + +// Copyright 2024 Oxide Computer Company + +use super::query_summary::QuerySummary; +use crate::client::Client; +use crate::model; +use crate::oxql; +use crate::oxql::ast::table_ops::filter; +use crate::oxql::ast::table_ops::filter::Filter; +use crate::query::field_table_name; +use crate::Error; +use crate::Metric; +use crate::Target; +use crate::TimeseriesKey; +use oximeter::TimeseriesSchema; +use slog::debug; +use slog::trace; +use slog::Logger; +use std::collections::BTreeMap; +use std::time::Duration; +use std::time::Instant; +use uuid::Uuid; + +#[usdt::provider(provider = "clickhouse_client")] +mod probes { + /// Fires when an OxQL query starts, with the query ID and string. + fn oxql__query__start(_: &usdt::UniqueId, _: &Uuid, query: &str) {} + + /// Fires when an OxQL query ends, either in success or failure. + fn oxql__query__done(_: &usdt::UniqueId, _: &Uuid) {} + + /// Fires when an OxQL table operation starts, with the query ID and details + /// of the operation itself. + fn oxql__table__op__start(_: &usdt::UniqueId, _: &Uuid, op: &str) {} + + /// Fires when an OxQL table operation ends. + fn oxql__table__op__done(_: &usdt::UniqueId, _: &Uuid) {} +} + +/// The full result of an OxQL query. +#[derive(Clone, Debug)] +pub struct OxqlResult { + /// A query ID assigned to this OxQL query. + pub query_id: Uuid, + + /// The total duration of the OxQL query. + /// + /// This includes the time to run SQL queries against the database, and the + /// internal processing for each transformation in the query pipeline. + pub total_duration: Duration, + + /// The summary for each SQL query run against the ClickHouse database. 
+ /// + /// Each OxQL query translates into many calls to ClickHouse. We fetch the + /// fields; count the number of samples; and finally fetch the samples + /// themselves. In the future, more may be needed as well. + /// + /// This returns a list of summaries, one for each SQL query that was run. + /// It includes the ClickHouse-assigned query ID for correlation and looking + /// up in the logs. + pub query_summaries: Vec, + + /// The list of OxQL tables returned from the query. + pub tables: Vec, +} + +/// The maximum number of data values fetched from the database for an OxQL +/// query. +// +// The `Client::oxql_query()` API is currently unpaginated. It's also not clear +// _how_ to paginate it. The objects contributing to the size of the returned +// value, the actual data points, are nested several layers deep, inside the +// `Timeseries` and `Table`s. A page size is supposed to refer to the top-level +// object, so we'd need to flatten this hierarchy for that to work. That's +// undesirable because it will lead to a huge amount of duplication of the table +// / timeseries-level information, once for each point. +// +// Also, since we cannot use a cursor-based pagination, we're stuck with +// limit-offset. That means we may need to run substantially all of the query, +// just to know how to retrieve the next page, sidestepping one of the main +// goals of pagination (to limit resource usage). +// +// Note that it's also hard or impossible to _predict_ how much data a query +// will use. We need to count the number of rows in the database, for example, +// _and also_ understand how table operations might change that size. For +// example, alignment is allowed to upsample the data (within limits), so the +// number of rows in the database are not the only factor. +// +// This limit here is a crude attempt to limit just the raw data fetched from +// ClickHouse itself. For any OxQL query, we may retrieve many measurements from +// the database. Each time we do so, we increment a counter, and compare it to +// this. If we exceed it, the whole query fails. +pub const MAX_DATABASE_ROWS: u64 = 1_000_000; + +// When running an OxQL query, we may need to separately run several field +// queries, to get the consistent keys independently for a range of time. +// +// This type stores the predicates used to generate the keys, and the keys +// consistent with it. +struct ConsistentKeyGroup { + predicates: Option, + consistent_keys: BTreeMap, +} + +impl Client { + /// Run a OxQL query. + pub async fn oxql_query( + &self, + query: impl AsRef, + ) -> Result { + // TODO-security: Need a way to implement authz checks for things like + // viewing resources in another project or silo. + // + // I think one way to do that is look at the predicates and make sure + // they refer to things the user has access to. Another is to add some + // implicit predicates here, indicating the subset of fields that the + // query should be able to access. + // + // This probably means we'll need to parse the query in Nexus, so that + // we can attach the other filters ourselves. + // + // See https://github.com/oxidecomputer/omicron/issues/5298. 
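        //
        // As a usage sketch only (the timeseries name and filter below are
        // hypothetical, and this snippet is not part of this change), a
        // caller might run:
        //
        //   let result = client
        //       .oxql_query("get some_target:some_metric | filter timestamp > @now() - 5m")
        //       .await?;
        //   for table in &result.tables {
        //       println!("{}: {} timeseries", table.name(), table.n_timeseries());
        //   }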
+ let query = query.as_ref(); + let parsed_query = oxql::Query::new(query)?; + let query_id = Uuid::new_v4(); + let query_log = + self.log.new(slog::o!("query_id" => query_id.to_string())); + debug!( + query_log, + "parsed OxQL query"; + "query" => query, + "parsed_query" => ?parsed_query, + ); + let id = usdt::UniqueId::new(); + probes::oxql__query__start!(|| (&id, &query_id, query)); + let mut total_rows_fetched = 0; + let result = self + .run_oxql_query( + &query_log, + query_id, + parsed_query, + &mut total_rows_fetched, + None, + ) + .await; + probes::oxql__query__done!(|| (&id, &query_id)); + result + } + + /// Rewrite the predicates from an OxQL query so that they apply only to the + /// field tables. + fn rewrite_predicate_for_fields( + schema: &TimeseriesSchema, + preds: &filter::Filter, + ) -> Result, Error> { + // Walk the set of predicates, keeping those which apply to this schema. + match &preds.expr { + filter::FilterExpr::Simple(inner) => { + // If the predicate names a field in this timeseries schema, + // return that predicate printed as a string. If not, we return + // None. + let Some(field_schema) = + schema.schema_for_field(inner.ident.as_str()) + else { + return Ok(None); + }; + if !inner.value_type_is_compatible_with_field( + field_schema.field_type, + ) { + return Err(Error::from(anyhow::anyhow!( + "Expression for field {} is not compatible with \ + its type {}", + field_schema.name, + field_schema.field_type, + ))); + } + Ok(Some(inner.as_db_safe_string())) + } + filter::FilterExpr::Compound(inner) => { + let left_pred = + Self::rewrite_predicate_for_fields(schema, &inner.left)?; + let right_pred = + Self::rewrite_predicate_for_fields(schema, &inner.right)?; + let out = match (left_pred, right_pred) { + (Some(left), Some(right)) => Some(format!( + "{}({left}, {right})", + inner.op.as_db_function_name() + )), + (Some(single), None) | (None, Some(single)) => Some(single), + (None, None) => None, + }; + Ok(out) + } + } + } + + /// Rewrite the predicates from an OxQL query so that they apply only to the + /// measurement table. + fn rewrite_predicate_for_measurements( + schema: &TimeseriesSchema, + preds: &oxql::ast::table_ops::filter::Filter, + ) -> Result, Error> { + // Walk the set of predicates, keeping those which apply to this schema. + match &preds.expr { + filter::FilterExpr::Simple(inner) => { + // The relevant columns on which we filter depend on the datum + // type of the timeseries. All timeseries support "timestamp". + let ident = inner.ident.as_str(); + if ident == "timestamp" { + if matches!( + inner.value, + oxql::ast::literal::Literal::Timestamp(_) + ) { + return Ok(Some(inner.as_db_safe_string())); + } + return Err(Error::from(anyhow::anyhow!( + "Literal cannot be compared with a timestamp" + ))); + } + + // We do not currently support filtering in the database on + // values, only the `timestamp` and possibly `start_time` (if + // the metric is cumulative). + if ident == "start_time" { + if !schema.datum_type.is_cumulative() { + return Err(Error::from(anyhow::anyhow!( + "Start time can only be compared if the metric \ + is cumulative, but found one of type {}", + schema.datum_type, + ))); + } + if matches!( + inner.value, + oxql::ast::literal::Literal::Timestamp(_) + ) { + return Ok(Some(inner.as_db_safe_string())); + } + return Err(Error::from(anyhow::anyhow!( + "Literal cannot be compared with a timestamp" + ))); + } + + // We'll delegate to the actual table op to filter on any of the + // data columns. 
+ Ok(None) + } + filter::FilterExpr::Compound(inner) => { + let left_pred = Self::rewrite_predicate_for_measurements( + schema, + &inner.left, + )?; + let right_pred = Self::rewrite_predicate_for_measurements( + schema, + &inner.right, + )?; + let out = match (left_pred, right_pred) { + (Some(left), Some(right)) => Some(format!( + "{}({left}, {right})", + inner.op.as_db_function_name() + )), + (Some(single), None) | (None, Some(single)) => Some(single), + (None, None) => None, + }; + Ok(out) + } + } + } + + // Run one query. + // + // If the query is flat, run it directly. If it's nested, run each of them; + // concatenate the results; and then apply all the remaining + // transformations. + #[async_recursion::async_recursion] + async fn run_oxql_query( + &self, + query_log: &Logger, + query_id: Uuid, + query: oxql::Query, + total_rows_fetched: &mut u64, + outer_predicates: Option, + ) -> Result { + let split = query.split(); + if let oxql::ast::SplitQuery::Nested { subqueries, transformations } = + split + { + trace!( + query_log, + "OxQL query contains subqueries, running recursively" + ); + // Create the new set of outer predicates to pass in to the + // subquery, by merging the previous outer predicates with those of + // the transformation portion of this nested query. + let new_outer_predicates = + query.coalesced_predicates(outer_predicates.clone()); + + // Run each subquery recursively, and extend the results + // accordingly. + let mut query_summaries = Vec::with_capacity(subqueries.len()); + let mut tables = Vec::with_capacity(subqueries.len()); + let query_start = Instant::now(); + for subq in subqueries.into_iter() { + let res = self + .run_oxql_query( + query_log, + query_id, + subq, + total_rows_fetched, + new_outer_predicates.clone(), + ) + .await?; + query_summaries.extend(res.query_summaries); + tables.extend(res.tables); + } + for tr in transformations.into_iter() { + trace!( + query_log, + "applying query transformation"; + "transformation" => ?tr, + ); + let id = usdt::UniqueId::new(); + probes::oxql__table__op__start!(|| ( + &id, + &query_id, + format!("{tr:?}") + )); + let new_tables = tr.apply(&tables, query.end_time()); + probes::oxql__table__op__done!(|| (&id, &query_id)); + tables = new_tables?; + } + let result = OxqlResult { + query_id, + total_duration: query_start.elapsed(), + query_summaries, + tables, + }; + return Ok(result); + } + + // This is a flat query, let's just run it directly. First step is + // getting the schema itself. + let query_start = Instant::now(); + let oxql::ast::SplitQuery::Flat(query) = split else { + unreachable!(); + }; + let name = query.timeseries_name(); + let Some(schema) = self.schema_for_timeseries(name).await? else { + return Err(Error::TimeseriesNotFound(name.to_string())); + }; + debug!( + query_log, + "running flat OxQL query"; + "query" => ?query, + "timeseries_name" => %name, + ); + + // Fetch the consistent fields (including keys) for this timeseries, + // including filtering them based on the predicates in the query + // that apply to this timeseries in particular. We also need to merge + // them in with the predicates passed in from a possible outer query. + let preds = query.coalesced_predicates(outer_predicates.clone()); + debug!( + query_log, + "coalesced predicates from flat query"; + "outer_predicates" => ?&outer_predicates, + "coalesced" => ?&preds, + ); + + // We generally run a few SQL queries for each OxQL query: + // + // - Some number of queries to fetch the timeseries keys that are + // consistent with it. 
+ // - Fetch the consistent samples. + // + // Note that there are often 2 or more queries needed for the first + // case. In particular, there is one query required for each independent + // time range in the query (including when a time range isn't + // specified). + // + // For example, consider the filter operation: + // + // ``` + // filter some_predicate || (timestamp > @now() - 1m && other_predicate) + // ``` + // + // That is, we return all timepoints for things where `some_predicate` + // is true, and only the last minute for those satisfying + // `other_predicate`. If we simply drop the timestamp filter, and run + // the two predicates conjoined, we would erroneously return only the + // last minute for everything, including those satisfying + // `some_predicate`. + // + // So instead, we need to run one query for each of those, fetch the + // keys associated with it, and then independently select the + // measurements satisfying both the time range and key-consistency + // constraints. Thankfully that can be done in one query, albeit a + // complicated one. + // + // Convert any outer predicates to DNF, and split into disjoint key + // groups for the measurement queries. + let disjoint_predicates = if let Some(preds) = preds.as_ref() { + let simplified = preds.simplify_to_dnf()?; + debug!( + query_log, + "simplified filtering predicates to disjunctive normal form"; + "original" => %preds, + "DNF" => %simplified, + ); + simplified + .flatten_disjunctions() + .into_iter() + .map(Option::Some) + .collect() + } else { + // There are no outer predicates, so we have 1 disjoint key group, + // with no predicates. + vec![None] + }; + + // Run each query group indepdendently, keeping the predicates and the + // timeseries keys corresponding to it. + let mut consistent_key_groups = + Vec::with_capacity(1 + disjoint_predicates.len()); + let mut query_summaries = + Vec::with_capacity(1 + disjoint_predicates.len()); + for predicates in disjoint_predicates.into_iter() { + debug!( + query_log, + "running disjoint query predicate"; + "predicate" => predicates.as_ref().map(|s| s.to_string()).unwrap_or("none".into()), + ); + let all_fields_query = + self.all_fields_query(&schema, predicates.as_ref())?; + let (summary, consistent_keys) = self + .select_matching_timeseries_info(&all_fields_query, &schema) + .await?; + debug!( + query_log, + "fetched information for matching timeseries keys"; + "n_keys" => consistent_keys.len(), + ); + query_summaries.push(summary); + + // If there are no consistent keys, move to the next independent + // query chunk. + if consistent_keys.is_empty() { + continue; + } + + // Push the disjoint filter itself, plus the keys consistent with + // it. + consistent_key_groups + .push(ConsistentKeyGroup { predicates, consistent_keys }); + } + + // If there are no consistent keys _at all_, we can just return an empty + // table. + if consistent_key_groups.is_empty() { + let result = OxqlResult { + query_id, + total_duration: query_start.elapsed(), + query_summaries, + tables: vec![oxql::Table::new(schema.timeseries_name.as_str())], + }; + return Ok(result); + } + + // Fetch the consistent measurements for this timeseries, by key group. + // + // We'll keep track of all the measurements for this timeseries schema, + // organized by timeseries key. That's because we fetch all consistent + // samples at once, so we get many concrete _timeseries_ in the returned + // response, even though they're all from the same schema. 
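        //
        // Continuing the example above, that filter is already in disjunctive
        // normal form, so it flattens into two disjuncts:
        //
        //   1. some_predicate
        //   2. timestamp > @now() - 1m && other_predicate
        //
        // and we run one field query per disjunct, collecting the timeseries
        // keys consistent with each one separately.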
+ let (summary, timeseries_by_key) = self + .select_matching_samples( + query_log, + &schema, + &consistent_key_groups, + total_rows_fetched, + ) + .await?; + query_summaries.push(summary); + + // At this point, let's construct a set of tables and run the results + // through the transformation pipeline. + let mut tables = vec![oxql::Table::from_timeseries( + schema.timeseries_name.as_str(), + timeseries_by_key.into_values(), + )?]; + + let transformations = query.transformations(); + debug!( + query_log, + "constructed OxQL table, starting transformation pipeline"; + "name" => tables[0].name(), + "n_timeseries" => tables[0].n_timeseries(), + "n_transformations" => transformations.len(), + ); + for tr in transformations { + trace!( + query_log, + "applying query transformation"; + "transformation" => ?tr, + ); + let id = usdt::UniqueId::new(); + probes::oxql__table__op__start!(|| ( + &id, + &query_id, + format!("{tr:?}") + )); + let new_tables = tr.apply(&tables, query.end_time()); + probes::oxql__table__op__done!(|| (&id, &query_id)); + tables = new_tables?; + } + let result = OxqlResult { + query_id, + total_duration: query_start.elapsed(), + query_summaries, + tables, + }; + Ok(result) + } + + // Select samples matching the set of predicates and consistent keys. + // + // Note that this also implements the conversion from cumulative to gauge + // samples, depending on how data was requested. + async fn select_matching_samples( + &self, + query_log: &Logger, + schema: &TimeseriesSchema, + consistent_key_groups: &[ConsistentKeyGroup], + total_rows_fetched: &mut u64, + ) -> Result<(QuerySummary, BTreeMap), Error> + { + // We'll create timeseries for each key on the fly. To enable computing + // deltas, we need to track the last measurement we've seen as well. + let mut measurements_by_key: BTreeMap<_, Vec<_>> = BTreeMap::new(); + let measurements_query = self.measurements_query( + schema, + consistent_key_groups, + total_rows_fetched, + )?; + let mut n_measurements: u64 = 0; + let (summary, body) = + self.execute_with_body(&measurements_query).await?; + for line in body.lines() { + let (key, measurement) = + model::parse_measurement_from_row(line, schema.datum_type); + measurements_by_key.entry(key).or_default().push(measurement); + n_measurements += 1; + } + debug!( + query_log, + "fetched measurements for OxQL query"; + "n_keys" => measurements_by_key.len(), + "n_measurements" => n_measurements, + ); + + // At this point, we need to check that we're still within our maximum + // result size. The measurement query we issued limited the returned + // result to 1 more than the remainder on our allotment. So if we get + // exactly that limit, we know that there are more rows than we can + // allow. We don't know how many more, but we don't care, and we fail + // the query regardless. + update_total_rows_and_check( + query_log, + total_rows_fetched, + n_measurements, + )?; + + // At this point, we no longer care about the consistent_key groups. We + // throw away the predicates that distinguished them, and merge the + // timeseries information together. + let info = consistent_key_groups + .iter() + .map(|group| group.consistent_keys.clone()) + .reduce(|mut acc, current| { + acc.extend(current); + acc + }) + .expect("Should have at least one key-group for every query"); + + // Remove the last measurement, returning just the keys and timeseries. 
+ let mut out = BTreeMap::new(); + for (key, measurements) in measurements_by_key.into_iter() { + // Constuct a new timeseries, from the target/metric info. + let (target, metric) = info.get(&key).unwrap(); + let mut timeseries = oxql::Timeseries::new( + target + .fields + .iter() + .chain(metric.fields.iter()) + .map(|field| (field.name.clone(), field.value.clone())), + oxql::point::DataType::try_from(schema.datum_type)?, + if schema.datum_type.is_cumulative() { + oxql::point::MetricType::Delta + } else { + oxql::point::MetricType::Gauge + }, + )?; + + // Covert its oximeter measurements into OxQL data types. + let points = if schema.datum_type.is_cumulative() { + oxql::point::Points::delta_from_cumulative(&measurements)? + } else { + oxql::point::Points::gauge_from_gauge(&measurements)? + }; + timeseries.points = points; + debug!( + query_log, + "inserted new OxQL timeseries"; + "key" => key, + "metric_type" => ?timeseries.points.metric_type(), + "n_points" => timeseries.points.len(), + ); + out.insert(key, timeseries); + } + Ok((summary, out)) + } + + fn measurements_query( + &self, + schema: &TimeseriesSchema, + consistent_key_groups: &[ConsistentKeyGroup], + total_rows_fetched: &mut u64, + ) -> Result { + use std::fmt::Write; + + // Build the base query, which just selects the timeseries by name based + // on the datum type. + let mut query = self.measurements_query_raw(schema.datum_type); + query.push_str(" WHERE timeseries_name = '"); + write!(query, "{}", schema.timeseries_name).unwrap(); + query.push('\''); + + // Filter down the fields to those which apply to the data itself, which + // includes the timestamps and data values. The supported fields here + // depend on the datum type. + // + // We join all the consistent key groups with OR, which mirrors how they + // were split originally. + let all_predicates = consistent_key_groups + .iter() + .map(|group| { + // Write out the predicates on the measurements themselves, + // which really refers to the timestamps (and possibly start + // times). + let maybe_predicates = group + .predicates + .as_ref() + .map(|preds| { + Self::rewrite_predicate_for_measurements(schema, preds) + }) + .transpose()? + .flatten(); + + // Push the predicate that selects the timeseries keys, which + // are unique to this group. + let maybe_key_set = if group.consistent_keys.len() > 0 { + let mut chunk = String::from("timeseries_key IN ("); + let keys = group + .consistent_keys + .keys() + .map(ToString::to_string) + .collect::>() + .join(","); + chunk.push_str(&keys); + chunk.push(')'); + Some(chunk) + } else { + None + }; + + let chunk = match (maybe_predicates, maybe_key_set) { + (Some(preds), None) => preds, + (None, Some(key_set)) => key_set, + (Some(preds), Some(key_set)) => { + format!("({preds} AND {key_set})") + } + (None, None) => String::new(), + }; + Ok(chunk) + }) + .collect::, Error>>()? + .join(" OR "); + if !all_predicates.is_empty() { + query.push_str(" AND ("); + query.push_str(&all_predicates); + query.push(')'); + } + + // Always impose a strong order on these fields. + // + // The tables are all sorted by: + // + // - timeseries_name + // - timeseries_key + // - start_time, if present + // - timestamp + // + // We care most about the timestamp ordering, since that is assumed (and + // asserted) by downstream table operations. We use the full sort order + // of the table, however, to make things the most efficient. 
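        //
        // Put together, the tail of the generated measurement query looks
        // roughly like the following (illustrative only; `start_time` appears
        // only for cumulative types, and the limit depends on how many rows
        // earlier queries have already fetched):
        //
        //   ... ORDER BY timeseries_key, start_time, timestamp
        //   LIMIT 1000001
        //   FORMAT JSONEachRow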
+ query.push_str(" ORDER BY timeseries_key"); + if schema.datum_type.is_cumulative() { + query.push_str(", start_time"); + } + query.push_str(", timestamp"); + + // Push a limit clause, which restricts the number of records we could + // return. + // + // This is used to ensure that we never go above the limit in + // `MAX_RESULT_SIZE`. That restricts the _total_ number of rows we want + // to retch from the database. So we set our limit to be one more than + // the remainder on our allotment. If we get exactly as many as we set + // in the limit, then we fail the query because there are more rows that + // _would_ be returned. We don't know how many more, but there is at + // least 1 that pushes us over the limit. This prevents tricky + // TOCTOU-like bugs where we need to check the limit twice, and improves + // performance, since we don't return much more than we could possibly + // handle. + let remainder = MAX_DATABASE_ROWS - *total_rows_fetched; + query.push_str(" LIMIT "); + write!(query, "{}", remainder + 1).unwrap(); + + // Finally, use JSON format. + query.push_str(" FORMAT "); + query.push_str(crate::DATABASE_SELECT_FORMAT); + Ok(query) + } + + fn measurements_query_raw( + &self, + datum_type: oximeter::DatumType, + ) -> String { + let value_columns = if datum_type.is_histogram() { + "timeseries_key, start_time, timestamp, bins, counts" + } else if datum_type.is_cumulative() { + "timeseries_key, start_time, timestamp, datum" + } else { + "timeseries_key, timestamp, datum" + }; + format!( + "SELECT {} \ + FROM {}.{}", + value_columns, + crate::DATABASE_NAME, + crate::query::measurement_table_name(datum_type), + ) + } + + fn all_fields_query( + &self, + schema: &TimeseriesSchema, + preds: Option<&oxql::ast::table_ops::filter::Filter>, + ) -> Result { + // Filter down the fields to those which apply to this timeseries + // itself, and rewrite as a DB-safe WHERE clause. + let preds_for_fields = preds + .map(|p| Self::rewrite_predicate_for_fields(schema, p)) + .transpose()? + .flatten(); + let (already_has_where, mut query) = self.all_fields_query_raw(schema); + if let Some(preds) = preds_for_fields { + // If the raw field has only a single select query, then we've + // already added a "WHERE" clause. Simply tack these predicates onto + // that one. + if already_has_where { + query.push_str(" AND "); + } else { + query.push_str(" WHERE "); + } + query.push_str(&preds); + } + query.push_str(" FORMAT "); + query.push_str(crate::DATABASE_SELECT_FORMAT); + Ok(query) + } + + fn all_fields_query_raw( + &self, + schema: &TimeseriesSchema, + ) -> (bool, String) { + match schema.field_schema.len() { + 0 => unreachable!(), + 1 => { + let field_schema = schema.field_schema.first().unwrap(); + ( + true, + format!( + "SELECT DISTINCT timeseries_key, field_value AS {field_name} \ + FROM {db_name}.{field_table} \ + WHERE \ + timeseries_name = '{timeseries_name}' AND \ + field_name = '{field_name}'", + field_name = field_schema.name, + db_name = crate::DATABASE_NAME, + field_table = field_table_name(field_schema.field_type), + timeseries_name = schema.timeseries_name, + ) + ) + } + _ => { + let mut top_level_columns = + Vec::with_capacity(schema.field_schema.len()); + let mut field_subqueries = + Vec::with_capacity(schema.field_schema.len()); + + // Select each field value, aliasing it to its field name. 
+ for field_schema in schema.field_schema.iter() { + top_level_columns.push(format!( + "filter_on_{}.field_value AS {}", + field_schema.name, field_schema.name + )); + field_subqueries.push(( + format!( + "SELECT DISTINCT timeseries_key, field_value \ + FROM {db_name}.{field_table} \ + WHERE \ + timeseries_name = '{timeseries_name}' AND \ + field_name = '{field_name}' \ + ", + db_name = crate::DATABASE_NAME, + field_table = + field_table_name(field_schema.field_type), + timeseries_name = schema.timeseries_name, + field_name = field_schema.name, + ), + format!("filter_on_{}", field_schema.name), + )); + } + + // Write the top-level select statement, starting by selecting + // the timeseries key from the first field schema. + let mut out = format!( + "SELECT {}.timeseries_key AS timeseries_key, {} FROM ", + field_subqueries[0].1, + top_level_columns.join(", "), + ); + + // Then add all the subqueries selecting each field. + // + // We need to add these, along with their aliases. The first + // such subquery has no join conditions, but the later ones all + // refer to the previous via: + // + // `ON .timeseries_key = .timeseries_key` + for (i, (subq, alias)) in field_subqueries.iter().enumerate() { + // Push the subquery itself, aliased. + out.push('('); + out.push_str(subq); + out.push_str(") AS "); + out.push_str(alias); + + // Push the join conditions. + if i > 0 { + let previous_alias = &field_subqueries[i - 1].1; + out.push_str(" ON "); + out.push_str(alias); + out.push_str(".timeseries_key = "); + out.push_str(previous_alias); + out.push_str(".timeseries_key"); + } + + // Push the "INNER JOIN" expression itself, for all but the + // last subquery. + if i < field_subqueries.len() - 1 { + out.push_str(" INNER JOIN "); + } + } + (false, out) + } + } + } +} + +// Helper to update the number of total rows fetched so far, and check it's +// still under the limit. +fn update_total_rows_and_check( + query_log: &Logger, + total_rows_fetched: &mut u64, + count: u64, +) -> Result<(), Error> { + *total_rows_fetched += count; + if *total_rows_fetched > MAX_DATABASE_ROWS { + return Err(Error::from(anyhow::anyhow!( + "Query requires fetching more than the \ + current limit of {} data points from the \ + timeseries database", + MAX_DATABASE_ROWS, + ))); + } + trace!( + query_log, + "verified OxQL measurement query returns few enough results"; + "n_new_measurements" => count, + "n_total" => *total_rows_fetched, + "limit" => MAX_DATABASE_ROWS, + ); + Ok(()) +} + +#[cfg(test)] +mod tests { + use chrono::{DateTime, Utc}; + use dropshot::test_util::LogContext; + use omicron_test_utils::dev::clickhouse::ClickHouseInstance; + use omicron_test_utils::dev::test_setup_log; + use oximeter::Sample; + use oximeter::{types::Cumulative, FieldValue}; + use std::collections::BTreeMap; + use std::time::Duration; + + use crate::{ + oxql::{point::Points, Table, Timeseries}, + Client, DbWrite, + }; + + #[derive( + Clone, Debug, Eq, PartialEq, PartialOrd, Ord, oximeter::Target, + )] + struct SomeTarget { + name: String, + index: u32, + } + + #[derive(Clone, Debug, oximeter::Metric)] + struct SomeMetric { + foo: i32, + datum: Cumulative, + } + + #[derive(Clone, Debug)] + #[allow(dead_code)] + struct TestData { + targets: Vec, + // Note that we really want all the samples per metric _field_, not the + // full metric. That would give us a 1-element sample array for each. 
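+        // Each entry is keyed by the (target, value of the metric's `foo`
+        // field) pair, and holds every sample for that one timeseries.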
+        samples_by_timeseries: BTreeMap<(SomeTarget, i32), Vec<Sample>>,
+        first_timestamp: DateTime<Utc>,
+    }
+
+    struct TestContext {
+        logctx: LogContext,
+        clickhouse: ClickHouseInstance,
+        client: Client,
+        test_data: TestData,
+    }
+
+    impl TestContext {
+        async fn cleanup_successful(mut self) {
+            self.clickhouse
+                .cleanup()
+                .await
+                .expect("Failed to cleanup ClickHouse server");
+            self.logctx.cleanup_successful();
+        }
+    }
+
+    const N_SAMPLES_PER_TIMESERIES: usize = 16;
+    const SAMPLE_INTERVAL: Duration = Duration::from_secs(1);
+    const SHIFT: Duration = Duration::from_secs(1);
+
+    fn format_timestamp(t: DateTime<Utc>) -> String {
+        format!("{}", t.format("%Y-%m-%dT%H:%M:%S.%f"))
+    }
+
+    fn generate_test_samples() -> TestData {
+        // We'll test with 4 different targets, each with two values for its
+        // fields.
+        let mut targets = Vec::with_capacity(4);
+        let names = &["first-target", "second-target"];
+        let indices = 1..3;
+        for (name, index) in itertools::iproduct!(names, indices) {
+            let target = SomeTarget { name: name.to_string(), index };
+            targets.push(target);
+        }
+
+        // Create a start time for all samples.
+        //
+        // IMPORTANT: There is a TTL of 30 days on all data currently. I would
+        // love this to be a fixed, well-known start time, to make tests
+        // easier, but that's in conflict with the TTL. Instead, we'll use
+        // midnight on the current day, and then store it in the test data
+        // context.
+        let first_timestamp =
+            Utc::now().date_naive().and_hms_opt(0, 0, 0).unwrap().and_utc();
+
+        // For simplicity, we'll also assume all the cumulative measurements
+        // start at the first timestamp as well.
+        let datum = Cumulative::with_start_time(first_timestamp, 0);
+
+        // We'll create two separate metrics for each target, with 16 samples
+        // each.
+        let foos = [-1, 1];
+        let mut samples_by_timeseries = BTreeMap::new();
+        let mut timeseries_index = 0;
+        for target in targets.iter() {
+            for foo in foos.iter() {
+                // Shift this timeseries relative to the others, to ensure we
+                // have some different timestamps.
+                let timeseries_start =
+                    first_timestamp + timeseries_index * SHIFT;
+
+                // Create the metric for this value of `foo`, starting from a
+                // count of 0.
+                let mut metric = SomeMetric { foo: *foo, datum };
+
+                // Create all the samples, incrementing the datum and sample
+                // time.
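+                // Concretely, this produces samples at t0, t0 + 1s, ...,
+                // t0 + 15s with cumulative values 0, 1, ..., 15, where t0 is
+                // this timeseries's shifted start time.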
+ for i in 0..N_SAMPLES_PER_TIMESERIES { + let sample_time = + timeseries_start + SAMPLE_INTERVAL * i as u32; + let sample = Sample::new_with_timestamp( + sample_time, + target, + &metric, + ) + .unwrap(); + samples_by_timeseries + .entry((target.clone(), *foo)) + .or_insert_with(|| { + Vec::with_capacity(N_SAMPLES_PER_TIMESERIES) + }) + .push(sample); + metric.datum += 1; + } + timeseries_index += 1; + } + } + TestData { targets, samples_by_timeseries, first_timestamp } + } + + async fn setup_oxql_test(name: &str) -> TestContext { + let logctx = test_setup_log(name); + let db = ClickHouseInstance::new_single_node(&logctx, 0) + .await + .expect("Failed to start ClickHouse"); + let client = Client::new(db.address, &logctx.log); + client + .init_single_node_db() + .await + .expect("Failed to init single-node oximeter database"); + let test_data = generate_test_samples(); + let samples: Vec<_> = test_data + .samples_by_timeseries + .values() + .flatten() + .cloned() + .collect(); + client + .insert_samples(&samples) + .await + .expect("Failed to insert test data"); + TestContext { logctx, clickhouse: db, client, test_data } + } + + #[tokio::test] + async fn test_get_entire_table() { + let ctx = setup_oxql_test("test_get_entire_table").await; + let query = "get some_target:some_metric"; + let result = ctx + .client + .oxql_query(query) + .await + .expect("failed to run OxQL query"); + assert_eq!(result.tables.len(), 1, "Should be exactly 1 table"); + let table = result.tables.get(0).unwrap(); + assert_eq!( + table.n_timeseries(), + ctx.test_data.samples_by_timeseries.len(), + "Should have fetched every timeseries" + ); + assert!( + table.iter().all(|t| t.points.len() == N_SAMPLES_PER_TIMESERIES), + "Should have fetched all points for all timeseries" + ); + + // Let's build the expected point array, from each timeseries we + // inserted. + let mut matched_timeseries = 0; + for ((target, foo), samples) in + ctx.test_data.samples_by_timeseries.iter() + { + let measurements: Vec<_> = + samples.iter().map(|s| s.measurement.clone()).collect(); + let expected_points = Points::delta_from_cumulative(&measurements) + .expect( + "failed to create expected points from inserted measurements", + ); + let expected_timeseries = + find_timeseries_in_table(&table, target, foo) + .expect("Table did not contain an expected timeseries"); + assert_eq!( + expected_timeseries.points, expected_points, + "Did not reconstruct the correct points for this timeseries" + ); + matched_timeseries += 1; + } + assert_eq!(matched_timeseries, table.len()); + assert_eq!( + matched_timeseries, + ctx.test_data.samples_by_timeseries.len() + ); + + ctx.cleanup_successful().await; + } + + #[tokio::test] + async fn test_get_one_timeseries() { + let ctx = setup_oxql_test("test_get_one_timeseries").await; + + // Specify exactly one timeseries we _want_ to fetch, by picking the + // first timeseries we inserted. 
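+        // The full query then looks something like:
+        //
+        //   get some_target:some_metric
+        //       | filter name == 'first-target' && index == 1 && foo == -1
+        //
+        // with the filter values taken from that first timeseries.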
+ let ((expected_target, expected_foo), expected_samples) = + ctx.test_data.samples_by_timeseries.first_key_value().unwrap(); + let query = format!( + "get some_target:some_metric | filter {}", + exact_filter_for(expected_target, *expected_foo) + ); + let result = ctx + .client + .oxql_query(&query) + .await + .expect("failed to run OxQL query"); + assert_eq!(result.tables.len(), 1, "Should be exactly 1 table"); + let table = result.tables.get(0).unwrap(); + assert_eq!( + table.n_timeseries(), + 1, + "Should have fetched exactly the target timeseries" + ); + assert!( + table.iter().all(|t| t.points.len() == N_SAMPLES_PER_TIMESERIES), + "Should have fetched all points for all timeseries" + ); + + let expected_timeseries = + find_timeseries_in_table(&table, expected_target, expected_foo) + .expect("Table did not contain expected timeseries"); + let measurements: Vec<_> = + expected_samples.iter().map(|s| s.measurement.clone()).collect(); + let expected_points = Points::delta_from_cumulative(&measurements) + .expect("failed to build expected points from measurements"); + assert_eq!( + expected_points, expected_timeseries.points, + "Did not reconstruct the correct points for the one \ + timeseries the query fetched" + ); + + ctx.cleanup_successful().await; + } + + // In this test, we'll fetch the entire history of one timeseries, and only + // the last few samples of another. + // + // This checks that we correctly do complex logical operations that require + // fetching different sets of fields at different times. + #[tokio::test] + async fn test_get_entire_timeseries_and_part_of_another() { + usdt::register_probes().unwrap(); + let ctx = + setup_oxql_test("test_get_entire_timeseries_and_part_of_another") + .await; + + let mut it = ctx.test_data.samples_by_timeseries.iter(); + let (entire, only_part) = (it.next().unwrap(), it.next().unwrap()); + + let entire_filter = exact_filter_for(&entire.0 .0, entire.0 .1); + let only_part_filter = + exact_filter_for(&only_part.0 .0, only_part.0 .1); + let start_timestamp = only_part.1[6].measurement.timestamp(); + let only_part_timestamp_filter = format_timestamp(start_timestamp); + + let query = format!( + "get some_target:some_metric | filter ({}) || (timestamp >= @{} && {})", + entire_filter, + only_part_timestamp_filter, + only_part_filter, + ); + let result = ctx + .client + .oxql_query(&query) + .await + .expect("failed to run OxQL query"); + assert_eq!(result.tables.len(), 1, "Should be exactly 1 table"); + let table = result.tables.get(0).unwrap(); + assert_eq!( + table.n_timeseries(), + 2, + "Should have fetched exactly the two target timeseries" + ); + + // Check that we fetched the entire timeseries for the first one. + let expected_timeseries = + find_timeseries_in_table(table, &entire.0 .0, &entire.0 .1) + .expect("failed to fetch all of the first timeseries"); + let measurements: Vec<_> = + entire.1.iter().map(|s| s.measurement.clone()).collect(); + let expected_points = Points::delta_from_cumulative(&measurements) + .expect("failed to build expected points"); + assert_eq!( + expected_timeseries.points, expected_points, + "Did not collect the entire set of points for the first timeseries", + ); + + // And that we only get the last portion of the second timeseries. 
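+        // That is, only the samples whose timestamps are at or after
+        // `start_timestamp` (the seventh sample onward) should be present.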
+ let expected_timeseries = + find_timeseries_in_table(table, &only_part.0 .0, &only_part.0 .1) + .expect("failed to fetch part of the second timeseries"); + let measurements: Vec<_> = only_part + .1 + .iter() + .filter_map(|sample| { + let meas = &sample.measurement; + if meas.timestamp() >= start_timestamp { + Some(meas.clone()) + } else { + None + } + }) + .collect(); + let expected_points = Points::delta_from_cumulative(&measurements) + .expect("failed to build expected points"); + assert_eq!( + expected_timeseries.points, expected_points, + "Did not collect the last few points for the second timeseries", + ); + + ctx.cleanup_successful().await; + } + + // Return an OxQL filter item that will exactly select the provided + // timeseries by its target / metric. + fn exact_filter_for(target: &SomeTarget, foo: i32) -> String { + format!( + "name == '{}' && index == {} && foo == {}", + target.name, target.index, foo, + ) + } + + // Given a table from an OxQL query, look up the timeseries for the inserted + // target / metric, if it exists + fn find_timeseries_in_table<'a>( + table: &'a Table, + target: &'a SomeTarget, + foo: &'a i32, + ) -> Option<&'a Timeseries> { + for timeseries in table.iter() { + let fields = ×eries.fields; + + // Look up each field in turn, and compare it. + let FieldValue::String(val) = fields.get("name")? else { + unreachable!(); + }; + if val != &target.name { + continue; + } + let FieldValue::U32(val) = fields.get("index")? else { + unreachable!(); + }; + if val != &target.index { + continue; + } + let FieldValue::I32(val) = fields.get("foo")? else { + unreachable!(); + }; + if val != foo { + continue; + } + + // We done matched it. + return Some(timeseries); + } + None + } +} diff --git a/oximeter/db/src/client/query_summary.rs b/oximeter/db/src/client/query_summary.rs new file mode 100644 index 0000000000..b00a11c38e --- /dev/null +++ b/oximeter/db/src/client/query_summary.rs @@ -0,0 +1,123 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types representing summaries of queries against the timeseries database. + +// Copyright 2024 Oxide Computer Company + +use crate::Error; +use reqwest::header::HeaderMap; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::time::Duration; +use uuid::Uuid; + +/// A count of bytes / rows accessed during a query. +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +pub struct IoCount { + /// The number of bytes accessed. + pub bytes: u64, + /// The number of rows accessed. + pub rows: u64, +} + +impl std::fmt::Display for IoCount { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{} rows ({} bytes)", self.rows, self.bytes) + } +} + +/// Summary of the I/O resources used by a query. +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +#[serde(try_from = "serde_json::Value")] +pub struct IoSummary { + /// The bytes and rows read by the query. + pub read: IoCount, + /// The bytes and rows written by the query. 
+ pub written: IoCount, +} + +impl TryFrom for IoSummary { + type Error = Error; + + fn try_from(j: serde_json::Value) -> Result { + use serde_json::Map; + use serde_json::Value; + use std::str::FromStr; + + let Value::Object(map) = j else { + return Err(Error::Database(String::from( + "Expected a JSON object for a metadata summary", + ))); + }; + + fn unpack_summary_value( + map: &Map, + key: &str, + ) -> Result + where + T: FromStr, + ::Err: std::error::Error, + { + let value = map.get(key).ok_or_else(|| { + Error::MissingHeaderKey { key: key.to_string() } + })?; + let Value::String(v) = value else { + return Err(Error::BadMetadata { + key: key.to_string(), + msg: String::from("Expected a string value"), + }); + }; + v.parse::().map_err(|e| Error::BadMetadata { + key: key.to_string(), + msg: e.to_string(), + }) + } + let rows_read: u64 = unpack_summary_value(&map, "read_rows")?; + let bytes_read: u64 = unpack_summary_value(&map, "read_bytes")?; + let rows_written: u64 = unpack_summary_value(&map, "written_rows")?; + let bytes_written: u64 = unpack_summary_value(&map, "written_bytes")?; + Ok(Self { + read: IoCount { bytes: bytes_read, rows: rows_read }, + written: IoCount { bytes: bytes_written, rows: rows_written }, + }) + } +} + +/// Basic metadata about the resource usage of a single SQL query. +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +pub struct QuerySummary { + /// The database-assigned query ID. + pub id: Uuid, + /// The total duration of the query (network plus execution). + pub elapsed: Duration, + /// Summary of the data read and written. + pub io_summary: IoSummary, +} + +impl QuerySummary { + /// Construct a SQL query summary from the headers received from the DB. + pub(crate) fn from_headers( + elapsed: Duration, + headers: &HeaderMap, + ) -> Result { + fn get_header<'a>( + map: &'a HeaderMap, + key: &'a str, + ) -> Result<&'a str, Error> { + let hdr = map.get(key).ok_or_else(|| Error::MissingHeaderKey { + key: key.to_string(), + })?; + std::str::from_utf8(hdr.as_bytes()) + .map_err(|err| Error::Database(err.to_string())) + } + let summary = + serde_json::from_str(get_header(headers, "X-ClickHouse-Summary")?) + .map_err(|err| Error::Database(err.to_string()))?; + let id = get_header(headers, "X-ClickHouse-Query-Id")? + .parse() + .map_err(|err: uuid::Error| Error::Database(err.to_string()))?; + Ok(Self { id, elapsed, io_summary: summary }) + } +} diff --git a/oximeter/db/src/client/sql.rs b/oximeter/db/src/client/sql.rs new file mode 100644 index 0000000000..236faa7aa4 --- /dev/null +++ b/oximeter/db/src/client/sql.rs @@ -0,0 +1,104 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Client methods for running SQL queries againts timeseries themselves. +//! +//! This implements a prototype system for creating "virtual tables" from each +//! timeseries, letting us run SQL queries directly against them. These tables +//! are constructed via huge joins, which effectively reconstruct the entire +//! history of samples as received from the producers. Each row is the original +//! sample. This denormalization comes at a big cost, both in cycles and memory +//! usage, since we need to build the entire join in ClickHouse and send it all +//! to the client for deserialization. +//! +//! Thus this prototype is very useful for development, running analyses on +//! small datasets. 
It's less helpful on real deployments, where the size of +//! data makes this approach prohibitive. + +// Copyright 2024 Oxide Computer Company + +use super::query_summary::QuerySummary; +pub use crate::sql::RestrictedQuery; +use crate::Error; +use crate::{ + client::Client, + sql::{QueryResult, Table}, +}; +pub use indexmap::IndexMap; +use slog::debug; +pub use std::time::Instant; + +impl Client { + /// Transform a SQL query against a timeseries, but do not execute it. + pub async fn transform_query( + &self, + query: impl AsRef, + ) -> Result { + let restricted = RestrictedQuery::new(query.as_ref())?; + restricted.to_oximeter_sql(&*self.schema.lock().await) + } + + /// Run a SQL query against a timeseries. + pub async fn query( + &self, + query: impl AsRef, + ) -> Result { + use crate::client::handle_db_response; + + let original_query = query.as_ref().trim_end_matches(';'); + let ox_sql = self.transform_query(original_query).await?; + let rewritten = format!("{ox_sql} FORMAT JSONEachRow"); + debug!( + self.log, + "rewrote restricted query"; + "original_sql" => &original_query, + "rewritten_sql" => &rewritten, + ); + let request = self + .client + .post(&self.url) + .query(&[ + ("output_format_json_quote_64bit_integers", "0"), + ("database", crate::DATABASE_NAME), + ]) + .body(rewritten.clone()); + let query_start = Instant::now(); + let response = handle_db_response( + request + .send() + .await + .map_err(|err| Error::DatabaseUnavailable(err.to_string()))?, + ) + .await?; + let summary = QuerySummary::from_headers( + query_start.elapsed(), + response.headers(), + )?; + let text = response.text().await.unwrap(); + let mut table = Table::default(); + for line in text.lines() { + let row = + serde_json::from_str::>( + line.trim(), + ) + .unwrap(); + if table.column_names.is_empty() { + table.column_names.extend(row.keys().cloned()) + } else { + assert!(table + .column_names + .iter() + .zip(row.keys()) + .all(|(k1, k2)| k1 == k2)); + } + table.rows.push(row.into_values().collect()); + } + Ok(QueryResult { + original_query: original_query.to_string(), + rewritten_query: rewritten, + summary, + table, + }) + } +} diff --git a/oximeter/db/src/lib.rs b/oximeter/db/src/lib.rs index 24f7d8c2d0..642612b8db 100644 --- a/oximeter/db/src/lib.rs +++ b/oximeter/db/src/lib.rs @@ -4,7 +4,7 @@ //! Tools for interacting with the control plane telemetry database. 
-// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company use crate::query::StringFieldSelector; use chrono::DateTime; @@ -32,14 +32,17 @@ use thiserror::Error; mod client; pub mod model; +#[cfg(feature = "oxql")] +pub mod oxql; pub mod query; +#[cfg(any(feature = "sql", test))] pub mod sql; +#[cfg(feature = "oxql")] +pub use client::oxql::OxqlResult; +pub use client::query_summary::QuerySummary; pub use client::Client; pub use client::DbWrite; -pub use client::QueryMetadata; -pub use client::QueryResult; -pub use client::Table; pub use model::OXIMETER_VERSION; #[derive(Debug, Error)] @@ -58,7 +61,7 @@ pub enum Error { BadMetadata { key: String, msg: String }, /// An error interacting with the telemetry database - #[error("Error interacting with telemetry database")] + #[error("Error interacting with telemetry database: {0}")] Database(String), /// A schema provided when collecting samples did not match the expected schema @@ -134,8 +137,20 @@ pub enum Error { #[error("Schema update versions must be sequential without gaps")] NonSequentialSchemaVersions, + #[cfg(any(feature = "sql", test))] #[error("SQL error")] Sql(#[from] sql::Error), + + #[cfg(any(feature = "oxql", test))] + #[error(transparent)] + Oxql(oxql::Error), +} + +#[cfg(any(feature = "oxql", test))] +impl From for Error { + fn from(e: crate::oxql::Error) -> Self { + Error::Oxql(e) + } } impl From for TimeseriesSchema { diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index b1b45eabc4..414ad25ba7 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -1600,30 +1600,23 @@ pub(crate) fn parse_field_select_row( ) -> (TimeseriesKey, Target, Metric) { assert_eq!( row.fields.len(), - 2 * schema.field_schema.len(), - "Expected pairs of (field_name, field_value) from the field query" + schema.field_schema.len(), + "Expected the same number of fields in each row as the schema itself", ); let (target_name, metric_name) = schema.component_names(); let mut target_fields = Vec::new(); let mut metric_fields = Vec::new(); - let mut actual_fields = row.fields.values(); + let mut actual_fields = row.fields.iter(); for _ in 0..schema.field_schema.len() { // Extract the field name from the row and find a matching expected field. 
- let actual_field_name = actual_fields + let (actual_field_name, actual_field_value) = actual_fields .next() .expect("Missing a field name from a field select query"); - let name = actual_field_name - .as_str() - .expect("Expected a string field name") - .to_string(); - let expected_field = schema.schema_for_field(&name).expect( + let expected_field = schema.schema_for_field(actual_field_name).expect( "Found field with name that is not part of the timeseries schema", ); // Parse the field value as the expected type - let actual_field_value = actual_fields - .next() - .expect("Missing a field value from a field select query"); let value = match expected_field.field_type { FieldType::Bool => { FieldValue::Bool(bool::from(DbBool::from( @@ -1726,7 +1719,7 @@ pub(crate) fn parse_field_select_row( ) } }; - let field = Field { name, value }; + let field = Field { name: actual_field_name.to_string(), value }; match expected_field.source { FieldSource::Target => target_fields.push(field), FieldSource::Metric => metric_fields.push(field), diff --git a/oximeter/db/src/oxql/ast/cmp.rs b/oximeter/db/src/oxql/ast/cmp.rs new file mode 100644 index 0000000000..ea33056c1f --- /dev/null +++ b/oximeter/db/src/oxql/ast/cmp.rs @@ -0,0 +1,76 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! An AST node describing comparison operators + +// Copyright 2024 Oxide Computer Company + +use std::fmt; + +/// Comparison operators. +// TODO-completeness: Operators for other types, like IP containment ('<<'). +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Comparison { + /// Equality comparison. + Eq, + /// Inequality comparison. + Ne, + /// Greater-than comparison + Gt, + /// Greater-than or equals comparison + Ge, + /// Lesser-than comparison + Lt, + /// Lesser-than or equals comparison + Le, + /// Regular expression pattern matching. + Like, +} + +impl Comparison { + // Return the _function name_ of the comparison that is safe for use in + // ClickHouse. + // + // Note that we're always using the functional form for these comparisons, + // even when they have obvious operators. E.g., we return `"equals"` for the + // `Comparison::Eq` rather than `"=="`. + // + // This is to normalize the different comparisons we support, which do not + // all have operator formats. `Comparison::Like` is the best example, but we + // may also want to support things like IP address containment. While DBs + // like PostgreSQL have the `<<` operator for that, ClickHouse supports only + // the function `isIPAddressInRange()`. + // + // One consequence of this is that the caller needs to wrap the argument in + // parentheses manually. 
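+    // For example, a filter like `foo == 7` is rendered in the functional
+    // form, roughly `equals(foo, 7)`, rather than with the `==` operator
+    // (illustrative; the caller supplies the parentheses and arguments).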
+ pub(crate) fn as_db_function_name(&self) -> &'static str { + match self { + Comparison::Eq => "equals", + Comparison::Ne => "notEquals", + Comparison::Gt => "greater", + Comparison::Ge => "greaterOrEquals", + Comparison::Lt => "less", + Comparison::Le => "lessOrEquals", + Comparison::Like => "match", + } + } +} + +impl fmt::Display for Comparison { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}", + match self { + Comparison::Eq => "==", + Comparison::Ne => "!=", + Comparison::Gt => ">", + Comparison::Ge => ">=", + Comparison::Lt => "<", + Comparison::Le => "<=", + Comparison::Like => "~=", + } + ) + } +} diff --git a/oximeter/db/src/oxql/ast/grammar.rs b/oximeter/db/src/oxql/ast/grammar.rs new file mode 100644 index 0000000000..00a0e6e0fe --- /dev/null +++ b/oximeter/db/src/oxql/ast/grammar.rs @@ -0,0 +1,1334 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Grammar for the Oximeter Query Language (OxQL). + +// Copyright 2024 Oxide Computer + +peg::parser! { + pub grammar query_parser() for str { + use crate::oxql::ast::cmp::Comparison; + use crate::oxql::ast::table_ops::align::Align; + use crate::oxql::ast::table_ops::align::AlignmentMethod; + use crate::oxql::ast::table_ops::filter::SimpleFilter; + use crate::oxql::ast::table_ops::filter::FilterExpr; + use crate::oxql::ast::table_ops::filter::Filter; + use crate::oxql::ast::table_ops::filter::CompoundFilter; + use crate::oxql::ast::table_ops::get::Get; + use crate::oxql::ast::table_ops::group_by::GroupBy; + use crate::oxql::ast::ident::Ident; + use crate::oxql::ast::literal::Literal; + use crate::oxql::ast::logical_op::LogicalOp; + use crate::oxql::ast::Query; + use crate::oxql::ast::table_ops::join::Join; + use crate::oxql::ast::table_ops::GroupedTableOp; + use crate::oxql::ast::table_ops::BasicTableOp; + use crate::oxql::ast::table_ops::TableOp; + use crate::oxql::ast::table_ops::group_by::Reducer; + use crate::oxql::ast::literal::duration_consts; + use oximeter::TimeseriesName; + use std::time::Duration; + use uuid::Uuid; + use chrono::Utc; + use chrono::DateTime; + use chrono::NaiveDateTime; + use chrono::NaiveDate; + use chrono::NaiveTime; + use std::net::IpAddr; + use std::net::Ipv4Addr; + use std::net::Ipv6Addr; + + rule _ = quiet!{[' ' | '\n' | '\t']+} / expected!("whitespace") + + // Parse boolean literals. + rule true_literal() -> bool = "true" { true } + rule false_literal() -> bool = "false" { false } + pub(super) rule boolean_literal_impl() -> bool + = quiet! { true_literal() / false_literal() } / expected!("boolean literal") + + pub rule boolean_literal() -> Literal + = b:boolean_literal_impl() { Literal::Boolean(b) } + + // Parse duration literals. 
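+        //
+        // For example, `5m` parses as five minutes and `100ms` as one hundred
+        // milliseconds; the full list of supported suffixes is documented on
+        // `duration_literal()` below.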
+        rule year() -> Duration
+            = "Y" { duration_consts::YEAR }
+        rule month() -> Duration
+            = "M" { duration_consts::MONTH }
+        rule week() -> Duration
+            = "w" { duration_consts::WEEK }
+        rule day() -> Duration
+            = "d" { duration_consts::DAY }
+        rule hour() -> Duration
+            = "h" { duration_consts::HOUR }
+        rule minute() -> Duration
+            = "m" { duration_consts::MINUTE }
+        rule second() -> Duration
+            = "s" { duration_consts::SECOND }
+        rule millisecond() -> Duration
+            = "ms" { duration_consts::MILLISECOND }
+        rule microsecond() -> Duration
+            = "us" { duration_consts::MICROSECOND }
+        rule nanosecond() -> Duration
+            = "ns" { duration_consts::NANOSECOND }
+        pub(super) rule duration_literal_impl() -> Duration
+            = count:integer_literal_impl() base:(
+                year() /
+                month() /
+                week() /
+                day() /
+                hour() /
+                millisecond() /
+                minute() /
+                second() /
+                microsecond() /
+                nanosecond()
+            )
+        {?
+            // NOTE: This count is the factor by which we multiply the base
+            // unit. So it counts the number of nanos, millis, or days, etc. It
+            // does not limit the total duration itself.
+            let Ok(count) = u32::try_from(count) else {
+                return Err("invalid count for duration literal");
+            };
+            base.checked_mul(count).ok_or("overflowed duration literal")
+        }
+
+        /// Parse a literal duration from a string.
+        ///
+        /// Durations are written as a positive integer multiple of a base time
+        /// unit. For example, `7s` is interpreted as 7 seconds. Supported units
+        /// are:
+        ///
+        /// - 'Y': an approximate year, 365 days
+        /// - 'M': an approximate month, 30 days
+        /// - 'w': an approximate week, 7 days
+        /// - 'd': a day, 24 hours
+        /// - 'h': an hour, 3600 seconds
+        /// - 'm': a minute, 60 seconds
+        /// - 's': seconds
+        /// - 'ms': milliseconds
+        /// - 'us': microseconds
+        /// - 'ns': nanoseconds
+        pub rule duration_literal() -> Literal
+            = d:duration_literal_impl() { Literal::Duration(d) }
+
+        /// Parse a literal timestamp.
+        ///
+        /// Timestamps are literals prefixed with `@`. They can be in one of
+        /// several formats:
+        ///
+        /// - YYYY-MM-DD
+        /// - HH:MM:SS[.f]
+        /// - RFC 3339, `YYYY-MM-DDTHH:MM:SS.f`
+        /// - The literal `now()`, possibly with some simple offset expression,
+        ///   such as `now() - 5m`. The offset must be a duration.
+        ///
+        /// All timestamps are in UTC.
+        pub rule timestamp_literal() -> Literal
+            = t:timestamp_literal_impl() { Literal::Timestamp(t) }
+
+        rule timestamp_literal_impl() -> DateTime<Utc>
+            = timestamp_string()
+            / now_timestamp()
+
+        pub(super) rule timestamp_string() -> DateTime<Utc>
+            = "@" s:$(['0'..='9' | '-' | 'T' | ':' | '.']+)
+        {?
+            if let Ok(t) = NaiveDate::parse_from_str(s, "%F") {
+                return Ok(t.and_hms_opt(0, 0, 0).unwrap().and_utc());
+            }
+            if let Ok(t) = NaiveTime::parse_from_str(s, "%H:%M:%S%.f") {
+                return Ok(NaiveDateTime::new(Utc::now().date_naive(), t).and_utc());
+            }
+            if let Ok(t) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S%.f") {
+                return Ok(t.and_utc());
+            }
+            Err("a recognized timestamp format")
+        }
+
+        rule now_offset() -> (bool, Duration)
+            = _? sign:['+' | '-'] _? dur:duration_literal_impl()
+        {
+            let negative = matches!(sign, '-');
+            (negative, dur)
+        }
+
+        pub(super) rule now_timestamp() -> DateTime<Utc>
+            = "@now()" maybe_offset:now_offset()?
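+        // For example, `@now() - 5m` evaluates to five minutes before the
+        // moment the query is parsed.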
+ { + let now = Utc::now(); + if let Some((negative, offset)) = maybe_offset { + if negative { + now - offset + } else { + now + offset + } + } else { + now + } + } + + /// Parse an IP address literal, either IPv4 or IPv6 + pub rule ip_literal() -> Literal + = ip:ipv4_literal() { Literal::IpAddr(IpAddr::V4(ip)) } + / ip:ipv6_literal() { Literal::IpAddr(IpAddr::V6(ip)) } + + pub(super) rule ipv4_literal() -> Ipv4Addr + = "\"" s:$((['0'..='9']*<1,3>)**<4> ".") "\"" + {? + s.parse().map_err(|_| "an IPv4 address") + } + + pub(super) rule ipv6_literal() -> Ipv6Addr + = "\"" s:$(['a'..='f' | '0'..='9' | ':']+) "\"" + {? + s.parse().map_err(|_| "an IPv6 address") + } + + rule dashed_uuid_literal() -> Uuid + = s:$( + "\"" + ['a'..='f' | '0'..='9']*<8> "-" + ['a'..='f' | '0'..='9']*<4> "-" + ['a'..='f' | '0'..='9']*<4> "-" + ['a'..='f' | '0'..='9']*<4> "-" + ['a'..='f' | '0'..='9']*<12> + "\"" + ) {? + let Some(middle) = s.get(1..37) else { + return Err("invalid UUID literal"); + }; + middle.parse().or(Err("invalid UUID literal")) + } + rule undashed_uuid_literal() -> Uuid + = s:$("\"" ['a'..='f' | '0'..='9']*<32> "\"") {? + let Some(middle) = s.get(1..33) else { + return Err("invalid UUID literal"); + }; + middle.parse().or(Err("invalid UUID literal")) + } + pub(super) rule uuid_literal_impl() -> Uuid + = dashed_uuid_literal() / undashed_uuid_literal() + + /// Parse UUID literals. + /// + /// UUIDs should be quoted with `"` and can include or omit dashes + /// between the segments. Both of the following are equivalent. + /// + /// "fc59ab26-f1d8-44ca-abbc-dd8f61321433" + /// "fc59ab26f1d844caabbcdd8f61321433" + pub rule uuid_literal() -> Literal + = id:uuid_literal_impl() { Literal::Uuid(id) } + + // Parse string literals. + rule any_but_single_quote() -> String + = s:$([^'\'']*) + {? + recognize_escape_sequences(s).ok_or("invalid single quoted string") + } + + rule any_but_double_quote() -> String + = s:$([^'"']*) + {? + recognize_escape_sequences(s).ok_or("invalid double quoted string") + } + + rule single_quoted_string() -> String + = "'" s:any_but_single_quote() "'" { s } + + rule double_quoted_string() -> String + = "\"" s:any_but_double_quote() "\"" { s } + + pub(super) rule string_literal_impl() -> String + = single_quoted_string() / double_quoted_string() + + /// Parse a string literal, either single- or double-quoted. + /// + /// Parsing string literals is pretty tricky, but we add several + /// constraints to simplify things. First strings must be quoted, either + /// with single- or double-quotes. E.g., the strings `"this"` and + /// `'this'` parse the same way. + /// + /// We require that the string not _contain_ its quote-style, so there + /// can't be any embedded single-quotes in a single-quoted string, or + /// double-quotes in a double-quoted string. Each quote-style may contain + /// the quote from the other style. + /// + /// We support the following common escape sequences: + /// + /// ```ignore + /// \n + /// \r + /// \t + /// \\ + /// \0 + /// ``` + /// + /// Beyond this, any valid Unicode code point, written in the usual Rust + /// style, is supported. For example, `\u{1234}` is accepted and mapped + /// to `ሴ` upon parsing. This also allows users to write both quote + /// styles if required, by writing them as their Unicode escape + /// sequences. For example, this string: + /// + /// ```ignore + /// "this string has \u{22} in it" + /// ``` + /// + /// Will be parsed as `this string has " in it`. 
+ pub rule string_literal() -> Literal + = s:string_literal_impl() { Literal::String(s) } + + pub(super) rule integer_literal_impl() -> i128 + = n:$("-"? ['0'..='9']+ !['e' | 'E' | '.']) + {? + let Ok(x) = n.parse() else { + return Err("integer literal"); + }; + if x < i128::from(i64::MIN) { + Err("negative overflow") + } else if x > i128::from(u64::MAX) { + Err("positive overflow") + } else { + Ok(x) + } + } + + /// Parse integer literals. + pub rule integer_literal() -> Literal + = n:integer_literal_impl() { Literal::Integer(n) } + + // We're being a bit lazy here, since the rule expression isn't exactly + // right. But we rely on calling `f64`'s `FromStr` implementation to + // actually verify the values can be parsed. + pub(super) rule double_literal_impl() -> f64 + = n:$("-"? ['0'..='9']* "."? ['0'..='9']* (['e' | 'E'] "-"? ['0'..='9']+)*) {? + n.parse().or(Err("double literal")) + } + + // Parse double literals. + pub rule double_literal() -> Literal + = d:double_literal_impl() { Literal::Double(d) } + + /// Parse a literal. + /// + /// Literals are typed, with support for bools, durations, integers and + /// doubles, UUIDs, and general strings. See the rules for each type of + /// literal for details on supported formats. + pub rule literal() -> Literal + = lit:( + boolean_literal() / + duration_literal() / + integer_literal() / + double_literal() / + uuid_literal() / + ip_literal() / + string_literal() / + timestamp_literal() + ) + { + lit + } + + /// Parse a logical operator. + pub(super) rule logical_op_impl() -> LogicalOp + = "||" { LogicalOp::Or} + / "&&" { LogicalOp::And } + / "^" { LogicalOp::Xor } + + + // NOTES: + // + // The rules below are all used to parse a filtering expression. This + // turns out to be surprisingly complicated to express succinctly in + // `peg`, but there are a few tricks. First, it's important that we do + // not try to parse negation ("!") inside the filtering atoms -- it's a + // higher-level concept, and not part of the atom itself. + // + // Second, it's not clear how to use `peg`'s precendence macro to + // correctly describe the precedence. Things are recursive, but we + // choose to define that in the rules themselves, rather than explicitly + // with precedence levels. This is common in PEG definitions, and the + // main trick is force things _not_ to be left-recursive, and use two + // rules tried in sequence. The `factor` rule is a good example of this. + // + // Another example is the logical OR / AND / XOR parsing. We start with + // OR, which is the lowest precedence, and move to the others in + // sequence. Each is defined as parsing either the "thing itself", e.g., + // `foo || bar` for the OR rule; or the rule with next-higher + // precedence. + // + // IMPORTANT: The #[cache] directives on the rules below are _critical_ + // to avoiding wildly exponential runtime with nested expressions. + + /// Parse a logical negation + pub rule not() = "!" + + /// A factor is a logically negated expression, or a primary expression. + #[cache] + pub rule factor() -> Filter + = not() _? factor:factor() + { + Filter { + negated: !factor.negated, + expr: factor.expr + } + } + / p:primary() { p } + + /// A primary expression is either a comparison "atom", e.g., `foo == + /// "bar"`, or a grouping around a sequence of such things. + #[cache] + pub rule primary() -> Filter + = atom:comparison_atom() + {? 
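+            // The regex-match operator only makes sense against string
+            // literals, so something like `foo ~= 4` is rejected here.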
+ if matches!(atom.cmp, Comparison::Like) && !matches!(atom.value, Literal::String(_)) { + Err("~= comparison is only supported for string literals") + } else { + Ok(Filter { negated: false, expr: FilterExpr::Simple(atom) }) + } + } + / "(" _? or:logical_or_expr() _? ")" { or } + + /// A comparison atom is a base-case for all this recursion. + /// + /// It specifies a single comparison between an identifier and a value, + /// using a specific comparison operator. For example, this parses `foo + /// == "bar"`. + pub rule comparison_atom() -> SimpleFilter + = ident:ident() _? cmp:comparison() _? value:literal() + { + SimpleFilter { ident, cmp, value } + } + + /// Two filtering expressions combined with a logical OR. + /// + /// An OR expression is two logical ANDs joined with "||", or just a + /// bare logical AND expression. + #[cache] + pub rule logical_or_expr() -> Filter + = left:logical_and_expr() _? "||" _? right:logical_or_expr() + { + let compound = CompoundFilter { + left: Box::new(left), + op: LogicalOp::Or, + right: Box::new(right), + }; + Filter { negated: false, expr: FilterExpr::Compound(compound) } + } + / logical_and_expr() + + /// Two filtering expressions combined with a logical AND. + /// + /// A logical AND expression is two logical XORs joined with "&&", or + /// just a bare logical XOR expression. + #[cache] + pub rule logical_and_expr() -> Filter + = left:logical_xor_expr() _? "&&" _? right:logical_and_expr() + { + let compound = CompoundFilter { + left: Box::new(left), + op: LogicalOp::And, + right: Box::new(right), + }; + Filter { negated: false, expr: FilterExpr::Compound(compound) } + } + / logical_xor_expr() + + /// Two filtering expressions combined with a logical XOR. + /// + /// A logical XOR expression is two logical XORs joined with "^ or + /// just a bare factor. Note that this either hits the base case, if + /// `factor` is actually an atom, or recurses again if its a logical OR + /// expression. + /// + /// Note that this is the highest-precedence logical operator. + #[cache] + pub rule logical_xor_expr() -> Filter + = left:factor() _? "^" _? right:logical_xor_expr() + { + let compound = CompoundFilter { + left: Box::new(left), + op: LogicalOp::Xor, + right: Box::new(right), + }; + Filter { negated: false, expr: FilterExpr::Compound(compound) } + } + / factor:factor() { factor } + + /// Parse the _logical expression_ part of a `filter` table operation. + pub rule filter_expr() -> Filter = logical_or_expr() + + /// Parse a "filter" table operation. + pub rule filter() -> Filter + = "filter" _ expr:filter_expr() _? + { + expr + } + + pub(super) rule ident_impl() -> &'input str + = quiet!{ inner:$(['a'..='z']+ ['a'..='z' | '0'..='9']* ("_" ['a'..='z' | '0'..='9']+)*) } / + expected!("A valid identifier") + + /// Parse an identifier, usually a column name. + pub rule ident() -> Ident + = inner:ident_impl() { Ident(inner.to_string()) } + + pub(super) rule comparison() -> Comparison + = "==" { Comparison::Eq } + / "!=" { Comparison::Ne } + / ">=" { Comparison::Ge } + / ">" { Comparison::Gt } + / "<=" { Comparison::Le } + / "<" { Comparison::Lt } + / "~=" { Comparison::Like } + + pub rule timeseries_name() -> TimeseriesName + = target_name:ident_impl() ":" metric_name:ident_impl() + {? + format!("{target_name}:{metric_name}") + .try_into() + .map_err(|_| "invalid timeseries name") + } + + rule get_delim() = quiet!{ _? "," _? } + + /// Parse a "get" table operation. 
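+        ///
+        /// For example, `get foo:bar` selects the timeseries named `foo:bar`;
+        /// more than one comma-separated name may also be listed (names here
+        /// are illustrative, matching those used in the tests below).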
+ pub rule get() -> Vec + = "get" _ names:(timeseries_name() **<1,> get_delim()) + { + names.into_iter().map(|t| Get { timeseries_name: t }).collect() + } + + /// Parse a reducing operation by name. + pub rule reducer() -> Reducer + = "mean" { Reducer::Mean } + / "sum" { Reducer::Sum } + / expected!("a reducer name") + + rule ws_with_comma() = _? "," _? + pub rule group_by() -> GroupBy + = "group_by" + _ + "[" _? identifiers:(ident() ** ws_with_comma()) ","? _? "]" + reducer:("," _? red:reducer() { red })? + { + GroupBy { + identifiers, + reducer: reducer.unwrap_or_default(), + } + } + + /// Parse a `join` table operation. + pub rule join() = "join" {} + + pub(super) rule alignment_method() -> AlignmentMethod + = "interpolate" { AlignmentMethod::Interpolate } + / "mean_within" { AlignmentMethod::MeanWithin } + + /// Parse an alignment table operation. + pub rule align() -> Align + = "align" _ method:alignment_method() "(" period:duration_literal_impl() ")" + { + Align { method, period } + } + + pub(super) rule basic_table_op() -> TableOp + = g:"get" _ t:timeseries_name() { TableOp::Basic(BasicTableOp::Get(t)) } + / f:filter() { TableOp::Basic(BasicTableOp::Filter(f)) } + / g:group_by() { TableOp::Basic(BasicTableOp::GroupBy(g)) } + / join() { TableOp::Basic(BasicTableOp::Join(Join)) } + / a:align() { TableOp::Basic(BasicTableOp::Align(a)) } + + pub(super) rule grouped_table_op() -> TableOp + = "{" _? ops:(query() ++ grouped_table_op_delim()) _? "}" + { + TableOp::Grouped(GroupedTableOp { ops }) + } + + /// Parse a top-level OxQL query. + /// + /// Queries always start with a "get" operation, and may be followed by + /// any number of other timeseries transformations + pub rule query() -> Query + = ops:(basic_table_op() / grouped_table_op()) ++ query_delim() + {? + let query = Query { ops }; + if query.all_gets_at_query_start() { + Ok(query) + } else { + Err("every subquery must start with a `get` operation") + } + } + + rule grouped_table_op_delim() = quiet!{ _? ";" _? } + rule query_delim() = quiet!{ _? "|" _? } + } +} + +// Recognize escape sequences and convert them into the intended Unicode point +// they represent. +// +// For example, the string containing ASCII "abcd" is returned unchanged. +// +// The string containing "\u{1234}" is returned as the string "ሴ". Note that the +// Unicode bytes must be enclosed in {}, and can have length 1-6. +// +// If the string contains an invalid escape sequence, such as "\uFFFF", or a +// control code, such as `\u07`, `None` is returned. +// +// Note that the main goal of this method is to _unescape_ relevant sequences. +// We will get queries that may contain escaped sequences, like `\\\n`, which +// this method will unescape to `\n`. +fn recognize_escape_sequences(s: &str) -> Option { + let mut out = String::with_capacity(s.len()); + + let mut chars = s.chars().peekable(); + while let Some(ch) = chars.next() { + match ch { + '\\' => { + let Some(next_ch) = chars.next() else { + // Escape at the end of the string + return None; + }; + match next_ch { + 'n' => out.push('\n'), + 'r' => out.push('\r'), + 't' => out.push('\t'), + '\\' => out.push('\\'), + '0' => out.push('\0'), + 'u' => { + // We need this to be delimited by {}, and between 1 and + // 6 characters long. + if !matches!(chars.next(), Some('{')) { + return None; + } + + let mut digits = String::with_capacity(6); + let mut found_closing_brace = false; + while !found_closing_brace && digits.len() < 7 { + // Take the next value, if it's a hex digit or the + // closing brace. 
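+                            // For example, for the input `\u{1f600}` this
+                            // collects the digits `1f600` and then stops at
+                            // the closing brace (illustrative input).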
+ let Some(next) = chars.next_if(|ch| { + ch.is_ascii_hexdigit() || *ch == '}' + }) else { + break; + }; + if next.is_ascii_hexdigit() { + digits.push(next); + continue; + } + found_closing_brace = true; + } + if !found_closing_brace { + return None; + } + let val = u32::from_str_radix(&digits, 16).ok()?; + let decoded = char::from_u32(val)?; + out.push(decoded) + } + _ => return None, + } + } + _ => out.push(ch), + } + } + Some(out) +} + +#[cfg(test)] +mod tests { + use super::query_parser; + use crate::oxql::ast::cmp::Comparison; + use crate::oxql::ast::grammar::recognize_escape_sequences; + use crate::oxql::ast::ident::Ident; + use crate::oxql::ast::literal::Literal; + use crate::oxql::ast::logical_op::LogicalOp; + use crate::oxql::ast::table_ops::align::Align; + use crate::oxql::ast::table_ops::align::AlignmentMethod; + use crate::oxql::ast::table_ops::filter::CompoundFilter; + use crate::oxql::ast::table_ops::filter::Filter; + use crate::oxql::ast::table_ops::filter::FilterExpr; + use crate::oxql::ast::table_ops::filter::SimpleFilter; + use crate::oxql::ast::table_ops::group_by::Reducer; + use chrono::DateTime; + use chrono::NaiveDate; + use chrono::NaiveDateTime; + use chrono::NaiveTime; + use chrono::TimeZone; + use chrono::Utc; + use std::net::IpAddr; + use std::net::Ipv4Addr; + use std::net::Ipv6Addr; + use std::time::Duration; + use uuid::Uuid; + + #[test] + fn test_boolean_literal() { + assert_eq!(query_parser::boolean_literal_impl("true").unwrap(), true); + assert_eq!(query_parser::boolean_literal_impl("false").unwrap(), false); + } + + #[test] + fn test_duration_literal() { + for (as_str, dur) in [ + ("7Y", Duration::from_secs(60 * 60 * 24 * 365 * 7)), + ("7M", Duration::from_secs(60 * 60 * 24 * 30 * 7)), + ("7w", Duration::from_secs(60 * 60 * 24 * 7 * 7)), + ("7d", Duration::from_secs(60 * 60 * 24 * 7)), + ("7h", Duration::from_secs(60 * 60 * 7)), + ("7m", Duration::from_secs(60 * 7)), + ("7s", Duration::from_secs(7)), + ("7ms", Duration::from_millis(7)), + ("7us", Duration::from_micros(7)), + ("7ns", Duration::from_nanos(7)), + ] { + assert_eq!( + query_parser::duration_literal_impl(as_str).unwrap(), + dur + ); + } + + assert!(query_parser::duration_literal_impl("-1m").is_err()); + let too_big: i64 = u32::MAX as i64 + 1; + assert!(query_parser::duration_literal_impl(&format!("{too_big}s")) + .is_err()); + } + + #[test] + fn test_uuid_literal() { + const ID: Uuid = uuid::uuid!("9f8900bd-886d-4988-b623-95b7fda36d23"); + let as_string = format!("\"{}\"", ID); + assert_eq!(query_parser::uuid_literal_impl(&as_string).unwrap(), ID); + let without_dashes = as_string.replace('-', ""); + assert_eq!( + query_parser::uuid_literal_impl(&without_dashes).unwrap(), + ID + ); + + assert!(query_parser::uuid_literal_impl( + &as_string[1..as_string.len() - 2] + ) + .is_err()); + assert!(query_parser::uuid_literal_impl( + &without_dashes[1..without_dashes.len() - 2] + ) + .is_err()); + } + + #[test] + fn test_integer_literal() { + assert_eq!(query_parser::integer_literal_impl("1").unwrap(), 1); + assert_eq!(query_parser::integer_literal_impl("-1").unwrap(), -1); + assert_eq!(query_parser::integer_literal_impl("-1").unwrap(), -1); + + assert!(query_parser::integer_literal_impl("-1.0").is_err()); + assert!(query_parser::integer_literal_impl("-1.").is_err()); + assert!(query_parser::integer_literal_impl("1e3").is_err()); + } + + #[test] + fn test_double_literal() { + assert_eq!(query_parser::double_literal_impl("1.0").unwrap(), 1.0); + assert_eq!(query_parser::double_literal_impl("-1.0").unwrap(), 
 -1.0);
+        assert_eq!(query_parser::double_literal_impl("1.").unwrap(), 1.0);
+        assert_eq!(query_parser::double_literal_impl("-1.").unwrap(), -1.0);
+        assert_eq!(query_parser::double_literal_impl(".5").unwrap(), 0.5);
+        assert_eq!(query_parser::double_literal_impl("-.5").unwrap(), -0.5);
+        assert_eq!(query_parser::double_literal_impl("1e3").unwrap(), 1e3);
+        assert_eq!(query_parser::double_literal_impl("-1e3").unwrap(), -1e3);
+        assert_eq!(query_parser::double_literal_impl("-1e-3").unwrap(), -1e-3);
+        assert_eq!(
+            query_parser::double_literal_impl("0.5e-3").unwrap(),
+            0.5e-3
+        );
+
+        assert!(query_parser::double_literal_impl("-.e4").is_err());
+        assert!(query_parser::double_literal_impl("-.e-4").is_err());
+        assert!(query_parser::double_literal_impl("1e").is_err());
+    }
+
+    #[test]
+    fn test_recognize_escape_sequences_with_none() {
+        for each in ["", "abc", "$%("] {
+            assert_eq!(recognize_escape_sequences(each).unwrap(), each);
+        }
+    }
+
+    #[test]
+    fn test_recognize_escape_sequence_with_valid_unicode_sequence() {
+        // Welp, let's just test every possible code point.
+        for x in 0..=0x10FFFF {
+            let expected = char::from_u32(x);
+            let as_hex = format!("{x:0x}");
+            let sequence = format!("\\u{{{as_hex}}}");
+            let recognized = recognize_escape_sequences(&sequence)
+                .map(|s| s.chars().next().unwrap());
+            assert_eq!(
+                expected, recognized,
+                "did not correctly recognize the Unicode escape sequence"
+            );
+        }
+    }
+
+    #[test]
+    fn test_recognize_escape_sequences_with_invalid_unicode_sequence() {
+        for each in [
+            r#"\uFFFF"#, // Valid, but not using {} delimiters
+            r#"\u{}"#, // Not enough characters.
+            r#"\u{12345678}"#, // Too many characters
+            r#"\u{ZZZZ}"#, // Not hex digits
+            r#"\u{d800}"#, // A surrogate code point, not valid.
+            r#"\u{1234"#, // Valid, but missing closing brace.
+ ] { + println!("{each}"); + assert!(recognize_escape_sequences(each).is_none()); + } + } + + #[test] + fn test_recognize_escape_sequences_with_valid_escape_sequence() { + for (as_str, expected) in [ + (r#"\n"#, '\n'), + (r#"\r"#, '\r'), + (r#"\t"#, '\t'), + (r#"\0"#, '\0'), + (r#"\\"#, '\\'), + ] { + let recognized = recognize_escape_sequences(as_str).unwrap(); + assert_eq!(recognized.chars().next().unwrap(), expected); + } + } + + #[test] + fn test_single_quoted_string_literal() { + for (input, expected) in [ + ("''", String::new()), + ("'simple'", String::from("simple")), + ("'袈►♖'", String::from("袈►♖")), + (r#"'escapes \n handled'"#, String::from("escapes \n handled")), + (r#"'may contain " in it'"#, String::from("may contain \" in it")), + ( + r#"'may contain "\u{1234}" in it'"#, + String::from("may contain \"ሴ\" in it"), + ), + ] { + assert_eq!( + query_parser::string_literal_impl(input).unwrap(), + expected + ); + } + assert!(query_parser::string_literal_impl(r#"' cannot have ' in it'"#) + .is_err()); + } + + #[test] + fn test_double_quoted_string_literal() { + for (input, expected) in [ + ("\"\"", String::new()), + ("\"simple\"", String::from("simple")), + ("\"袈►♖\"", String::from("袈►♖")), + (r#""escapes \n handled""#, String::from("escapes \n handled")), + (r#""may contain ' in it""#, String::from("may contain ' in it")), + ( + r#""may contain '\u{1234}' in it""#, + String::from("may contain 'ሴ' in it"), + ), + ] { + assert_eq!( + query_parser::string_literal_impl(input).unwrap(), + expected + ); + } + + assert!(query_parser::string_literal_impl(r#"" cannot have " in it""#) + .is_err()); + } + + #[test] + fn test_comparison() { + for (as_str, cmp) in [ + ("==", Comparison::Eq), + ("!=", Comparison::Ne), + (">=", Comparison::Ge), + (">", Comparison::Gt), + ("<=", Comparison::Le), + ("<", Comparison::Lt), + ("~=", Comparison::Like), + ] { + assert_eq!(query_parser::comparison(as_str).unwrap(), cmp); + } + } + + #[test] + fn test_filter_expr_single_simple_expression() { + let expr = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".to_string()), + cmp: Comparison::Eq, + value: Literal::Boolean(true), + }), + }; + assert_eq!(query_parser::filter_expr("a == true").unwrap(), expr); + assert_eq!(query_parser::filter_expr("(a == true)").unwrap(), expr); + + assert!(query_parser::filter_expr("(a == true").is_err()); + } + + #[test] + fn test_filter_expr_single_negated_simple_expression() { + let expr = Filter { + negated: true, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".to_string()), + cmp: Comparison::Gt, + value: Literal::Double(1.0), + }), + }; + assert_eq!(query_parser::filter_expr("!(a > 1.)").unwrap(), expr,); + + assert!(query_parser::filter_expr("!(a > 1.0").is_err()); + } + + #[test] + fn test_filter_expr_two_simple_filter_expressions() { + let left = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".to_string()), + cmp: Comparison::Eq, + value: Literal::Boolean(true), + }), + }; + + for op in [LogicalOp::And, LogicalOp::Or] { + let expected = left.merge(&left, op); + // Match with either parenthesized. 
+ let as_str = format!("a == true {op} (a == true)"); + assert_eq!(query_parser::filter_expr(&as_str).unwrap(), expected); + let as_str = format!("(a == true) {op} a == true"); + assert_eq!(query_parser::filter_expr(&as_str).unwrap(), expected); + let as_str = format!("(a == true) {op} (a == true)"); + assert_eq!(query_parser::filter_expr(&as_str).unwrap(), expected); + } + } + + #[test] + fn test_filter_expr_operator_precedence() { + // We'll combine the following simple expression in a number of + // different sequences, to check that we correctly group by operator + // precedence. + let atom = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".to_string()), + cmp: Comparison::Eq, + value: Literal::Boolean(true), + }), + }; + let as_str = "a == true || a == true && a == true ^ a == true"; + let parsed = query_parser::filter_expr(as_str).unwrap(); + assert_eq!( + parsed.to_string(), + "((a == true) || ((a == true) && ((a == true) ^ (a == true))))" + ); + + // This should bind most tighty from right to left: XOR, then AND, then + // OR. Since we're destructuring from out to in, though, we check in the + // opposite order, weakest to strongest, or left to right. + // + // Start with OR, which should bind the most weakly. + assert!(!parsed.negated); + let FilterExpr::Compound(CompoundFilter { left, op, right }) = + parsed.expr + else { + unreachable!(); + }; + assert!(!left.negated); + assert!(!right.negated); + assert_eq!(op, LogicalOp::Or); + assert_eq!(atom, *left); + + // && should bind next-most tightly + let FilterExpr::Compound(CompoundFilter { left, op, right }) = + right.expr + else { + unreachable!(); + }; + assert!(!left.negated); + assert!(!right.negated); + assert_eq!(op, LogicalOp::And); + assert_eq!(atom, *left); + + // Followed by XOR, the tightest binding operator. + let FilterExpr::Compound(CompoundFilter { left, op, right }) = + right.expr + else { + unreachable!(); + }; + assert!(!left.negated); + assert!(!right.negated); + assert_eq!(op, LogicalOp::Xor); + assert_eq!(atom, *left); + assert_eq!(atom, *right); + } + + #[test] + fn test_filter_expr_overridden_precedence() { + // Similar to above, we'll test with a single atom, and group in a + // number of ways. + let atom = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".to_string()), + cmp: Comparison::Eq, + value: Literal::Boolean(true), + }), + }; + let as_str = "(a == true || a == true) && a == true"; + let parsed = query_parser::filter_expr(as_str).unwrap(); + + // Now, || should bind more tightly, so we should have (a && b) at the + // top-level, where b is the test atom. We're comparing the atom at the + // _right_ now with the original expressions. + assert!(!parsed.negated); + let FilterExpr::Compound(CompoundFilter { left, op, right }) = + parsed.expr + else { + unreachable!(); + }; + assert!(!left.negated); + assert!(!right.negated); + assert_eq!(op, LogicalOp::And); + assert_eq!(atom, *right); + + // Destructure the LHS and check it. 
+ let FilterExpr::Compound(CompoundFilter { left, op, right }) = + left.expr + else { + unreachable!(); + }; + assert!(!left.negated); + assert!(!right.negated); + assert_eq!(op, LogicalOp::Or); + assert_eq!(atom, *left); + assert_eq!(atom, *right); + } + + #[test] + fn test_negated_filter_expr() { + let left = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".into()), + cmp: Comparison::Eq, + value: Literal::Boolean(true), + }), + }; + let right = left.negate(); + let top = left.merge(&right, LogicalOp::Xor).negate(); + let as_str = "!(a == true ^ !(a == true))"; + let parsed = query_parser::filter_expr(as_str).unwrap(); + assert_eq!(top, parsed); + } + + #[test] + fn test_filter_table_op() { + for expr in [ + "filter field == 0", + "filter baz == 'quux'", + "filter other_field != 'yes'", + "filter id != \"45c937fb-5e99-4a86-a95b-22bf30bf1507\"", + "filter (foo == 'bar') || ((yes != \"no\") && !(maybe > 'so'))", + ] { + let parsed = query_parser::filter(expr).unwrap_or_else(|_| { + panic!("failed to parse query: '{}'", expr) + }); + println!("{parsed:#?}"); + } + } + + #[test] + fn test_get_table_op() { + for expr in [ + "get foo:bar", + "get target_name:metric_name", + "get target_name_0:metric_name000", + ] { + let parsed = query_parser::get(expr).unwrap_or_else(|_| { + panic!("failed to parse get expr: '{}'", expr) + }); + println!("{parsed:#?}"); + } + + assert!(query_parser::get("get foo").is_err()); + assert!(query_parser::get("get foo:").is_err()); + assert!(query_parser::get("get :bar").is_err()); + assert!(query_parser::get("get 0:0").is_err()); + } + + #[test] + fn test_ident() { + for id in ["foo", "foo0", "foo_0_1_2"] { + query_parser::ident(id) + .unwrap_or_else(|_| panic!("failed to identifier: '{id}'")); + } + + for id in ["0foo", "0", "A", "", "%", "foo_"] { + query_parser::ident(id).expect_err(&format!( + "should not have parsed as identifier: '{}'", + id + )); + } + } + + #[test] + fn test_group_by() { + for q in [ + "group_by []", + "group_by [baz]", + "group_by [baz,]", + "group_by [baz,another_field]", + "group_by [baz,another_field,]", + ] { + let parsed = query_parser::group_by(q) + .unwrap_or_else(|_| panic!("failed to parse group_by: '{q}'")); + println!("{parsed:#?}"); + } + } + + #[test] + fn test_query() { + for q in [ + "get foo:bar", + "get foo:bar | group_by []", + "get foo:bar | group_by [baz]", + "get foo:bar | filter baz == 'quuz'", + "get foo:bar | filter (some == 0) && (id == false || a == -1.0)", + "get foo:bar | group_by [baz] | filter baz == 'yo'", + "{ get foo:bar | filter x == 0; get x:y } | join", + "{ get foo:bar ; get x:y } | join | filter baz == 0", + "get foo:bar | align interpolate(10s)", + ] { + let parsed = query_parser::query(q) + .unwrap_or_else(|_| panic!("failed to parse query: '{q}'")); + println!("{parsed:#?}"); + } + } + + #[test] + fn test_reducer() { + assert_eq!(query_parser::reducer("mean").unwrap(), Reducer::Mean); + assert!(query_parser::reducer("foo").is_err()); + } + + #[test] + fn test_parse_literal_timestamp_string() { + assert_eq!( + query_parser::timestamp_string("@2020-01-01").unwrap(), + Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap(), + ); + assert_eq!( + query_parser::timestamp_string("@01:01:01").unwrap().time(), + NaiveTime::from_hms_opt(1, 1, 1).unwrap(), + ); + assert_eq!( + query_parser::timestamp_string("@01:01:01.123456").unwrap().time(), + NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap(), + ); + assert_eq!( + 
query_parser::timestamp_string("@2020-01-01T01:01:01.123456") + .unwrap(), + NaiveDateTime::new( + NaiveDate::from_ymd_opt(2020, 1, 1).unwrap(), + NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap(), + ) + .and_utc(), + ); + } + + #[test] + fn test_parse_ipv4_literal() { + let check = |s: &str, addr: IpAddr| { + let Literal::IpAddr(ip) = query_parser::ip_literal(s).unwrap() + else { + panic!("expected '{}' to be parsed into {}", s, addr); + }; + assert_eq!(ip, addr); + }; + check("\"100.100.100.100\"", Ipv4Addr::new(100, 100, 100, 100).into()); + check("\"1.2.3.4\"", Ipv4Addr::new(1, 2, 3, 4).into()); + check("\"0.0.0.0\"", Ipv4Addr::UNSPECIFIED.into()); + + assert!(query_parser::ip_literal("\"abcd\"").is_err()); + assert!(query_parser::ip_literal("\"1.1.1.\"").is_err()); + assert!(query_parser::ip_literal("\"1.1.1.1.1.1\"").is_err()); + assert!(query_parser::ip_literal("\"2555.1.1.1\"").is_err()); + assert!(query_parser::ip_literal("1.2.3.4").is_err()); // no quotes + } + + #[test] + fn test_parse_ipv6_literal() { + let check = |s: &str, addr: IpAddr| { + let Literal::IpAddr(ip) = query_parser::ip_literal(s).unwrap() + else { + panic!("expected '{}' to be parsed into {}", s, addr); + }; + assert_eq!(ip, addr); + }; + + // IPv6 is nuts, let's just check a few common patterns. + check("\"::1\"", Ipv6Addr::LOCALHOST.into()); + check("\"::\"", Ipv6Addr::UNSPECIFIED.into()); + check("\"fd00::1\"", Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1).into()); + check( + "\"fd00:1:2:3:4:5:6:7\"", + Ipv6Addr::new(0xfd00, 1, 2, 3, 4, 5, 6, 7).into(), + ); + + // Don't currently support IPv6-mapped IPv4 addresses + assert!(query_parser::ip_literal("\"::ffff:127.0.0.1\"").is_err()); + + // Other obviously bad patterns. + assert!(query_parser::ip_literal("\"1\"").is_err()); + assert!(query_parser::ip_literal("\":1::1::1\"").is_err()); + assert!(query_parser::ip_literal("\"::g\"").is_err()); + assert!(query_parser::ip_literal("\":::\"").is_err()); + assert!(query_parser::ip_literal("::1").is_err()); // no quotes + } + + #[test] + fn test_query_starts_with_get() { + assert!(query_parser::query("{ get a:b }") + .unwrap() + .all_gets_at_query_start()); + assert!(query_parser::query("{ get a:b; get a:b } | join") + .unwrap() + .all_gets_at_query_start()); + assert!(query_parser::query( + "{ { get a:b ; get a:b } | join; get c:d } | join" + ) + .unwrap() + .all_gets_at_query_start()); + + assert!(query_parser::query("{ get a:b; filter foo == 0 }").is_err()); + assert!(query_parser::query("{ get a:b; filter foo == 0 }").is_err()); + assert!(query_parser::query("get a:b | get a:b").is_err()); + } + + #[test] + fn test_now_with_offset() { + fn check(expr: &str, expected: DateTime) { + // Rough but still-useful bound in microseconds. 
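+            //
+            // (Presumably `@now()` is resolved by the parser with its own call
+            // to `Utc::now()`, a short time after `expected` is computed here,
+            // so the two timestamps always differ by a small amount.)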
+ const MAX_DIFF_IN_MICROS: i64 = 1000; + let d = query_parser::now_timestamp(expr).unwrap(); + let now = Utc::now(); + let micros = d.timestamp_micros() - expected.timestamp_micros(); + assert!( + micros.abs() <= MAX_DIFF_IN_MICROS, + "Expected `{}` to be within {}us of {}, but it is {}us away", + expr, + MAX_DIFF_IN_MICROS, + now, + micros, + ); + } + check("@now() - 5m", Utc::now() - Duration::from_secs(60 * 5)); + check("@now() + 5m", Utc::now() + Duration::from_secs(60 * 5)); + check("@now() - 5s", Utc::now() - Duration::from_secs(5)); + check("@now() + 5s", Utc::now() + Duration::from_secs(5)); + check("@now() - 1d", Utc::now() - Duration::from_secs(60 * 60 * 24)); + check("@now() + 1d", Utc::now() + Duration::from_secs(60 * 60 * 24)); + } + + #[test] + fn test_like_only_available_for_strings() { + assert!(query_parser::filter_expr("foo ~= 0").is_err()); + assert!(query_parser::filter_expr("foo ~= \"something\"").is_ok()); + } + + #[test] + fn test_align_table_op() { + assert_eq!( + query_parser::align("align interpolate(1m)").unwrap(), + Align { + method: AlignmentMethod::Interpolate, + period: Duration::from_secs(60) + } + ); + assert_eq!( + query_parser::align("align mean_within(100s)").unwrap(), + Align { + method: AlignmentMethod::MeanWithin, + period: Duration::from_secs(100) + } + ); + + assert!(query_parser::align("align whatever(100s)").is_err()); + assert!(query_parser::align("align interpolate('foo')").is_err()); + } + + #[test] + fn test_complicated_logical_combinations() { + let parsed = + query_parser::logical_or_expr("a == 'b' ^ !(c == 0) && d == false") + .unwrap(); + + // Build up this expected expression from its components. + let left = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".to_string()), + cmp: Comparison::Eq, + value: Literal::String("b".into()), + }), + }; + let middle = Filter { + negated: true, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("c".to_string()), + cmp: Comparison::Eq, + value: Literal::Integer(0), + }), + }; + let right = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("d".to_string()), + cmp: Comparison::Eq, + value: Literal::Boolean(false), + }), + }; + + // The left and right are bound most tightly, by the XOR operator. + let xor = Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(left), + op: LogicalOp::Xor, + right: Box::new(middle), + }), + }; + + // And then those two together are joined with the AND. + let expected = Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(xor), + op: LogicalOp::And, + right: Box::new(right), + }), + }; + assert_eq!(parsed, expected); + } + + #[test] + fn test_multiple_negation() { + let negated = + query_parser::filter_expr("(a == 0) || !!!(a == 0 && a == 0)") + .unwrap(); + let expected = + query_parser::filter_expr("(a == 0) || !(a == 0 && a == 0)") + .unwrap(); + assert_eq!(negated, expected, "Failed to handle multiple negations"); + } +} diff --git a/oximeter/db/src/oxql/ast/ident.rs b/oximeter/db/src/oxql/ast/ident.rs new file mode 100644 index 0000000000..6fb2dab85a --- /dev/null +++ b/oximeter/db/src/oxql/ast/ident.rs @@ -0,0 +1,25 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! OxQL identifiers, such as column names. 
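+//!
+//! For example, in the table operation `filter hostname == "foo"`, the field
+//! name `hostname` is parsed into an `Ident`. (Illustrative; actual field
+//! names depend on the timeseries schema being queried.)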
+ +// Copyright 2024 Oxide Computer Company + +use std::fmt; + +/// An identifier, such as a column or function name. +#[derive(Clone, Debug, PartialEq)] +pub struct Ident(pub(in crate::oxql) String); + +impl Ident { + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + +impl fmt::Display for Ident { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0) + } +} diff --git a/oximeter/db/src/oxql/ast/literal.rs b/oximeter/db/src/oxql/ast/literal.rs new file mode 100644 index 0000000000..33f3d81485 --- /dev/null +++ b/oximeter/db/src/oxql/ast/literal.rs @@ -0,0 +1,384 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! AST node for literal values. + +// Copyright 2024 Oxide Computer Company + +use crate::oxql::ast::cmp::Comparison; +use crate::oxql::Error; +use anyhow::Context; +use chrono::DateTime; +use chrono::Utc; +use oximeter::FieldType; +use oximeter::FieldValue; +use regex::Regex; +use std::fmt; +use std::net::IpAddr; +use std::time::Duration; +use uuid::Uuid; + +/// A literal value. +#[derive(Clone, Debug, PartialEq)] +pub enum Literal { + // TODO-performance: An i128 here is a bit gratuitous. + Integer(i128), + Double(f64), + String(String), + Boolean(bool), + Uuid(Uuid), + Duration(Duration), + Timestamp(DateTime), + IpAddr(IpAddr), +} + +impl Literal { + // Format the literal as a safe, typed string for ClickHouse. + pub(crate) fn as_db_safe_string(&self) -> String { + match self { + Literal::Integer(inner) => format!("{inner}"), + Literal::Double(inner) => format!("{inner}"), + Literal::String(inner) => format!("'{inner}'"), + Literal::Boolean(inner) => format!("{inner}"), + Literal::Uuid(inner) => format!("'{inner}'"), + Literal::Duration(inner) => { + let (count, interval) = duration_to_db_interval(inner); + format!("INTERVAL {} {}", count, interval) + } + Literal::Timestamp(inner) => { + format!("'{}'", inner.format(crate::DATABASE_TIMESTAMP_FORMAT)) + } + Literal::IpAddr(inner) => { + // NOTE: We store all IP addresses in ClickHouse as IPv6, with + // IPv4 addresses mapped to that. To run a comparison against a + // literal in Rust, we can use the value directly, since we + // decode it an convert to the right type during + // deserialization. But to compare in the DB itself, we need to + // do that with an IPv4-mapped IPv6 address. + // + // Helpfully, ClickHouse's `toIPv6` function takes a string of + // either family, and maps IPv4 into the IPv6 space, if needed. + format!("toIPv6('{inner}')") + } + } + } + + // Return true if this literal can be compared to a field of the provided + // type. + pub(crate) fn is_compatible_with_field( + &self, + field_type: FieldType, + ) -> bool { + match self { + Literal::Integer(_) => matches!( + field_type, + FieldType::U8 + | FieldType::I8 + | FieldType::U16 + | FieldType::I16 + | FieldType::U32 + | FieldType::I32 + | FieldType::U64 + | FieldType::I64 + ), + Literal::Double(_) => false, + Literal::String(_) => matches!(field_type, FieldType::String), + Literal::Boolean(_) => matches!(field_type, FieldType::Bool), + Literal::Uuid(_) => matches!(field_type, FieldType::Uuid), + Literal::Duration(_) => false, + Literal::Timestamp(_) => false, + Literal::IpAddr(_) => matches!(field_type, FieldType::IpAddr), + } + } + + /// Apply the comparison op between self and the provided field. 
+ /// + /// Return None if the comparison cannot be applied, either because the type + /// is not compatible or the comparison doesn't make sense. + pub(crate) fn compare_field( + &self, + value: &FieldValue, + cmp: Comparison, + ) -> Result, Error> { + anyhow::ensure!( + self.is_compatible_with_field(value.field_type()), + "Field value of type {} is cannot be compared to \ + the value in this filter", + value.field_type(), + ); + macro_rules! generate_cmp_match { + ($lhs:ident, $rhs:ident) => { + match cmp { + Comparison::Eq => Ok(Some($lhs == $rhs)), + Comparison::Ne => Ok(Some($lhs != $rhs)), + Comparison::Gt => Ok(Some($lhs > $rhs)), + Comparison::Ge => Ok(Some($lhs >= $rhs)), + Comparison::Lt => Ok(Some($lhs < $rhs)), + Comparison::Le => Ok(Some($lhs <= $rhs)), + Comparison::Like => Ok(None), + } + }; + } + // Filter expressions are currently written as ` + // `. That means the literal stored in `self` is the RHS of + // the comparison, and the field value passed in is the LHS. + match (value, self) { + (FieldValue::Bool(lhs), Literal::Boolean(rhs)) => { + generate_cmp_match!(rhs, lhs) + } + (FieldValue::String(lhs), Literal::String(rhs)) => match cmp { + Comparison::Eq => Ok(Some(lhs == rhs)), + Comparison::Ne => Ok(Some(lhs != rhs)), + Comparison::Gt => Ok(Some(lhs > rhs)), + Comparison::Ge => Ok(Some(lhs >= rhs)), + Comparison::Lt => Ok(Some(lhs < rhs)), + Comparison::Le => Ok(Some(lhs <= rhs)), + Comparison::Like => { + let re = Regex::new(rhs).context( + "failed to create regex for string matching", + )?; + Ok(Some(re.is_match(lhs))) + } + }, + (FieldValue::IpAddr(lhs), Literal::IpAddr(rhs)) => { + generate_cmp_match!(rhs, lhs) + } + (FieldValue::Uuid(lhs), Literal::Uuid(rhs)) => { + generate_cmp_match!(rhs, lhs) + } + (FieldValue::U8(lhs), Literal::Integer(rhs)) => { + let lhs = i128::from(*lhs); + let rhs = *rhs; + generate_cmp_match!(lhs, rhs) + } + (FieldValue::I8(lhs), Literal::Integer(rhs)) => { + let lhs = i128::from(*lhs); + let rhs = *rhs; + generate_cmp_match!(lhs, rhs) + } + (FieldValue::U16(lhs), Literal::Integer(rhs)) => { + let lhs = i128::from(*lhs); + let rhs = *rhs; + generate_cmp_match!(lhs, rhs) + } + (FieldValue::I16(lhs), Literal::Integer(rhs)) => { + let lhs = i128::from(*lhs); + let rhs = *rhs; + generate_cmp_match!(lhs, rhs) + } + (FieldValue::U32(lhs), Literal::Integer(rhs)) => { + let lhs = i128::from(*lhs); + let rhs = *rhs; + generate_cmp_match!(lhs, rhs) + } + (FieldValue::I32(lhs), Literal::Integer(rhs)) => { + let lhs = i128::from(*lhs); + let rhs = *rhs; + generate_cmp_match!(lhs, rhs) + } + (FieldValue::U64(lhs), Literal::Integer(rhs)) => { + let lhs = i128::from(*lhs); + let rhs = *rhs; + generate_cmp_match!(lhs, rhs) + } + (FieldValue::I64(lhs), Literal::Integer(rhs)) => { + let lhs = i128::from(*lhs); + let rhs = *rhs; + generate_cmp_match!(lhs, rhs) + } + (_, _) => unreachable!(), + } + } +} + +/// Duration constants used for interpreting duration literals. +/// +/// Many of the values here are **approximate**. For example, a "year" is always +/// 365 24-hour periods, regardless of leap years, the current time, or any +/// other context. +pub(crate) mod duration_consts { + use std::time::Duration; + + /// Approximately 1 year, 365 24-hour periods. + pub const YEAR: Duration = Duration::from_secs(60 * 60 * 24 * 365); + + /// Approximately 1 month, 30 24-hour periods. + pub const MONTH: Duration = Duration::from_secs(60 * 60 * 24 * 30); + + /// Approximately 1 week, 7 24-hour periods. 
+ pub const WEEK: Duration = Duration::from_secs(60 * 60 * 24 * 7); + + /// One day, equal to 24 hours. + pub const DAY: Duration = Duration::from_secs(60 * 60 * 24); + + /// An hour, exactly 3600 seconds. + pub const HOUR: Duration = Duration::from_secs(60 * 60); + + /// A minute, exactly 60 seconds. + pub const MINUTE: Duration = Duration::from_secs(60); + + /// One second. + pub const SECOND: Duration = Duration::from_secs(1); + + /// One millisecond, a thousandth of a second. + pub const MILLISECOND: Duration = Duration::from_millis(1); + + /// One microsecond, a millionth of a second. + pub const MICROSECOND: Duration = Duration::from_micros(1); + + /// One nanosecond, a billionth of a second. + pub const NANOSECOND: Duration = Duration::from_nanos(1); +} + +// Convert a duration into an appropriate interval for a database query. +// +// This converts the provided duration into the largest interval type for which +// the value is an integer. For example: +// +// `1us` -> (1, "MICROSECOND"), +// `3.4s` -> (3400, "MILLISECOND") +fn duration_to_db_interval(dur: &Duration) -> (u64, &'static str) { + fn as_whole_multiple(dur: &Duration, base: &Duration) -> Option { + let d = dur.as_nanos(); + let base = base.as_nanos(); + if d % base == 0 { + Some(u64::try_from(d / base).unwrap()) + } else { + None + } + } + use duration_consts::*; + const INTERVALS: [(Duration, &str); 10] = [ + (YEAR, "YEAR"), + (MONTH, "MONTH"), + (WEEK, "WEEK"), + (DAY, "DAY"), + (HOUR, "HOUR"), + (MINUTE, "MINUTE"), + (SECOND, "SECOND"), + (MILLISECOND, "MILLISECOND"), + (MICROSECOND, "MICROSECOND"), + (NANOSECOND, "NANOSECOND"), + ]; + for (base, interval) in &INTERVALS { + if let Some(count) = as_whole_multiple(dur, base) { + return (count, interval); + } + } + + // Durations must be a whole number of nanoseconds, so we will never fall + // past the last interval in the array above. 
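+    //
+    // For instance (illustration only): 1.5 minutes is 90_000_000_000 ns; it
+    // is not a whole multiple of MINUTE, but it is a whole multiple of SECOND,
+    // so the loop above returns (90, "SECOND") before reaching this point.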
+ unreachable!(); +} + +impl fmt::Display for Literal { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Literal::Integer(inner) => write!(f, "{inner}"), + Literal::Double(inner) => write!(f, "{inner}"), + Literal::String(inner) => write!(f, "{inner:?}"), + Literal::Boolean(inner) => write!(f, "{inner}"), + Literal::Uuid(inner) => write!(f, "\"{inner}\""), + Literal::Duration(inner) => write!(f, "{inner:?}"), + Literal::Timestamp(inner) => write!(f, "@{inner}"), + Literal::IpAddr(inner) => write!(f, "{inner}"), + } + } +} + +#[cfg(test)] +mod tests { + use super::duration_consts::*; + use super::duration_to_db_interval; + use super::Literal; + use crate::oxql::ast::cmp::Comparison; + use oximeter::FieldValue; + + #[test] + fn test_duration_to_db_interval() { + for base in [1_u32, 2, 3] { + let b = u64::from(base); + assert_eq!(duration_to_db_interval(&(base * YEAR)), (b, "YEAR")); + assert_eq!(duration_to_db_interval(&(base * MONTH)), (b, "MONTH")); + assert_eq!(duration_to_db_interval(&(base * WEEK)), (b, "WEEK")); + assert_eq!(duration_to_db_interval(&(base * DAY)), (b, "DAY")); + assert_eq!(duration_to_db_interval(&(base * HOUR)), (b, "HOUR")); + assert_eq!( + duration_to_db_interval(&(base * MINUTE)), + (b, "MINUTE") + ); + assert_eq!( + duration_to_db_interval(&(base * SECOND)), + (b, "SECOND") + ); + assert_eq!( + duration_to_db_interval(&(base * MILLISECOND)), + (b, "MILLISECOND") + ); + assert_eq!( + duration_to_db_interval(&(base * MICROSECOND)), + (b, "MICROSECOND") + ); + assert_eq!( + duration_to_db_interval(&(base * NANOSECOND)), + (b, "NANOSECOND") + ); + } + assert_eq!(duration_to_db_interval(&(YEAR / 2)), (4380, "HOUR")); + assert_eq!(duration_to_db_interval(&(HOUR / 60)), (1, "MINUTE")); + assert_eq!(duration_to_db_interval(&(HOUR / 10)), (6, "MINUTE")); + assert_eq!(duration_to_db_interval(&(HOUR / 12)), (5, "MINUTE")); + assert_eq!(duration_to_db_interval(&(HOUR / 120)), (30, "SECOND")); + assert_eq!(duration_to_db_interval(&(MINUTE / 2)), (30, "SECOND")); + assert_eq!(duration_to_db_interval(&(MINUTE / 10)), (6, "SECOND")); + assert_eq!( + duration_to_db_interval(&MINUTE.mul_f64(1.5)), + (90, "SECOND") + ); + assert_eq!( + duration_to_db_interval(&MICROSECOND.mul_f64(1.5)), + (1500, "NANOSECOND") + ); + assert_eq!( + duration_to_db_interval(&(YEAR + NANOSECOND)), + (31536000000000001, "NANOSECOND") + ); + } + + #[test] + fn test_literal_compare_field() { + let value = FieldValue::I64(3); + let lit = Literal::Integer(4); + + // The literal comparison would be written like: `field >= 4` where + // `field` has a value of 3 here. So the comparison is false. + assert_eq!( + lit.compare_field(&value, Comparison::Ge).unwrap(), + Some(false) + ); + + // Reversing this, we should have true. + assert_eq!( + lit.compare_field(&value, Comparison::Lt).unwrap(), + Some(true) + ); + + // It should not be equal. 
+ assert_eq!( + lit.compare_field(&value, Comparison::Eq).unwrap(), + Some(false) + ); + assert_eq!( + lit.compare_field(&value, Comparison::Ne).unwrap(), + Some(true) + ); + } + + #[test] + fn test_literal_compare_field_wrong_type() { + let value = FieldValue::String(String::from("foo")); + let lit = Literal::Integer(4); + assert!(lit.compare_field(&value, Comparison::Eq).is_err()); + } +} diff --git a/oximeter/db/src/oxql/ast/logical_op.rs b/oximeter/db/src/oxql/ast/logical_op.rs new file mode 100644 index 0000000000..60fc5d134f --- /dev/null +++ b/oximeter/db/src/oxql/ast/logical_op.rs @@ -0,0 +1,41 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! An AST node describing logical operators. + +// Copyright 2024 Oxide Computer Company + +use std::fmt; + +/// Logical operators. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum LogicalOp { + And, + Or, + Xor, +} + +impl LogicalOp { + pub(crate) fn as_db_function_name(&self) -> &'static str { + match self { + LogicalOp::And => "and", + LogicalOp::Or => "or", + LogicalOp::Xor => "xor", + } + } +} + +impl fmt::Display for LogicalOp { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}", + match self { + LogicalOp::And => "&&", + LogicalOp::Or => "||", + LogicalOp::Xor => "^", + } + ) + } +} diff --git a/oximeter/db/src/oxql/ast/mod.rs b/oximeter/db/src/oxql/ast/mod.rs new file mode 100644 index 0000000000..7037b74a7f --- /dev/null +++ b/oximeter/db/src/oxql/ast/mod.rs @@ -0,0 +1,152 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! AST for the Oximeter Query Language. + +// Copyright 2024 Oxide Computer Company + +use chrono::DateTime; +use chrono::Utc; +use oximeter::TimeseriesName; + +use self::table_ops::BasicTableOp; +use self::table_ops::GroupedTableOp; +use self::table_ops::TableOp; +pub mod cmp; +pub(super) mod grammar; +pub mod ident; +pub mod literal; +pub mod logical_op; +pub mod table_ops; + +/// An OxQL query. +#[derive(Clone, Debug, PartialEq)] +pub struct Query { + ops: Vec, +} + +impl Query { + // Return the first operation in the query, which is always a form of `get`. + fn first_op(&self) -> &TableOp { + self.ops.first().expect("Should have parsed at least 1 operation") + } + + pub(crate) fn timeseries_name(&self) -> &TimeseriesName { + match self.first_op() { + TableOp::Basic(BasicTableOp::Get(n)) => n, + TableOp::Basic(_) => unreachable!(), + TableOp::Grouped(GroupedTableOp { ops }) => { + ops.first().unwrap().timeseries_name() + } + } + } + + // Check that this query (and any subqueries) start with a get table op, and + // that there are no following get operations. I.e., we have: + // + // get ... | + // { get .. } | + // { get .. ; get .. } | + pub(crate) fn all_gets_at_query_start(&self) -> bool { + fn all_gets_at_query_start(ops: &[TableOp]) -> bool { + let (head, tail) = ops.split_at(1); + match &head[0] { + // If the head is a get, check that there are no following get + // operations. + TableOp::Basic(BasicTableOp::Get(_)) => { + !tail.iter().any(|op| { + matches!(op, TableOp::Basic(BasicTableOp::Get(_))) + }) + } + // Cannot start with any other basic op. + TableOp::Basic(_) => false, + // Recurse for grouped ops. 
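+                // For example, `{ { get a:b ; get a:b } | join; get c:d } | join`
+                // (a query from the parser tests) is validated by recursing
+                // into each level of nesting.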
+ TableOp::Grouped(GroupedTableOp { ops }) => { + ops.iter().all(Query::all_gets_at_query_start) + } + } + } + all_gets_at_query_start(&self.ops) + } + + // Return the non-get table transformations. + pub(crate) fn transformations(&self) -> &[TableOp] { + &self.ops[1..] + } + + // Split the query into either: + // + // - a list of nested queries and the remaining table ops in self, or + // - the flat query contained in self. + pub(crate) fn split(&self, query_end_time: DateTime) -> SplitQuery { + match &self.ops[0] { + TableOp::Basic(BasicTableOp::Get(_)) => { + SplitQuery::Flat(crate::oxql::Query { + parsed: self.clone(), + end_time: query_end_time, + }) + } + TableOp::Basic(_) => unreachable!(), + TableOp::Grouped(GroupedTableOp { ops }) => SplitQuery::Nested { + subqueries: ops + .iter() + .cloned() + .map(|parsed| crate::oxql::Query { + parsed, + end_time: query_end_time, + }) + .collect(), + transformations: self.ops[1..].to_vec(), + }, + } + } + + // Return the last referenced timestamp in the query, if any. + pub(crate) fn query_end_time(&self) -> Option> { + match &self.ops[0] { + TableOp::Basic(BasicTableOp::Get(_)) => self + .transformations() + .iter() + .filter_map(|op| { + let TableOp::Basic(BasicTableOp::Filter(filter)) = op + else { + return None; + }; + filter.last_timestamp() + }) + .max(), + TableOp::Basic(_) => unreachable!(), + TableOp::Grouped(GroupedTableOp { ops }) => { + let grouped_max = + ops.iter().filter_map(Self::query_end_time).max(); + let op_max = self + .transformations() + .iter() + .filter_map(|op| { + let TableOp::Basic(BasicTableOp::Filter(filter)) = op + else { + return None; + }; + filter.last_timestamp() + }) + .max(); + grouped_max.max(op_max) + } + } + } +} + +// Either a flat query or one with nested subqueries. +// +// OxQL supports subqueries. Though they can be nested, they must always be at +// the front of a query. This represents either a query that is flat, _or_ that +// prefix of subqueries and the following transformations. +#[derive(Clone, Debug, PartialEq)] +pub(crate) enum SplitQuery { + Flat(crate::oxql::Query), + Nested { + subqueries: Vec, + transformations: Vec, + }, +} diff --git a/oximeter/db/src/oxql/ast/table_ops/align.rs b/oximeter/db/src/oxql/ast/table_ops/align.rs new file mode 100644 index 0000000000..cf54ebc312 --- /dev/null +++ b/oximeter/db/src/oxql/ast/table_ops/align.rs @@ -0,0 +1,753 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! An AST node describing timeseries alignment operations. + +// Copyright 2024 Oxide Computer Company + +use crate::oxql::point::DataType; +use crate::oxql::point::MetricType; +use crate::oxql::point::Points; +use crate::oxql::point::ValueArray; +use crate::oxql::point::Values; +use crate::oxql::query::Alignment; +use crate::oxql::Error; +use crate::oxql::Table; +use crate::oxql::Timeseries; +use anyhow::Context; +use chrono::DateTime; +use chrono::TimeDelta; +use chrono::Utc; +use std::time::Duration; + +// The maximum factor by which an alignment operation may upsample data. +// +// This is a crude way to limit the size of a query result. We do not currently +// paginate the results of OxQL queries, so we need to find other ways to avoid +// DOS attacks due to large query results. +// +// While we also apply limits on the total number of samples fetched from the +// ClickHouse database, this alone is insufficient. 
For example, suppose we have +// two samples, spaced 1 second apart, which are then passed to an alignment +// table operation with a period of 1 nanosecond. Now you have a billion points! +// +// To prevent this, we restrict the total amount by which any alignment +// operation can upsample the data. Another way to think of it is that this +// limits the ratio between the requested period and the largest interval +// between timestamps in the data. +const MAX_UPSAMPLING_RATIO: u128 = 10; + +fn verify_max_upsampling_ratio( + timestamps: &[DateTime], + period: &Duration, +) -> Result<(), Error> { + let period = period.as_nanos(); + let max = MAX_UPSAMPLING_RATIO * period; + for (t1, t0) in timestamps.iter().skip(1).zip(timestamps.iter()) { + let Some(nanos) = t1.signed_duration_since(t0).num_nanoseconds() else { + anyhow::bail!("Overflow computing timestamp delta"); + }; + assert!(nanos > 0, "Timestamps should be sorted"); + let nanos = nanos as u128; + anyhow::ensure!( + nanos <= max, + "A table alignment operation may not upsample data by \ + more than a factor of {MAX_UPSAMPLING_RATIO}" + ); + } + Ok(()) +} + +/// An `align` table operation, used to produce data at well-defined periods. +/// +/// Alignment is important for any kind of aggregation. Data is actually +/// produced at variable intervals, under the control of the producer itself. +/// This means that in general, two timeseries that are related (say, the same +/// schema) may have data samples at slightly different timestamps. +/// +/// Alignment is used to produce data at the defined timestamps, so that samples +/// from multiple timeseries may be combined or correlated in meaningful ways. +#[derive(Clone, Debug, PartialEq)] +pub struct Align { + /// The alignment method, used to describe how data over the input period + /// is used to generate an output sample. + pub method: AlignmentMethod, + // TODO-completeness. We'd like to separate the concept of the period, the + // interval on which data is produced by this alignment, and the input + // window, the range of time in the past over which data is considered to + // produce the output values. + // + // For example, we might want to produce a moving average, by considering + // the last 1h of data, and produce an output value every 10m. Each of those + // output values would share 50m of data with the points on either side. + // + // For now, we'll enforce that the output period and input window are the + // same. + pub period: Duration, +} + +impl Align { + // Apply the alignment function to the set of tables. + pub(crate) fn apply( + &self, + tables: &[Table], + query_end: &DateTime, + ) -> Result, Error> { + match self.method { + AlignmentMethod::Interpolate => tables + .iter() + .map(|table| align_interpolate(table, query_end, &self.period)) + .collect(), + AlignmentMethod::MeanWithin => tables + .iter() + .map(|table| align_mean_within(table, query_end, &self.period)) + .collect(), + } + } +} + +/// An alignment method. +#[derive(Clone, Debug, PartialEq)] +pub enum AlignmentMethod { + /// Alignment is done by interpolating the output data at the specified + /// period. + Interpolate, + /// Alignment is done by computing the mean of the output data within the + /// specified period. + MeanWithin, +} + +// Align the timeseries in a table by computing the average within each output +// period. 
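+//
+// As a rough sketch for a gauge timeseries (numbers are illustrative only):
+// with a 10s period ending at the query end time, samples taken 1s, 4s, and 8s
+// before that end all land in the final window, and the output point for that
+// window is their mean, stamped with the window's end time.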
+fn align_mean_within( + table: &Table, + query_end: &DateTime, + period: &Duration, +) -> Result { + let mut output_table = Table::new(table.name()); + for timeseries in table.iter() { + let points = ×eries.points; + anyhow::ensure!( + points.dimensionality() == 1, + "Aligning multidimensional timeseries is not yet supported" + ); + let data_type = points.data_types().next().unwrap(); + anyhow::ensure!( + data_type.is_numeric(), + "Alignment by mean requires numeric data type, not {}", + data_type + ); + let metric_type = points.metric_type().unwrap(); + anyhow::ensure!( + matches!(metric_type, MetricType::Gauge | MetricType::Delta), + "Alignment by mean requires a gauge or delta metric, not {}", + metric_type, + ); + verify_max_upsampling_ratio(&points.timestamps, &period)?; + + // Always convert the output to doubles, when computing the mean. The + // output is always a gauge, so we do not need the start times of the + // input either. + // + // IMPORTANT: We compute the mean in the loop below from the back of the + // array (latest timestamp) to the front (earliest timestamp). They are + // appended to these arrays here in that _reversed_ order. These arrays + // are flipped before pushing them onto the timeseries at the end of the + // loop below. + let mut output_values = Vec::with_capacity(points.len()); + let mut output_timestamps = Vec::with_capacity(points.len()); + + // Convert the input to doubles now, so the tight loop below does less + // conversion / matching inside. + let input_points = match points.values(0).unwrap() { + ValueArray::Integer(values) => values + .iter() + .map(|maybe_int| maybe_int.map(|int| int as f64)) + .collect(), + ValueArray::Double(values) => values.clone(), + _ => unreachable!(), + }; + + // Alignment works as follows: + // + // - Start at the end of the timestamp array, working our way backwards + // in time. + // - Create the output timestamp from the current step. + // - Find all points in the input array that are within the alignment + // period. + // - Compute the mean of those. + let period_ = + TimeDelta::from_std(*period).context("time delta out of range")?; + let first_timestamp = points.timestamps[0]; + let mut ix: u32 = 0; + loop { + // Compute the next output timestamp, by shifting the query end time + // by the period and the index. + let time_offset = TimeDelta::from_std(ix * *period) + .context("time delta out of range")?; + let output_time = query_end + .checked_sub_signed(time_offset) + .context("overflow computing next output timestamp")?; + let window_start = output_time + .checked_sub_signed(period_) + .context("overflow computing next output window start")?; + + // The output time is before any of the data in the input array, + // we're done. It's OK for the _start time_ to be before any input + // timestamps. + if output_time < first_timestamp { + break; + } + + // Aggregate all values within this time window. + // + // This works a bit differently for gauge timeseries and deltas. + // Gauges are simpler, so let's consider them first. A point is + // "within" the window if the timestamp is within the window. Every + // point is either completely within or completely without the + // window, so we just add the values. + // + // Deltas have a start time, which makes things a bit more + // complicated. In that case, a point can overlap _partially_ with + // the output time window, and we'd like to take that partial + // overlap into account. 
To do that, we find relevant values which + // have either a start time or timestamp within the output window. + // We compute the fraction of overlap with the window, which is in + // [0.0, 1.0], and multiply the value by that fraction. One can + // think of this as a dot-product between the interval-overlap array + // and the value array, divided by the 1-norm, or number of nonzero + // entries. + let output_value = if matches!(metric_type, MetricType::Gauge) { + mean_gauge_value_in_window( + &points.timestamps, + &input_points, + window_start, + output_time, + ) + } else { + mean_delta_value_in_window( + points.start_times.as_ref().unwrap(), + &points.timestamps, + &input_points, + window_start, + output_time, + ) + }; + output_values.push(output_value); + + // In any case, we push the window's end time and increment to the + // next period. + output_timestamps.push(output_time); + ix += 1; + } + + // We've accumulated our input values into the output arrays, but in + // reverse order. Flip them and push onto the existing table, as a gauge + // timeseries. + let mut new_timeseries = Timeseries::new( + timeseries.fields.clone().into_iter(), + DataType::Double, + MetricType::Gauge, + ) + .unwrap(); + let values = + ValueArray::Double(output_values.into_iter().rev().collect()); + let timestamps = output_timestamps.into_iter().rev().collect(); + let values = Values { values, metric_type: MetricType::Gauge }; + new_timeseries.points = + Points { start_times: None, timestamps, values: vec![values] }; + new_timeseries.alignment = + Some(Alignment { end_time: *query_end, period: *period }); + output_table.insert(new_timeseries).unwrap(); + } + Ok(output_table) +} + +// Given an interval start and end, and a window start and end, compute the +// fraction of the _interval_ that the time window represents. +fn fraction_overlap_with_window( + interval_start: DateTime, + interval_end: DateTime, + window_start: DateTime, + window_end: DateTime, +) -> f64 { + assert!(interval_start < interval_end); + assert!(window_start < window_end); + let end = window_end.min(interval_end); + let start = window_start.max(interval_start); + let contained_size = (end - start).num_nanoseconds().unwrap() as f64; + if contained_size < 0.0 { + return 0.0; + } + let interval_size = + (interval_end - interval_start).num_nanoseconds().unwrap() as f64; + let fraction = contained_size / interval_size; + assert!(fraction >= 0.0); + assert!(fraction <= 1.0); + fraction +} + +// For a delta metric, compute the mean of points falling within the provided +// window. +// +// This uses both the start and end times when considering each point. Each +// point's value is weighted by the faction of overlap with the window. +fn mean_delta_value_in_window( + start_times: &[DateTime], + timestamps: &[DateTime], + input_points: &[Option], + window_start: DateTime, + window_end: DateTime, +) -> Option { + // We can find the indices where the timestamp and start times separately + // overlap the window of interest. Then any interval is potentially of + // interest if _either_ its start time or timestamp is within the window. + // + // Since the start times are <= the timestamps, we can take the min of those + // two to get the first point that overlaps at all, and the max to get the + // last. 
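+    //
+    // For instance (mirroring the unit test at the bottom of this file): a
+    // single delta interval [now - 1s, now] with value 3.0, viewed through a
+    // window covering the last 0.5s, overlaps that window by half, so it
+    // contributes 0.5 * 3.0 = 1.5 to the mean.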
+ let first_timestamp = timestamps.partition_point(|t| t <= &window_start); + let last_timestamp = timestamps.partition_point(|t| t <= &window_end); + let first_start_time = start_times.partition_point(|t| t <= &window_start); + let last_start_time = start_times.partition_point(|t| t <= &window_end); + let first_index = first_timestamp.min(first_start_time); + let last_index = last_timestamp.max(last_start_time); + + // Detect the possible case where the interval is entirely before or + // entirely after the window. + if first_index == last_index { + let t = *timestamps.get(first_timestamp)?; + let s = *start_times.get(first_timestamp)?; + if t < window_start || s > window_end { + return None; + } + let Some(val) = input_points[first_timestamp] else { + return None; + }; + let fraction = fraction_overlap_with_window( + start_times[first_start_time], + timestamps[first_timestamp], + window_start, + window_end, + ); + return Some(fraction * val); + } + + // Compute the overlap for all points which have some overlap. + let starts = &start_times[first_index..last_index]; + let times = ×tamps[first_index..last_index]; + let vals = &input_points[first_index..last_index]; + let iter = starts + .into_iter() + .copied() + .zip(times.into_iter().copied()) + .zip(vals.into_iter().copied()); + let count = (last_timestamp - first_timestamp).max(1) as f64; + let mut maybe_sum = None; + for it in iter.filter_map(|((start, time), maybe_val)| { + let Some(val) = maybe_val else { + return None; + }; + let fraction = + fraction_overlap_with_window(start, time, window_start, window_end); + Some(fraction * val) + }) { + *maybe_sum.get_or_insert(0.0) += it; + } + maybe_sum.map(|sum| sum / count) +} + +// For a gauge metric, compute the mean of points falling within the provided +// window. +fn mean_gauge_value_in_window( + timestamps: &[DateTime], + input_points: &[Option], + window_start: DateTime, + window_end: DateTime, +) -> Option { + // Find the position of the window start and end in the sorted + // array of input timestamps. The `partition_point()` method accepts + // a closure, which partitions the input into a prefix where the + // closure evaluates to true, and a suffix where it's false. It + // returns the first element in the suffix. + // + // So the first closure returns true for all timestamps we want to + // exclude, which are those up to and including the window start time. + // So we get the index of the first point strictly later than the + // window start. + // + // The second closure returns true for all points up to and + // including the output time as well. + let start_index = timestamps.partition_point(|t| t <= &window_start); + let output_index = timestamps.partition_point(|t| t <= &window_end); + assert!(output_index >= start_index); + + // Accumulate the values over this set of indices. + // + // If there are really zero points in this time interval, we add + // a missing value. 
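+    //
+    // For example (hypothetical values): timestamps [t0, t1, t2] with a window
+    // of (t0, t2] give start_index = 1 and output_index = 3, so the mean is
+    // taken over the points at t1 and t2.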
+ if start_index != output_index { + let mut maybe_sum = None; + for it in input_points[start_index..output_index] + .iter() + .filter_map(|x| x.as_ref().copied()) + { + *maybe_sum.get_or_insert(0.0) += it; + } + maybe_sum.map(|output_value| { + output_value / (output_index - start_index) as f64 + }) + } else { + None + } +} + +fn align_interpolate( + _table: &Table, + _query_end: &DateTime, + _period: &Duration, +) -> Result { + anyhow::bail!("Alignment with interpolation not yet implemented") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_fraction_overlap_with_window() { + let now = Utc::now(); + let window_start = now - Duration::from_secs(1); + let window_end = now; + let interval_start = window_start; + let interval_end = window_end; + assert_eq!( + fraction_overlap_with_window( + interval_start, + interval_end, + window_start, + window_end, + ), + 1.0 + ); + + let window_start = now - Duration::from_secs(1); + let window_end = now; + let interval_start = window_start; + let interval_end = now - Duration::from_secs_f64(0.5); + assert_eq!( + fraction_overlap_with_window( + interval_start, + interval_end, + window_start, + window_end, + ), + 1.0, + "This interval is aligned with the start time \ + of the window, and contained entirely within it, \ + so the fraction should be 1.0", + ); + + // If we reverse the window and interval, then the interval entirely + // contains the window, which is 50% of the interval. + let (window_start, window_end, interval_start, interval_end) = + (interval_start, interval_end, window_start, window_end); + assert_eq!( + fraction_overlap_with_window( + interval_start, + interval_end, + window_start, + window_end, + ), + 0.5, + "The window is entirely contained within the interval, \ + and covers 50% of it", + ); + + // If the interval is entirely contained in the window, we should have + // the entire interval as our fraction. + let window_start = now - Duration::from_secs(1); + let window_end = now; + let interval_start = window_start + Duration::from_secs_f64(0.25); + let interval_end = window_start + Duration::from_secs_f64(0.5); + assert_eq!( + fraction_overlap_with_window( + interval_start, + interval_end, + window_start, + window_end, + ), + 1.0, + "The interval is entirely contained within the window", + ); + + // This is aligned at the right with the window end. + let window_start = now - Duration::from_secs(1); + let window_end = now; + let interval_start = window_start + Duration::from_secs_f64(0.25); + let interval_end = window_end; + assert_eq!( + fraction_overlap_with_window( + interval_start, + interval_end, + window_start, + window_end, + ), + 1.0, + "The interval is aligned at right with the window, and \ + entirely contained within it, so the fraction should still \ + be 1.0", + ); + + // But if we reverse it again, the fraction should reveal itself. + let (window_start, window_end, interval_start, interval_end) = + (interval_start, interval_end, window_start, window_end); + assert_eq!( + fraction_overlap_with_window( + interval_start, + interval_end, + window_start, + window_end, + ), + 0.75, + "The window represents 75% of the interval", + ); + + // This interval does not overlap at all, to the left. 
+ let window_start = now - Duration::from_secs(1); + let window_end = now; + let interval_start = window_start - Duration::from_secs(2); + let interval_end = window_start - Duration::from_secs(1); + assert_eq!( + fraction_overlap_with_window( + interval_start, + interval_end, + window_start, + window_end, + ), + 0.0, + ); + + // This interval does not overlap at all, to the right. + let window_start = now - Duration::from_secs(1); + let window_end = now; + let interval_start = window_start + Duration::from_secs(1); + let interval_end = window_start + Duration::from_secs(2); + assert_eq!( + fraction_overlap_with_window( + interval_start, + interval_end, + window_start, + window_end, + ), + 0.0, + ); + } + + #[test] + fn test_mean_delta_value_in_window() { + let now = Utc::now(); + let start_times = &[ + now - Duration::from_secs(4), + now - Duration::from_secs(3), + now - Duration::from_secs(2), + now - Duration::from_secs(1), + ]; + let timestamps = &[ + now - Duration::from_secs(3), + now - Duration::from_secs(2), + now - Duration::from_secs(1), + now, + ]; + let input_points = &[Some(0.0), Some(1.0), Some(2.0), Some(3.0)]; + + let window_start = now - Duration::from_secs_f64(0.5); + let window_end = now; + let mean = mean_delta_value_in_window( + start_times, + timestamps, + input_points, + window_start, + window_end, + ) + .expect("This should overlap the last interval"); + assert_eq!( + mean, + input_points.last().unwrap().unwrap() / 2.0, + "This overlaps the last interval by half", + ); + } + + #[test] + fn test_mean_gauge_value_in_window() { + let now = Utc::now(); + let timestamps = &[ + now - Duration::from_secs(3), + now - Duration::from_secs(2), + now - Duration::from_secs(1), + now, + ]; + let input_points = &[Some(0.0), Some(1.0), Some(2.0), Some(3.0)]; + + let window_start = now - Duration::from_secs(4); + let window_end = now - Duration::from_secs(3); + let mean = mean_gauge_value_in_window( + timestamps, + input_points, + window_start, + window_end, + ) + .expect("This window should overlap the first timestamp"); + assert_eq!( + mean, 0.0, + "This window should overlap the first timestamp, so the \ + mean value should be the mean of the first point only" + ); + + let window_start = now - Duration::from_secs(4); + let window_end = now - Duration::from_secs(2); + let mean = mean_gauge_value_in_window( + timestamps, + input_points, + window_start, + window_end, + ) + .expect("This window should overlap the first two timestamps"); + assert_eq!( + mean, 0.5, + "This window should overlap the first two timestamps, so the \ + mean value should be the mean of the first two points" + ); + + let window_start = now - Duration::from_secs(3); + let window_end = now - Duration::from_secs(2); + let mean = mean_gauge_value_in_window( + timestamps, + input_points, + window_start, + window_end, + ) + .expect("This window should overlap the second timestamps"); + assert_eq!( + mean, 1.0, + "This window should overlap the second timestamp, so the \ + mean value should be the mean of the second point only." 
+ ); + + let window_start = now - Duration::from_secs(4); + let window_end = *timestamps.last().unwrap(); + let mean = mean_gauge_value_in_window( + timestamps, + input_points, + window_start, + window_end, + ) + .expect("This window should overlap the all timestamps"); + assert_eq!( + mean, + input_points.iter().map(|x| x.unwrap()).sum::() + / input_points.len() as f64, + "This window should overlap the all timestamps, so the \ + mean value should be the mean of all points", + ); + + let window_start = now - Duration::from_secs(3); + let window_end = now - Duration::from_secs_f64(2.5); + assert!( + mean_gauge_value_in_window( + timestamps, + input_points, + window_start, + window_end, + ) + .is_none(), + "This window should overlap none of the points" + ); + } + + #[test] + fn test_verify_max_upsampling_ratio() { + // We'll use a 1 second period, and ensure that we allow downsampling, + // and upsampling up to the max factor. That's 1/10th of a second, + // currently. + let now = Utc::now(); + let timestamps = &[now - Duration::from_secs(1), now]; + + // All values within the threshold. + for period in [ + Duration::from_secs_f64(0.5), + Duration::from_secs(10), + Duration::from_millis(100), + ] { + assert!(verify_max_upsampling_ratio(timestamps, &period).is_ok()); + } + + // Just below the threshold. + assert!(verify_max_upsampling_ratio( + timestamps, + &Duration::from_millis(99), + ) + .is_err()); + + // Sanity check for way below the threshold. + assert!(verify_max_upsampling_ratio( + timestamps, + &Duration::from_nanos(1), + ) + .is_err()); + + // Arrays where we can't compute an interval are fine. + assert!(verify_max_upsampling_ratio( + ×tamps[..1], + &Duration::from_nanos(1), + ) + .is_ok()); + assert!( + verify_max_upsampling_ratio(&[], &Duration::from_nanos(1),).is_ok() + ); + } + + #[test] + fn test_mean_delta_does_not_modify_missing_values() { + let now = Utc::now(); + let start_times = + &[now - Duration::from_secs(2), now - Duration::from_secs(1)]; + let timestamps = &[now - Duration::from_secs(1), now]; + let input_points = &[Some(1.0), None]; + let window_start = now - Duration::from_secs(1); + let window_end = now; + let mean = mean_delta_value_in_window( + start_times, + timestamps, + input_points, + window_start, + window_end, + ); + assert!( + mean.is_none(), + "This time window contains only a None value, which should not be \ + included in the sum" + ); + } + + #[test] + fn test_mean_gauge_does_not_modify_missing_values() { + let now = Utc::now(); + let timestamps = &[now - Duration::from_secs(1), now]; + let input_points = &[Some(1.0), None]; + let window_start = now - Duration::from_secs(1); + let window_end = now; + let mean = mean_gauge_value_in_window( + timestamps, + input_points, + window_start, + window_end, + ); + assert!( + mean.is_none(), + "This time window contains only a None value, which should not be \ + included in the sum" + ); + } +} diff --git a/oximeter/db/src/oxql/ast/table_ops/filter.rs b/oximeter/db/src/oxql/ast/table_ops/filter.rs new file mode 100644 index 0000000000..e97673c8f8 --- /dev/null +++ b/oximeter/db/src/oxql/ast/table_ops/filter.rs @@ -0,0 +1,1283 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! An AST node describing filtering table operations. 
+ +// Copyright 2024 Oxide Computer Company + +use crate::oxql::ast::cmp::Comparison; +use crate::oxql::ast::ident::Ident; +use crate::oxql::ast::literal::Literal; +use crate::oxql::ast::logical_op::LogicalOp; +use crate::oxql::point::DataType; +use crate::oxql::point::MetricType; +use crate::oxql::point::Points; +use crate::oxql::point::ValueArray; +use crate::oxql::query::special_idents; +use crate::oxql::Error; +use crate::oxql::Table; +use crate::oxql::Timeseries; +use anyhow::Context; +use chrono::DateTime; +use chrono::Utc; +use oximeter::FieldType; +use oximeter::FieldValue; +use regex::Regex; +use std::collections::BTreeSet; +use std::fmt; + +/// An AST node for the `filter` table operation. +/// +/// This can be a simple operation like `foo == "bar"` or a more complex +/// expression, such as: `filter hostname == "foo" || (hostname == "bar" +/// && id == "baz")`. +#[derive(Clone, Debug, PartialEq)] +pub struct Filter { + /// True if the whole expression is negated. + pub negated: bool, + /// The contained filtering expression, which may contain many expressions + /// joined by logical operators. + pub expr: FilterExpr, +} + +impl fmt::Display for Filter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}({})", if self.negated { "!" } else { "" }, self.expr,) + } +} + +impl core::str::FromStr for Filter { + type Err = Error; + fn from_str(s: &str) -> Result { + crate::oxql::ast::grammar::query_parser::filter_expr(s) + .map_err(|e| anyhow::anyhow!("invalid filter expression: {e}")) + } +} + +// A crude limit on expression complexity, governing how many times we +// iteratively apply a DNF simplification before bailing out. +const EXPR_COMPLEXITY_ITERATIVE_LIMIT: usize = 32; + +// A crude limit on expression complexity, governing how many times we +// recurisvely apply a DNF simplification before bailing out. +const EXPR_COMPLEXITY_RECURSIVE_LIMIT: usize = 32; + +impl Filter { + /// Return the negation of this filter. + pub fn negate(&self) -> Filter { + Self { negated: !self.negated, ..self.clone() } + } + + /// Split the filter at top-level disjunctions. + /// + /// This is likely only useful after simplifying to DNF with + /// `simplify_to_dnf()`. + pub fn flatten_disjunctions(&self) -> Vec { + let mut out = vec![]; + self.flatten_disjunctions_inner(&mut out); + out + } + + fn flatten_disjunctions_inner(&self, dis: &mut Vec) { + // Recursion is only needed if this is an OR expression. In that case, + // we split the left and push it, and then recurse on the right. + // + // Note that we don't need left-recursion because the parser is strictly + // non-left-recursive. + if let FilterExpr::Compound(CompoundFilter { + left, + op: LogicalOp::Or, + right, + }) = &self.expr + { + dis.push(*left.clone()); + right.flatten_disjunctions_inner(dis); + } else { + // It's not an OR expression, or it is a simple filter expression. + // In either case, just push it directly, withouth recursing. + dis.push(self.clone()); + } + } + + /// Simplfy a filter expression to disjunctive normal form (DNF). + /// + /// Disjunctive normal form is one of a few canonical ways of writing a + /// boolean expression. It simplifies to a disjunction of conjunctions, + /// i.e., only has terms like `(a && b) || (c && d) || ...`. + /// + /// This method exists for the purposes of creating _independent_ pieces of + /// a filtering expression, each of which can be used to generate a new SQL + /// query run against ClickHouse. This is critical to support complicated + /// OxQL queries. 
Consider: + /// + /// ```ignore + /// get some_timeseries + /// | filter (foo == "bar") || (timestamp > @now() - 1m && foo == "baz") + /// ``` + /// + /// This requires fetching part of one timeseries, and all of another. One + /// cannot run this as a conjunction on the fields and then a query on the + /// measurements. It must be run in such a way to get the sets of keys + /// consistent with each term in the disjunction _independently_, so that + /// one can apply the timestamp filter to only the correct one. + /// + /// We use this method to generate the DNF, a form with only disjunctions of + /// conjunctions. That is, it's not possible to further distribute + /// conjunctions over disjunctions. + /// + /// Each disjunction is then a separate query against the fields table, where + /// we keep track of the keys in each. Each set of predicates and consistent + /// keys is then used later to fetch the measurements. + /// + /// # Notes + /// + /// There is a huge academic literature on this topic, part of the study of + /// formal languages and other areas theoretical computer science. These + /// references are mostly pretty dense and formal, though a few are really + /// useful. This [paper](https://www.researchgate.net/publication/220154187_A_Survey_of_Strategies_in_Program_Transformation_Systems) + /// is a good and accessible survey to the idea of translation systems -- + /// it's mostly focused on programming languages and compilers, but Figures + /// 7-9 in particular are about DNF. + /// + /// As usual, the Wikipedia page is a reasonable overview as well, + /// [here](https://en.wikipedia.org/wiki/Disjunctive_normal_form). We're + /// using the "syntactic" DNF conversion algorithm, essentially. This + /// involves a recursive application of + /// [de Morgan's rules](https://en.wikipedia.org/wiki/De_Morgan%27s_laws), + /// [involution / double-negation](https://en.wikipedia.org/wiki/Involution_(mathematics)), + /// distributivity of [Boolean operators](https://en.wikipedia.org/wiki/Boolean_algebra#Monotone_laws), + /// etc. + pub fn simplify_to_dnf(&self) -> Result { + self.simplify_to_dnf_impl(0) + } + + fn simplify_to_dnf_impl(&self, level: usize) -> Result { + anyhow::ensure!( + level < EXPR_COMPLEXITY_RECURSIVE_LIMIT, + "Maximum recursion level exceeded trying to simplify \ + logical expression to disjunctive normal form" + ); + let mut out = self.simplify_to_dnf_inner(level)?; + if &out == self { + return Ok(out); + } + // Continually apply simplifications as long as able. + // + // This makes me really nervous, so I'm adding an escape hatch that we + // only allow a few iterations. If we've not simplified within that, + // we'll just declare the expression too complicated to handle. + for _ in 0..EXPR_COMPLEXITY_ITERATIVE_LIMIT { + let out_ = out.simplify_to_dnf_inner(level)?; + if out_ == out { + return Ok(out_); + } + out = out_; + } + anyhow::bail!("Logical expression is too complicated to simplify") + } + + fn simplify_to_dnf_inner(&self, level: usize) -> Result { + let new = self.expr.simplify_to_dnf(level)?; + + // This matches the rule: + // + // !!x -> x + if self.negated && new.negated && new.is_simple() { + return Ok(new.negate()); + } + + // These two blocks match de Morgan's rules, which distribute a negation + // down and swap the logical operator. 
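+        //
+        // For example, one pass rewrites `!(a == 0 && b == 0)` into
+        // `!(a == 0) || !(b == 0)`, and a negated disjunction is handled
+        // symmetrically.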
+        if self.negated {
+            // This matches one of de Morgan's rules:
+            //
+            // !(x && y) -> !x || !y
+            if let FilterExpr::Compound(CompoundFilter {
+                left: x,
+                op: LogicalOp::And,
+                right: y,
+            }) = &new.expr
+            {
+                let expr = FilterExpr::Compound(CompoundFilter {
+                    left: Box::new(x.negate()),
+                    op: LogicalOp::Or,
+                    right: Box::new(y.negate()),
+                });
+                return Ok(Filter { negated: false, expr });
+            }
+
+            // This matches the other of de Morgan's rules:
+            //
+            // !(x || y) -> !x && !y
+            if let FilterExpr::Compound(CompoundFilter {
+                left: x,
+                op: LogicalOp::Or,
+                right: y,
+            }) = &new.expr
+            {
+                let expr = FilterExpr::Compound(CompoundFilter {
+                    left: Box::new(x.negate()),
+                    op: LogicalOp::And,
+                    right: Box::new(y.negate()),
+                });
+                return Ok(Filter { negated: false, expr });
+            }
+        }
+
+        // Nothing else to do, just return ourselves, though we do need to make
+        // sure we copy the negation from self as well.
+        Ok(Self { negated: self.negated, ..new })
+    }
+
+    // Merge this filter with another one, using the provided operator.
+    pub(crate) fn merge(&self, other: &Filter, op: LogicalOp) -> Self {
+        Self {
+            negated: false,
+            expr: FilterExpr::Compound(CompoundFilter {
+                left: Box::new(self.clone()),
+                op,
+                right: Box::new(other.clone()),
+            }),
+        }
+    }
+
+    // Apply the filter to the provided field.
+    //
+    // This returns `Ok(None)` if the filter doesn't apply. It returns `Ok(x)`
+    // if the filter does apply, where `x` is the logical application of the
+    // filter to the field. `true` means "keep this field", which is analogous
+    // to the `Iterator::filter()` method's signature.
+    //
+    // If the filter does apply, but is incompatible or incomparable, return an
+    // error.
+    fn filter_field(
+        &self,
+        name: &str,
+        value: &FieldValue,
+    ) -> Result<Option<bool>, Error> {
+        let result = match &self.expr {
+            FilterExpr::Simple(inner) => inner.filter_field(name, value),
+            FilterExpr::Compound(inner) => inner.filter_field(name, value),
+        };
+        result.map(|maybe_keep| maybe_keep.map(|keep| self.negated ^ keep))
+    }
+
+    // Apply the filter to the provided points.
+    fn filter_points(&self, points: &Points) -> Result<Points, Error> {
+        let to_keep = self.filter_points_inner(points)?;
+        points.filter(to_keep)
+    }
+
+    // Inner implementation of filtering points.
+    //
+    // Returns an array of bools, where true indicates the point should be
+    // kept.
+    fn filter_points_inner(&self, points: &Points) -> Result<Vec<bool>, Error> {
+        match &self.expr {
+            FilterExpr::Simple(inner) => {
+                inner.filter_points(self.negated, points)
+            }
+            FilterExpr::Compound(inner) => {
+                inner.filter_points(self.negated, points)
+            }
+        }
+    }
+
+    // Apply the filtering table operation.
+    pub(crate) fn apply(&self, tables: &[Table]) -> Result<Vec<Table>, Error> {
+        anyhow::ensure!(
+            !tables.is_empty(),
+            "Filtering operations require at least one table",
+        );
+        let mut output_tables = Vec::with_capacity(tables.len());
+        // Ensure that all the identifiers in this filter apply to the
+        // input timeseries. We can do this once at the beginning, because all
+        // the timeseries in a table have the same set of fields.
+        let first_timeseries = tables[0]
+            .iter()
+            .next()
+            .context("Table contains no timeseries to filter")?;
+        let ident_names = self.ident_names();
+
+        // There are extra, implied names that depend on the data type of the
+        // timeseries itself; check those as well.
+ let extras = implicit_field_names(first_timeseries); + let not_valid = ident_names + .iter() + .filter(|&&name| { + !(first_timeseries.fields.contains_key(name) + || extras.contains(name)) + }) + .collect::>(); + anyhow::ensure!( + not_valid.is_empty(), + "The filter expression contains identifiers that are not \ + valid for its input timeseries. Invalid identifiers: {:?}, \ + timeseries fields: {:?}", + not_valid, + ident_names.union(&extras), + ); + + // Filter each input table in succession. + for table in tables.iter() { + let mut timeseries = Vec::with_capacity(table.len()); + 'timeseries: for input in table.iter() { + // If the filter restricts any of the fields, remove this + // timeseries altogether. + for (name, value) in input.fields.iter() { + if let Some(false) = self.filter_field(name, value)? { + continue 'timeseries; + } + } + + // Apply the filter to the data points as well. + let points = self.filter_points(&input.points)?; + + // Similar to above, if the filter removes all data points in + // the timeseries, let's remove the timeseries altogether. + if points.is_empty() { + continue; + } + timeseries.push(Timeseries { + fields: input.fields.clone(), + points, + alignment: input.alignment, + }) + } + output_tables.push(Table::from_timeseries( + table.name(), + timeseries.into_iter(), + )?); + } + Ok(output_tables) + } + + // Return the last referenced timestamp by this filter, if any. + // + // This is the maximum timestamp, before which any filtered point must lie. + // This is used to determine the query end time. + pub(crate) fn last_timestamp(&self) -> Option> { + match &self.expr { + FilterExpr::Simple(inner) => inner.last_timestamp(), + FilterExpr::Compound(inner) => inner.last_timestamp(), + } + } + + // Return the name of all identifiers listed in this filter. + fn ident_names(&self) -> BTreeSet<&str> { + match &self.expr { + FilterExpr::Simple(inner) => { + let mut out = BTreeSet::new(); + out.insert(inner.ident.as_str()); + out + } + FilterExpr::Compound(inner) => { + let mut all = inner.left.ident_names(); + all.extend(inner.right.ident_names()); + all + } + } + } + + fn is_xor(&self) -> bool { + self.is_op(LogicalOp::Xor) + } + + fn is_op(&self, expected_op: LogicalOp) -> bool { + let FilterExpr::Compound(CompoundFilter { op, .. }) = &self.expr else { + return false; + }; + op == &expected_op + } + + // If this is an XOR, rewrite it to a disjunction of conjunctions. + // + // If it is not, return a clone of self. + fn rewrite_xor_to_disjunction(&self) -> Self { + let self_ = self.clone(); + if !self.is_xor() { + return self_; + } + let Filter { + negated, + expr: FilterExpr::Compound(CompoundFilter { left, right, .. }), + } = self_ + else { + unreachable!(); + }; + let left_ = CompoundFilter { + left: left.clone(), + op: LogicalOp::And, + right: Box::new(right.negate()), + }; + let right_ = CompoundFilter { + left: Box::new(left.negate()), + op: LogicalOp::And, + right, + }; + let expr = CompoundFilter { + left: Box::new(left_.to_filter()), + op: LogicalOp::Or, + right: Box::new(right_.to_filter()), + }; + Filter { negated, expr: FilterExpr::Compound(expr) } + } + + fn is_simple(&self) -> bool { + matches!(self.expr, FilterExpr::Simple(_)) + } +} + +/// Return the names of the implicit fields / columns that a filter can apply +/// to, based on the metric types of the contained data points. +fn implicit_field_names( + first_timeseries: &Timeseries, +) -> BTreeSet<&'static str> { + let mut out = BTreeSet::new(); + + // Everything has a timestamp! 
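+    //
+    // The remaining special identifiers (`datum`, `bins`, `counts`, and
+    // `start_time`) are only available for the metric / data type
+    // combinations matched below.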
+ out.insert(special_idents::TIMESTAMP); + let type_info = first_timeseries + .points + .metric_types() + .zip(first_timeseries.points.data_types()); + for (metric_type, data_type) in type_info { + match (metric_type, data_type) { + // Scalar gauges. + ( + MetricType::Gauge, + DataType::Integer + | DataType::Boolean + | DataType::Double + | DataType::String, + ) => { + out.insert(special_idents::DATUM); + } + // Histogram gauges. + ( + MetricType::Gauge, + DataType::IntegerDistribution | DataType::DoubleDistribution, + ) => { + out.insert(special_idents::BINS); + out.insert(special_idents::COUNTS); + } + // Scalars, either delta or cumulatives. + ( + MetricType::Delta | MetricType::Cumulative, + DataType::Integer | DataType::Double, + ) => { + out.insert(special_idents::DATUM); + out.insert(special_idents::START_TIME); + } + // Histograms, either delta or cumulative. + ( + MetricType::Delta | MetricType::Cumulative, + DataType::IntegerDistribution | DataType::DoubleDistribution, + ) => { + out.insert(special_idents::BINS); + out.insert(special_idents::COUNTS); + out.insert(special_idents::START_TIME); + } + // Impossible combinations + ( + MetricType::Delta | MetricType::Cumulative, + DataType::Boolean | DataType::String, + ) => unreachable!(), + } + } + out +} + +/// A filtering expression, used in the `filter` table operation. +#[derive(Clone, Debug, PartialEq)] +pub enum FilterExpr { + /// A single logical expression, e.g., `foo == "bar"`. + Simple(SimpleFilter), + /// Two logical expressions, e.g., `foo == "bar" || yes == false` + Compound(CompoundFilter), +} + +impl FilterExpr { + fn to_filter(&self) -> Filter { + Filter { negated: false, expr: self.clone() } + } + + fn simplify_to_dnf(&self, level: usize) -> Result { + match self { + FilterExpr::Simple(_) => Ok(self.to_filter()), + FilterExpr::Compound(CompoundFilter { left, op, right }) => { + // Apply recursively first. 
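+                //
+                // The rewrite rules below then distribute conjunctions over
+                // disjunctions. For example, an (illustrative) expression like
+                // `(a == 0 || b == 1) && c == 2` is rewritten to
+                // `(a == 0 && c == 2) || (b == 1 && c == 2)`.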
+ let left = left.simplify_to_dnf_impl(level + 1)?; + let right = right.simplify_to_dnf_impl(level + 1)?; + + // This matches the rule: + // + // (x || y) && z -> (x && z) || (y && z) + if let ( + FilterExpr::Compound(CompoundFilter { + left: x, + op: LogicalOp::Or, + right: y, + }), + LogicalOp::And, + FilterExpr::Simple(z), + ) = (&left.expr, op, &right.expr) + { + let left_ = Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: x.clone(), + op: LogicalOp::And, + right: Box::new(z.to_filter()), + }), + }; + let right_ = Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: y.clone(), + op: LogicalOp::And, + right: Box::new(z.to_filter()), + }), + }; + return Ok(Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(left_), + op: LogicalOp::Or, + right: Box::new(right_), + }), + }); + } + + // This matches the rule: + // + // z && (x || y) -> (z && x) || (z && y) + if let ( + FilterExpr::Simple(z), + LogicalOp::And, + FilterExpr::Compound(CompoundFilter { + left: x, + op: LogicalOp::Or, + right: y, + }), + ) = (&left.expr, op, &right.expr) + { + let left_ = Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(z.to_filter()), + op: LogicalOp::And, + right: x.clone(), + }), + }; + let right_ = Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(z.to_filter()), + op: LogicalOp::And, + right: y.clone(), + }), + }; + return Ok(Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(left_), + op: LogicalOp::Or, + right: Box::new(right_), + }), + }); + } + + // Lastly, simplify an XOR to its logical equivalent, which is + // in DNF. + let out = Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(left), + op: *op, + right: Box::new(right), + }), + }; + Ok(out.rewrite_xor_to_disjunction()) + } + } + } +} + +impl fmt::Display for FilterExpr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + FilterExpr::Simple(inner) => write!(f, "{inner}"), + FilterExpr::Compound(inner) => write!(f, "{inner}"), + } + } +} + +/// Two filter expressions joined by a logical operator. +#[derive(Clone, Debug, PartialEq)] +pub struct CompoundFilter { + /// The left subexpression. + pub left: Box, + /// The logical operator joining the two expressions. + pub op: LogicalOp, + /// The right subexpression. + pub right: Box, +} + +impl fmt::Display for CompoundFilter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} {} {}", self.left, self.op, self.right,) + } +} + +impl CompoundFilter { + fn to_filter(&self) -> Filter { + Filter { negated: false, expr: FilterExpr::Compound(self.clone()) } + } + + // Apply the filter to the provided field. + fn filter_field( + &self, + name: &str, + value: &FieldValue, + ) -> Result, Error> { + let left = self.left.filter_field(name, value)?; + let right = self.right.filter_field(name, value)?; + match (left, right) { + (None, None) => Ok(None), + (Some(x), None) | (None, Some(x)) => Ok(Some(x)), + (Some(left), Some(right)) => match self.op { + LogicalOp::And => Ok(Some(left && right)), + LogicalOp::Or => Ok(Some(left || right)), + LogicalOp::Xor => Ok(Some(left ^ right)), + }, + } + } + + // Apply the filter to the provided points. 
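+    //
+    // The left and right subexpressions are applied to the points
+    // independently, and the resulting boolean arrays are combined
+    // element-wise with this filter's logical operator; `negated` is then
+    // XOR-ed onto each element.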
+ fn filter_points( + &self, + negated: bool, + points: &Points, + ) -> Result, Error> { + let mut left = self.left.filter_points_inner(points)?; + let right = self.right.filter_points_inner(points)?; + match self.op { + LogicalOp::And => { + for i in 0..left.len() { + left[i] = negated ^ (left[i] & right[i]); + } + } + LogicalOp::Or => { + for i in 0..left.len() { + left[i] = negated ^ (left[i] | right[i]); + } + } + LogicalOp::Xor => { + for i in 0..left.len() { + left[i] = negated ^ (left[i] ^ right[i]); + } + } + } + Ok(left) + } + + fn last_timestamp(&self) -> Option> { + let left = self.left.last_timestamp(); + let right = self.right.last_timestamp(); + match (left, right) { + (None, None) => None, + (Some(single), None) | (None, Some(single)) => Some(single), + (Some(left), Some(right)) => Some(left.max(right)), + } + } +} + +/// A simple filter expression, comparing an identifier to a value. +#[derive(Clone, Debug, PartialEq)] +pub struct SimpleFilter { + /// The identifier being compared. + pub ident: Ident, + /// The comparison operator. + pub cmp: Comparison, + /// The value to compare the identifier against. + pub value: Literal, +} + +impl fmt::Display for SimpleFilter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} {} {}", self.ident, self.cmp, self.value,) + } +} + +impl SimpleFilter { + fn to_filter(&self) -> Filter { + Filter { negated: false, expr: FilterExpr::Simple(self.clone()) } + } + + // Apply this filter to the provided field. + // + // If the field name does not match the identifier in `self`, return + // `Ok(None)`, since this filter does not apply to the provided field. + // + // If the name matches and the type of `self` is compatible, return `Ok(x)` + // where `x` is the logical application of the filter to the field. + // + // If the field matches the name, but the type is not compatible, return an + // error. + fn filter_field( + &self, + name: &str, + value: &FieldValue, + ) -> Result, Error> { + // If the name does not match, this filter does not apply, and so we do not + // filter the field. + if self.ident.as_str() != name { + return Ok(None); + } + self.value.compare_field(value, self.cmp) + } + + pub(crate) fn value_type_is_compatible_with_field( + &self, + field_type: FieldType, + ) -> bool { + self.value.is_compatible_with_field(field_type) + } + + /// Return the expression as a string that can be applied safely in the + /// database. + pub(crate) fn as_db_safe_string(&self) -> String { + let expr = self.value.as_db_safe_string(); + let fn_name = self.cmp.as_db_function_name(); + format!("{}({}, {})", fn_name, self.ident, expr) + } + + // Returns an array of bools, where true indicates the point should be kept. 
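+    //
+    // Note that only the special identifiers `timestamp` and `datum`
+    // constrain points here; a filter naming any other identifier keeps every
+    // point (modulo negation), since field values are checked separately via
+    // `filter_field`.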
+ fn filter_points( + &self, + negated: bool, + points: &Points, + ) -> Result, Error> { + let ident = self.ident.as_str(); + if ident == "timestamp" { + self.filter_points_by_timestamp(negated, &points.timestamps) + } else if ident == "datum" { + anyhow::ensure!( + points.dimensionality() == 1, + "Filtering multidimensional values by datum is not yet supported" + ); + self.filter_points_by_datum(negated, points.values(0).unwrap()) + } else { + Ok(vec![!negated; points.len()]) + } + } + + fn filter_points_by_timestamp( + &self, + negated: bool, + timestamps: &[DateTime], + ) -> Result, Error> { + let Literal::Timestamp(timestamp) = &self.value else { + anyhow::bail!( + "Cannot compare non-timestamp filter against a timestamp" + ); + }; + match self.cmp { + Comparison::Eq => Ok(timestamps + .iter() + .map(|t| negated ^ (t == timestamp)) + .collect()), + Comparison::Ne => Ok(timestamps + .iter() + .map(|t| negated ^ (t != timestamp)) + .collect()), + Comparison::Gt => Ok(timestamps + .iter() + .map(|t| negated ^ (t > timestamp)) + .collect()), + Comparison::Ge => Ok(timestamps + .iter() + .map(|t| negated ^ (t >= timestamp)) + .collect()), + Comparison::Lt => Ok(timestamps + .iter() + .map(|t| negated ^ (t < timestamp)) + .collect()), + Comparison::Le => Ok(timestamps + .iter() + .map(|t| negated ^ (t <= timestamp)) + .collect()), + Comparison::Like => unreachable!(), + } + } + + fn filter_points_by_datum( + &self, + negated: bool, + values: &ValueArray, + ) -> Result, Error> { + match (&self.value, values) { + (Literal::Integer(int), ValueArray::Integer(ints)) => { + match self.cmp { + Comparison::Eq => Ok(ints + .iter() + .map(|maybe_int| { + maybe_int + .map(|i| negated ^ (i128::from(i) == *int)) + .unwrap_or(false) + }) + .collect()), + Comparison::Ne => Ok(ints + .iter() + .map(|maybe_int| { + maybe_int + .map(|i| negated ^ (i128::from(i) != *int)) + .unwrap_or(false) + }) + .collect()), + Comparison::Gt => Ok(ints + .iter() + .map(|maybe_int| { + maybe_int + .map(|i| negated ^ (i128::from(i) > *int)) + .unwrap_or(false) + }) + .collect()), + Comparison::Ge => Ok(ints + .iter() + .map(|maybe_int| { + maybe_int + .map(|i| negated ^ (i128::from(i) >= *int)) + .unwrap_or(false) + }) + .collect()), + Comparison::Lt => Ok(ints + .iter() + .map(|maybe_int| { + maybe_int + .map(|i| negated ^ (i128::from(i) < *int)) + .unwrap_or(false) + }) + .collect()), + Comparison::Le => Ok(ints + .iter() + .map(|maybe_int| { + maybe_int + .map(|i| negated ^ (i128::from(i) <= *int)) + .unwrap_or(false) + }) + .collect()), + Comparison::Like => unreachable!(), + } + } + (Literal::Double(double), ValueArray::Double(doubles)) => { + match self.cmp { + Comparison::Eq => Ok(doubles + .iter() + .map(|maybe_double| { + maybe_double + .map(|d| negated ^ (d == *double)) + .unwrap_or(false) + }) + .collect()), + Comparison::Ne => Ok(doubles + .iter() + .map(|maybe_double| { + maybe_double + .map(|d| negated ^ (d != *double)) + .unwrap_or(false) + }) + .collect()), + Comparison::Gt => Ok(doubles + .iter() + .map(|maybe_double| { + maybe_double + .map(|d| negated ^ (d > *double)) + .unwrap_or(false) + }) + .collect()), + Comparison::Ge => Ok(doubles + .iter() + .map(|maybe_double| { + maybe_double + .map(|d| negated ^ (d >= *double)) + .unwrap_or(false) + }) + .collect()), + Comparison::Lt => Ok(doubles + .iter() + .map(|maybe_double| { + maybe_double + .map(|d| negated ^ (d < *double)) + .unwrap_or(false) + }) + .collect()), + Comparison::Le => Ok(doubles + .iter() + .map(|maybe_double| { + maybe_double + .map(|d| 
negated ^ (d <= *double)) + .unwrap_or(false) + }) + .collect()), + Comparison::Like => unreachable!(), + } + } + (Literal::String(string), ValueArray::String(strings)) => { + let string = string.as_str(); + match self.cmp { + Comparison::Eq => Ok(strings + .iter() + .map(|maybe_string| { + maybe_string + .as_deref() + .map(|s| negated ^ (s == string)) + .unwrap_or(false) + }) + .collect()), + Comparison::Ne => Ok(strings + .iter() + .map(|maybe_string| { + maybe_string + .as_deref() + .map(|s| negated ^ (s != string)) + .unwrap_or(false) + }) + .collect()), + Comparison::Gt => Ok(strings + .iter() + .map(|maybe_string| { + maybe_string + .as_deref() + .map(|s| negated ^ (s > string)) + .unwrap_or(false) + }) + .collect()), + Comparison::Ge => Ok(strings + .iter() + .map(|maybe_string| { + maybe_string + .as_deref() + .map(|s| negated ^ (s >= string)) + .unwrap_or(false) + }) + .collect()), + Comparison::Lt => Ok(strings + .iter() + .map(|maybe_string| { + maybe_string + .as_deref() + .map(|s| negated ^ (s < string)) + .unwrap_or(false) + }) + .collect()), + Comparison::Le => Ok(strings + .iter() + .map(|maybe_string| { + maybe_string + .as_deref() + .map(|s| negated ^ (s <= string)) + .unwrap_or(false) + }) + .collect()), + Comparison::Like => { + let re = Regex::new(string)?; + Ok(strings + .iter() + .map(|maybe_string| { + maybe_string + .as_deref() + .map(|s| negated ^ re.is_match(s)) + .unwrap_or(false) + }) + .collect()) + } + } + } + (Literal::Boolean(boolean), ValueArray::Boolean(booleans)) => { + match self.cmp { + Comparison::Eq => Ok(booleans + .iter() + .map(|maybe_boolean| { + maybe_boolean + .map(|b| negated ^ (b == *boolean)) + .unwrap_or(false) + }) + .collect()), + Comparison::Ne => Ok(booleans + .iter() + .map(|maybe_boolean| { + maybe_boolean + .map(|b| negated ^ (b != *boolean)) + .unwrap_or(false) + }) + .collect()), + Comparison::Gt => Ok(booleans + .iter() + .map(|maybe_boolean| { + maybe_boolean + .map(|b| negated ^ (b & !(*boolean))) + .unwrap_or(false) + }) + .collect()), + Comparison::Ge => Ok(booleans + .iter() + .map(|maybe_boolean| { + maybe_boolean + .map(|b| negated ^ (b >= *boolean)) + .unwrap_or(false) + }) + .collect()), + Comparison::Lt => Ok(booleans + .iter() + .map(|maybe_boolean| { + maybe_boolean + .map(|b| negated ^ (!b & *boolean)) + .unwrap_or(false) + }) + .collect()), + Comparison::Le => Ok(booleans + .iter() + .map(|maybe_boolean| { + maybe_boolean + .map(|b| negated ^ (b <= *boolean)) + .unwrap_or(false) + }) + .collect()), + Comparison::Like => unreachable!(), + } + } + (_, _) => { + let lit_type = match &self.value { + Literal::Uuid(_) => "UUID", + Literal::Duration(_) => "duration", + Literal::Timestamp(_) => "timestamp", + Literal::IpAddr(_) => "IP address", + Literal::Integer(_) => "integer", + Literal::Double(_) => "double", + Literal::String(_) => "string", + Literal::Boolean(_) => "boolean", + }; + anyhow::bail!( + "Cannot compare {} literal against values of type {}", + lit_type, + values.data_type(), + ) + } + } + } + + fn last_timestamp(&self) -> Option> { + if self.ident.as_str() == "timestamp" + && matches!( + self.cmp, + Comparison::Lt | Comparison::Le | Comparison::Eq + ) + { + let Literal::Timestamp(t) = self.value else { + return None; + }; + Some(t) + } else { + None + } + } +} + +#[cfg(test)] +mod tests { + use crate::oxql::ast::grammar::query_parser; + use crate::oxql::ast::logical_op::LogicalOp; + use crate::oxql::point::MetricType; + use crate::oxql::point::Points; + use crate::oxql::point::ValueArray; + use 
crate::oxql::point::Values; + use chrono::Utc; + use oximeter::FieldValue; + use std::time::Duration; + use uuid::Uuid; + + #[test] + fn test_atom_filter_double_points() { + let start_times = None; + let timestamps = + vec![Utc::now(), Utc::now() + Duration::from_secs(1000)]; + let values = vec![Values { + values: ValueArray::Double(vec![Some(0.0), Some(2.0)]), + metric_type: MetricType::Gauge, + }]; + let points = Points { start_times, timestamps, values }; + + // This filter should remove the first point based on its timestamp. + let t = Utc::now() + Duration::from_secs(10); + let q = + format!("filter timestamp > @{}", t.format("%Y-%m-%dT%H:%M:%S")); + let filter = query_parser::filter(q.as_str()).unwrap(); + let out = filter.filter_points(&points).unwrap(); + assert!(out.len() == 1); + assert_eq!( + out.values(0).unwrap().as_double().unwrap()[0], + points.values(0).unwrap().as_double().unwrap()[1], + ); + + // And this one the second point based on the datum + let filter = query_parser::filter("filter datum < 1.0").unwrap(); + let out = filter.filter_points(&points).unwrap(); + assert!(out.len() == 1); + assert_eq!( + out.values(0).unwrap().as_double().unwrap()[0], + points.values(0).unwrap().as_double().unwrap()[0], + ); + } + + #[test] + fn test_atom_filter_points_wrong_type() { + let start_times = None; + let timestamps = + vec![Utc::now(), Utc::now() + Duration::from_secs(1000)]; + let values = vec![Values { + values: ValueArray::Double(vec![Some(0.0), Some(2.0)]), + metric_type: MetricType::Gauge, + }]; + let points = Points { start_times, timestamps, values }; + + let filter = + query_parser::filter("filter datum < \"something\"").unwrap(); + assert!(filter.filter_points(&points).is_err()); + } + + #[test] + fn test_all_ident_names() { + let f = query_parser::filter("filter timestamp > @now() && datum < 1") + .unwrap(); + assert_eq!( + f.ident_names(), + ["datum", "timestamp"].into_iter().collect() + ); + + let f = query_parser::filter( + "filter timestamp > @now() - 1m && timestamp < @now()", + ) + .unwrap(); + let idents = f.ident_names(); + assert_eq!(idents.len(), 1); + assert_eq!(idents.iter().next().unwrap(), &"timestamp"); + } + + #[test] + #[allow(clippy::impossible_comparisons)] + fn test_filter_field_logic() { + for op in [LogicalOp::And, LogicalOp::Or, LogicalOp::Xor] { + let s = format!("filter (x > 10) {op} (x < 0)"); + let filter = query_parser::filter(&s).unwrap(); + let cases = &[11, 10, 5, 0, -1]; + for &val in cases.iter() { + let pass = match op { + LogicalOp::And => (val > 10) && (val < 0), + LogicalOp::Or => (val > 10) || (val < 0), + LogicalOp::Xor => (val > 10) ^ (val < 0), + }; + let result = filter + .filter_field("x", &FieldValue::I32(val)) + .expect("Filter should be considered comparable") + .expect("Filter should apply to field of the same name"); + assert_eq!( + result, + pass, + "Filter '{}' should {} the value {}", + filter, + if pass { "pass" } else { "not pass" }, + val, + ); + } + + // This names a different field, so should not apply. + assert_eq!( + filter + .filter_field("y", &FieldValue::I32(11)) + .expect("Filter should be considered comparable"), + None, + "Filter should not apply, since it names a different field" + ); + + // These values should not be comparable at all, so we'll return an + // error. 
+ let incomparable = &[ + FieldValue::String("foo".into()), + FieldValue::Uuid(Uuid::new_v4()), + FieldValue::IpAddr("127.0.0.1".parse().unwrap()), + FieldValue::Bool(false), + ]; + for na in incomparable.iter() { + filter + .filter_field("x", na) + .expect_err("These should not be comparable at all"); + } + } + } + + #[test] + fn test_simplify_to_dnf() { + let cases = &[ + // Simple cases that should not be changed + ("a == 0", "a == 0"), + ("!(a == 0)", "!(a == 0)"), + ("a == 0 || b == 1", "a == 0 || b == 1"), + ("a == 0 && b == 1", "a == 0 && b == 1"), + + // Rewrite of XOR + ("a == 0 ^ b == 1", "(a == 0 && !(b == 1)) || (!(a == 0) && (b == 1))"), + + // Simple applications of distribution rules. + // + // Distribute conjunction over disjunction. + ("a == 0 && (b == 1 || c == 2)", "(a == 0 && b == 1) || (a == 0 && c == 2)"), + ("a == 0 && (b == 1 || c == 2 || d == 3)", "(a == 0 && b == 1) || (a == 0 && c == 2) || (a == 0 && d == 3)"), + ("a == 0 && (b == 1 || c == 2 || d == 3 || e == 4)", "(a == 0 && b == 1) || (a == 0 && c == 2) || (a == 0 && d == 3) || (a == 0 && e == 4)"), + ]; + for (input, expected) in cases.iter() { + let parsed_input = query_parser::filter_expr(input).unwrap(); + let simplified = parsed_input.simplify_to_dnf().unwrap(); + let parsed_expected = query_parser::filter_expr(expected).unwrap(); + assert_eq!( + simplified, + parsed_expected, + "\ninput expression: {}\nparsed to: {}\nsimplifed to: {}\nexpected: {}\n", + input, + parsed_input, + simplified, + expected, + ); + } + } + + #[test] + fn test_dnf_conversion_fails_on_extremely_long_expressions() { + let atom = "a == 0"; + let or_chain = std::iter::repeat(atom) + .take(super::EXPR_COMPLEXITY_ITERATIVE_LIMIT + 1) + .collect::>() + .join(" || "); + let expr = format!("{atom} && ({or_chain})"); + let parsed = query_parser::filter_expr(&expr).unwrap(); + assert!( + parsed.simplify_to_dnf().is_err(), + "Should fail for extremely long logical expressions" + ); + } + + #[test] + fn test_dnf_conversion_fails_on_extremely_deep_expressions() { + let atom = "a == 0"; + let mut expr = atom.to_string(); + for _ in 0..super::EXPR_COMPLEXITY_RECURSIVE_LIMIT + 1 { + expr = format!("{atom} && ({expr})"); + } + let parsed = query_parser::filter_expr(&expr).unwrap(); + assert!( + parsed.simplify_to_dnf().is_err(), + "Should fail for extremely deep logical expressions" + ); + } +} diff --git a/oximeter/db/src/oxql/ast/table_ops/get.rs b/oximeter/db/src/oxql/ast/table_ops/get.rs new file mode 100644 index 0000000000..f0ef22c2f6 --- /dev/null +++ b/oximeter/db/src/oxql/ast/table_ops/get.rs @@ -0,0 +1,15 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! AST node for the `get` table operation. + +// Copyright 2024 Oxide Computer Company + +use oximeter::TimeseriesName; + +/// An AST node like: `get foo:bar` +#[derive(Clone, Debug, PartialEq)] +pub struct Get { + pub timeseries_name: TimeseriesName, +} diff --git a/oximeter/db/src/oxql/ast/table_ops/group_by.rs b/oximeter/db/src/oxql/ast/table_ops/group_by.rs new file mode 100644 index 0000000000..da2b1413db --- /dev/null +++ b/oximeter/db/src/oxql/ast/table_ops/group_by.rs @@ -0,0 +1,746 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
AST node for the `group_by` operation. + +// Copyright 2024 Oxide Computer Company + +use chrono::DateTime; +use chrono::Utc; + +use crate::oxql::ast::ident::Ident; +use crate::oxql::point::DataType; +use crate::oxql::point::MetricType; +use crate::oxql::point::ValueArray; +use crate::oxql::Error; +use crate::oxql::Table; +use crate::oxql::Timeseries; +use crate::TimeseriesKey; +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; + +/// A table operation for grouping data by fields, apply a reducer to the +/// remaining. +#[derive(Clone, Debug, PartialEq)] +pub struct GroupBy { + pub identifiers: Vec, + pub reducer: Reducer, +} + +impl GroupBy { + // Apply the group_by table operation. + pub(crate) fn apply(&self, tables: &[Table]) -> Result, Error> { + anyhow::ensure!( + tables.len() == 1, + "Group by operations require exactly one table", + ); + let table = &tables[0]; + anyhow::ensure!( + table.is_aligned(), + "Input tables to a `group_by` must be aligned" + ); + + match self.reducer { + Reducer::Mean => self.reduce_mean(table), + Reducer::Sum => self.reduce_sum(table), + } + } + + fn check_input_timeseries(input: &Timeseries) -> Result<(), Error> { + anyhow::ensure!(input.points.len() > 0, "Timeseries cannot be empty"); + + // For now, we can only apply this to 1-D timeseries. + anyhow::ensure!( + input.points.dimensionality() == 1, + "Group-by with multi-dimensional timeseries is not yet supported" + ); + let data_type = input.points.data_types().next().unwrap(); + anyhow::ensure!( + data_type.is_numeric(), + "Only numeric data types can be grouped, not {}", + data_type, + ); + let metric_type = input.points.metric_types().next().unwrap(); + anyhow::ensure!( + !matches!(metric_type, MetricType::Cumulative), + "Cumulative metric types cannot be grouped", + ); + Ok(()) + } + + // Reduce points in each group by summing. + fn reduce_sum(&self, table: &Table) -> Result, Error> { + assert_eq!(self.reducer, Reducer::Sum); + let mut output_table = Table::new(table.name()); + let kept_fields: Vec<_> = + self.identifiers.iter().map(Ident::as_str).collect(); + + for input in table.iter() { + Self::check_input_timeseries(input)?; + + // Throw away the fields in this timeseries that are not in the + // group_by list. + let dropped = input.copy_with_fields(&kept_fields)?; + let key = dropped.key(); + + // Fetch the existing timeseries, if one exists. If one does _not_ exist, + // we'll insert it as is, without converting. That's because we're + // just summing, not averaging. + match output_table.get_mut(key) { + Some(existing) => { + // No casting is done here, we're simply adding T + + // T -> T. + let new_values = dropped.points.values(0).unwrap(); + let existing_values = existing.points.values(0).unwrap(); + match (new_values, existing_values) { + ( + ValueArray::Double(new_values), + ValueArray::Double(existing_values), + ) => { + let new_timestamps = &dropped.points.timestamps; + + // We will be merging the new data with the + // existing, but borrow-checking limits the degree + // to which we can easily do this on the `existing` + // entry in the output table. Instead, aggregate + // everything into a copy of the expected data. + let mut timestamps = + existing.points.timestamps.clone(); + let mut values = existing_values.clone(); + + // Merge in the new values, so long as they actually + // exist. That is, we can just skip missing points + // in this round, since they do not contribute to + // the reduced value. 
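+                            //
+                            // As an illustrative example: if the existing
+                            // points are [(t0, 1.0), (t2, 3.0)] and the new
+                            // points are [(t1, 2.0), (t2, 4.0)], the merged
+                            // output is [(t0, 1.0), (t1, 2.0), (t2, 7.0)].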
+ for (new_timestamp, new_value) in new_timestamps + .iter() + .zip(new_values) + .filter_map(|(timestamp, value)| { + if let Some(val) = value { + Some((*timestamp, *val)) + } else { + None + } + }) + { + // We're really doing binary search, on both the + // sample count map and the data array. They + // both must exist, or both not, or we've done + // our accounting incorrectly. + let maybe_index = + timestamps.binary_search(&new_timestamp); + match maybe_index { + Err(insert_at) => { + // This is a new timestamp. Insert it + // into the output timeseries. + timestamps + .insert(insert_at, new_timestamp); + values + .insert(insert_at, Some(new_value)); + } + Ok(ix) => { + // This is an existing + // timestamp, so we only need to + // add the new value. If the value + // didn't exist before, replace it. + *values[ix].get_or_insert(0.0) += + new_value; + } + } + } + + // Replace the existing output timeseries's + // timestamps and data arrays. + std::mem::swap( + &mut existing.points.timestamps, + &mut timestamps, + ); + existing + .points + .values_mut(0) + .unwrap() + .swap(ValueArray::Double(values)); + } + ( + ValueArray::Integer(new_values), + ValueArray::Integer(existing_values), + ) => { + let new_timestamps = &dropped.points.timestamps; + + // We will be merging the new data with the + // existing, but borrow-checking limits the degree + // to which we can easily do this on the `existing` + // entry in the output table. Instead, aggregate + // everything into a copy of the expected data. + let mut timestamps = + existing.points.timestamps.clone(); + let mut values = existing_values.clone(); + + // Merge in the new values, so long as they actually + // exist. That is, we can just skip missing points + // in this round, since they do not contribute to + // the reduced value. + for (new_timestamp, new_value) in new_timestamps + .iter() + .zip(new_values) + .filter_map(|(timestamp, value)| { + if let Some(val) = value { + Some((*timestamp, *val)) + } else { + None + } + }) + { + // We're really doing binary search, on both the + // sample count map and the data array. They + // both must exist, or both not, or we've done + // our accounting incorrectly. + let maybe_index = + timestamps.binary_search(&new_timestamp); + match maybe_index { + Err(insert_at) => { + // This is a new timestamp. Insert it + // into the output timeseries. + timestamps + .insert(insert_at, new_timestamp); + values + .insert(insert_at, Some(new_value)); + } + Ok(ix) => { + // This is an existing + // timestamp, so we only need to + // add the new value. If the value + // didn't exist before, replace it. + *values[ix].get_or_insert(0) += + new_value; + } + } + } + + // Replace the existing output timeseries's + // timestamps and data arrays. + std::mem::swap( + &mut existing.points.timestamps, + &mut timestamps, + ); + existing + .points + .values_mut(0) + .unwrap() + .swap(ValueArray::Integer(values)); + } + _ => unreachable!(), + } + } + None => output_table.insert(dropped)?, + } + } + Ok(vec![output_table]) + } + + // Reduce points in each group by averaging. + fn reduce_mean(&self, table: &Table) -> Result, Error> { + assert_eq!(self.reducer, Reducer::Mean); + let mut output_table = Table::new(table.name()); + let kept_fields: Vec<_> = + self.identifiers.iter().map(Ident::as_str).collect(); + + // Keep track of the number of values at each output timestamp, within + // each group. + // + // As we iterate through timeseries, we reduce in-group points, so long + // as they occur at the same timestamp. 
And while timeseries must all be + // aligned the same way, they need not actually have identical + // timestamps. So what we're producing on the output is data at the + // union of all the input timestamps. + // + // These arrays keeps the count of values at each time, and may be either + // expanded or have its values incremented. Note that they're all + // doubles because we will be reducing at the end by dividing the sum at + // each point by the counts. + let mut sample_counts_by_group: BTreeMap< + TimeseriesKey, + BTreeMap, f64>, + > = BTreeMap::new(); + + for input in table.iter() { + Self::check_input_timeseries(input)?; + + // Throw away the fields in this timeseries that are not in the + // group_by list. + let dropped = input.copy_with_fields(&kept_fields)?; + let key = dropped.key(); + + // Fetch the existing timeseries, if one exists. If one does _not_ exist, + // we'll insert the table with the data type converted to a double, + // since we're always averaging. + match output_table.get_mut(key) { + Some(existing) => { + // Cast the new points to doubles, since we'll be + // aggregating. + let new_points = + dropped.points.cast(&[DataType::Double])?; + let ValueArray::Double(new_values) = + new_points.values(0).unwrap() + else { + unreachable!(); + }; + let new_timestamps = &new_points.timestamps; + + // We will be merging the new data with the + // existing, but borrow-checking limits the degree + // to which we can easily do this on the `existing` + // entry in the output table. Instead, aggregate + // everything into a copy of the expected data. + let mut timestamps = existing.points.timestamps.clone(); + let mut values = existing + .points + .values(0) + .unwrap() + .as_double() + .unwrap() + .clone(); + + // Also fetch a reference to the existing counts by + // timestamp for this group. This should exist. + let counts = sample_counts_by_group.get_mut(&key).expect( + "Should already have some sample counts for this group", + ); + + // Merge in the new values, so long as they actually + // exist. That is, we can just skip missing points + // in this round, since they do not contribute to + // the reduced value. + for (new_timestamp, new_value) in new_timestamps + .iter() + .zip(new_values) + .filter_map(|(timestamp, value)| { + if let Some(val) = value { + Some((*timestamp, *val)) + } else { + None + } + }) + { + // We're really doing binary search, on both the + // sample count map and the data array. They + // both must exist, or both not, or we've done + // our accounting incorrectly. + let maybe_index = + timestamps.binary_search(&new_timestamp); + let count = counts.entry(new_timestamp); + match (count, maybe_index) { + (Entry::Vacant(entry), Err(insert_at)) => { + // This is a new timestamp. Insert it + // into the output timeseries, and count + // it. + timestamps.insert(insert_at, new_timestamp); + values.insert(insert_at, Some(new_value)); + entry.insert(1.0); + } + (Entry::Occupied(mut entry), Ok(ix)) => { + // This is an existing timestamp. _Add_ + // it into the output timeseries, and + // count it. Its timestamp already + // exists. If the value was previously None, + // replace it now. + *values[ix].get_or_insert(0.0) += new_value; + *entry.get_mut() += 1.0; + } + (_, _) => { + panic!( + "In-group counts and output \ + values must both exist or \ + both be missing" + ); + } + } + } + + // Replace the existing output timeseries's + // timestamps and data arrays. 
+ std::mem::swap( + &mut existing.points.timestamps, + &mut timestamps, + ); + existing + .points + .values_mut(0) + .unwrap() + .swap(ValueArray::Double(values)); + } + None => { + // There were no previous points for this group. + // + // We'll cast to doubles, but _keep_ any missing samples + // (None) that were in there. Those will have a "count" of + // 0, so that we don't incorrectly over-divide in the case + // where there are both missing and non-missing samples. + let new_timeseries = dropped.cast(&[DataType::Double])?; + let values = new_timeseries + .points + .values(0) + .unwrap() + .as_double() + .unwrap(); + // Insert a count of 1.0 for each timestamp remaining, and + // _zero_ for any where the values are none. + let counts = new_timeseries + .points + .timestamps + .iter() + .zip(values) + .map(|(timestamp, maybe_value)| { + let count = f64::from(maybe_value.is_some()); + (*timestamp, count) + }) + .collect(); + let old = sample_counts_by_group.insert(key, counts); + assert!(old.is_none(), "Should not have counts entry for first timeseries in the group"); + output_table.insert(new_timeseries)?; + } + } + } + + // Since we're computing the mean, we need to divide each output value + // by the number of values that went into it. + for each in output_table.iter_mut() { + let counts = sample_counts_by_group + .get(&each.key()) + .expect("key should have been inserted earlier"); + let ValueArray::Double(values) = each.points.values_mut(0).unwrap() + else { + unreachable!(); + }; + for (val, count) in values.iter_mut().zip(counts.values()) { + if let Some(x) = val.as_mut() { + *x /= *count; + } + } + } + Ok(vec![output_table]) + } +} + +/// A reduction operation applied to unnamed columns during a group by. +#[derive(Clone, Copy, Debug, Default, PartialEq)] +pub enum Reducer { + #[default] + Mean, + Sum, +} + +#[cfg(test)] +mod tests { + use super::{GroupBy, Reducer}; + use crate::oxql::{ + ast::{ + ident::Ident, + table_ops::align::{Align, AlignmentMethod}, + }, + point::{DataType, MetricType, ValueArray}, + Table, Timeseries, + }; + use chrono::{DateTime, Utc}; + use oximeter::FieldValue; + use std::{collections::BTreeMap, time::Duration}; + + // Which timeseries the second data point is missing from. + #[derive(Clone, Copy, Debug)] + enum MissingValue { + Neither, + First, + Both, + } + + #[derive(Clone, Copy, Debug)] + struct TestConfig { + missing_value: MissingValue, + overlapping_times: bool, + reducer: Reducer, + } + + #[derive(Clone, Debug)] + #[allow(dead_code)] + struct TestTable { + aligned_table: Table, + grouped_table: Table, + query_end: DateTime, + timestamps: Vec>, + } + + impl TestTable { + fn new(cfg: TestConfig) -> Self { + let query_end = Utc::now(); + let mut timestamps = vec![ + query_end - Duration::from_secs(2), + query_end - Duration::from_secs(1), + query_end, + ]; + + // Create the first timeseries. + // + // This has two fields, one of which we'll group by. There are three + // timepoints of double values. 
+ let mut fields = BTreeMap::new(); + fields.insert("int".to_string(), FieldValue::U8(0)); + fields.insert( + "name".to_string(), + FieldValue::String("whodat".into()), + ); + let mut ts0 = Timeseries::new( + fields.into_iter(), + DataType::Double, + MetricType::Gauge, + ) + .unwrap(); + ts0.points.start_times = None; + ts0.points.timestamps = timestamps.clone(); + *ts0.points.values_mut(0).unwrap() = ValueArray::Double(vec![ + Some(1.0), + if matches!( + cfg.missing_value, + MissingValue::First | MissingValue::Both + ) { + None + } else { + Some(2.0) + }, + Some(3.0), + ]); + + // Create the second timeseries. + // + // This is nearly the same, and shares the same field value for the + // "int" field. When we group, we should reduce these two timeseries + // together. + let mut fields = BTreeMap::new(); + fields.insert("int".to_string(), FieldValue::U8(0)); + fields.insert( + "name".to_string(), + FieldValue::String("whodis".into()), + ); + let mut ts1 = Timeseries::new( + fields.into_iter(), + DataType::Double, + MetricType::Gauge, + ) + .unwrap(); + ts1.points.start_times = None; + + // Non-overlapping in this test setup means that we just shift one + // value from this array backward in time by one additional second. + // So we should have timestamps like: + // + // ts0: [ _, t0, t1, t2 ] + // ts1: [ t0, _, t1, t2 ] + // + // When reducing, t0 is never changed, and t1-t2 are always reduced + // together, if the values are present. + ts1.points.timestamps = if cfg.overlapping_times { + timestamps.clone() + } else { + let mut new_timestamps = timestamps.clone(); + new_timestamps[0] = new_timestamps[0] - Duration::from_secs(1); + timestamps.insert(0, new_timestamps[0]); + new_timestamps + }; + *ts1.points.values_mut(0).unwrap() = ValueArray::Double(vec![ + Some(2.0), + if matches!(cfg.missing_value, MissingValue::Both) { + None + } else { + Some(3.0) + }, + Some(4.0), + ]); + + let mut table = Table::new("foo"); + table.insert(ts0).unwrap(); + table.insert(ts1).unwrap(); + + // Align the actual table, based on the input, and apply the right + // group-by + let align = Align { + method: AlignmentMethod::MeanWithin, + period: Duration::from_secs(1), + }; + let aligned_tables = align.apply(&[table], &query_end).unwrap(); + let group_by = GroupBy { + identifiers: vec![Ident("int".into())], + reducer: cfg.reducer, + }; + let grouped_tables = group_by.apply(&aligned_tables).unwrap(); + assert_eq!( + grouped_tables.len(), + 1, + "Group by should produce exaclty 1 table" + ); + let grouped_table = grouped_tables.into_iter().next().unwrap(); + let aligned_table = aligned_tables.into_iter().next().unwrap(); + + let test = + Self { timestamps, aligned_table, grouped_table, query_end }; + + // These checks are all valid for grouping in general, independent + // of the exact missing values or reducer. 
+ assert_eq!( + test.grouped_table.len(), + 1, + "Should have grouped both timeseries down to 1" + ); + let grouped_timeseries = test.grouped_table.iter().next().unwrap(); + assert_eq!( + grouped_timeseries.fields.len(), + 1, + "Should have only one grouped-by field" + ); + assert_eq!( + grouped_timeseries.fields.get("int").unwrap(), + &FieldValue::U8(0), + "Grouped-by field was not maintained correctly" + ); + let points = &grouped_timeseries.points; + assert_eq!(points.dimensionality(), 1, "Points should still be 1D"); + assert_eq!( + points.start_times, None, + "Points should not have start times" + ); + assert_eq!( + points.timestamps, test.timestamps, + "Points do not have correct timestamps" + ); + + test + } + } + + #[test] + fn test_group_by() { + const TEST_CASES: &[(TestConfig, &[Option])] = &[ + ( + TestConfig { + missing_value: MissingValue::Neither, + overlapping_times: true, + reducer: Reducer::Mean, + }, + // This is the most basic case, where we simply average all the + // values together. They exactly line up and none are missing. + &[Some(1.5), Some(2.5), Some(3.5)], + ), + ( + TestConfig { + missing_value: MissingValue::Neither, + overlapping_times: true, + reducer: Reducer::Sum, + }, + // This is the next-simplest case, where we simply sum all the + // values together. They exactly line up and none are missing. + &[Some(3.0), Some(5.0), Some(7.0)], + ), + ( + TestConfig { + missing_value: MissingValue::Neither, + overlapping_times: false, + reducer: Reducer::Mean, + }, + // In this case, the timestamps don't all overlap, though some + // of them do. In particular, the arrays are shifted by one + // timestamp relative to each other, so there are 2 extra + // values. The one value that does overlap is averaged, and the + // other two are unchanged. + &[Some(2.0), Some(1.0), Some(2.5), Some(3.5)], + ), + ( + TestConfig { + missing_value: MissingValue::Neither, + overlapping_times: false, + reducer: Reducer::Sum, + }, + // Here, we should have 4 output samples because the timestamps + // don't overlap. The second input timeseries has its first + // point shifted back by one second. That means the first two + // values are just from one array (no reduction), while the next + // two are reduced as usual. + &[Some(2.0), Some(1.0), Some(5.0), Some(7.0)], + ), + ( + TestConfig { + missing_value: MissingValue::First, + overlapping_times: true, + reducer: Reducer::Mean, + }, + // In this case, we have a missing value for the middle + // timestamp of the first input timeseries. That means we should + // still have 3 output samples, but the second point isn't an + // aggregation, it's just the input value, from the second + // timeseries. + &[Some(1.5), Some(3.0), Some(3.5)], + ), + ( + TestConfig { + missing_value: MissingValue::First, + overlapping_times: true, + reducer: Reducer::Sum, + }, + // Same as above, but we're summing, not averaging. + &[Some(3.0), Some(3.0), Some(7.0)], + ), + ( + TestConfig { + missing_value: MissingValue::First, + overlapping_times: false, + reducer: Reducer::Mean, + }, + // We need 4 output points again here, but we also have a + // missing value. So we'll take the first value from the second + // timeseries; the second from the first; the second from the + // second directly, since its corresponding point is missing in + // the first, and then the average of both in the last point. 
+ &[Some(2.0), Some(1.0), Some(3.0), Some(3.5)], + ), + ( + TestConfig { + missing_value: MissingValue::First, + overlapping_times: false, + reducer: Reducer::Sum, + }, + // Same as above, but summing, instead of averaging. + &[Some(2.0), Some(1.0), Some(3.0), Some(7.0)], + ), + ( + TestConfig { + missing_value: MissingValue::Both, + overlapping_times: true, + reducer: Reducer::Mean, + }, + // In this case, the 2nd timepoint is missing from both + // timeseries. We should preserve that as a missing value in the + // output. + &[Some(1.5), None, Some(3.5)], + ), + ( + TestConfig { + missing_value: MissingValue::Both, + overlapping_times: true, + reducer: Reducer::Sum, + }, + // Same as above, but summing instead of averaging. + &[Some(3.0), None, Some(7.0)], + ), + ]; + for (test_config, expected_data) in TEST_CASES.iter() { + let test_table = TestTable::new(*test_config); + let grouped_timeseries = + test_table.grouped_table.iter().next().unwrap(); + let points = &grouped_timeseries.points; + let values = points.values(0).unwrap().as_double().unwrap(); + assert_eq!( + values, expected_data, + "Timeseries values were not grouped correctly, \ + test_config = {test_config:?}" + ); + } + } +} diff --git a/oximeter/db/src/oxql/ast/table_ops/join.rs b/oximeter/db/src/oxql/ast/table_ops/join.rs new file mode 100644 index 0000000000..3c150a4acf --- /dev/null +++ b/oximeter/db/src/oxql/ast/table_ops/join.rs @@ -0,0 +1,385 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! An AST node describing join table operations. + +// Copyright 2024 Oxide Computer Company + +use crate::oxql::point::MetricType; +use crate::oxql::point::Points; +use crate::oxql::point::Values; +use crate::oxql::Error; +use crate::oxql::Table; +use anyhow::Context; + +/// An AST node for a natural inner join. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Join; +impl Join { + // Apply the group_by table operation. + pub(crate) fn apply(&self, tables: &[Table]) -> Result, Error> { + anyhow::ensure!( + tables.len() > 1, + "Join operations require more than one table", + ); + let mut tables = tables.iter().cloned().enumerate(); + let (_, mut out) = tables.next().unwrap(); + anyhow::ensure!( + out.is_aligned(), + "Input tables for a join operation must be aligned" + ); + let metric_types = out + .iter() + .next() + .context("Input tables for a join operation may not be empty")? + .points + .metric_types() + .collect::>(); + ensure_all_metric_types(metric_types.iter().copied())?; + let alignment = out.alignment(); + assert!(alignment.is_some()); + + for (i, next_table) in tables { + anyhow::ensure!( + next_table.alignment() == alignment, + "All tables to a join operator must have the same \ + alignment. 
Expected alignment: {:?}, found a table \ + aligned with: {:?}", + alignment.unwrap(), + next_table.alignment(), + ); + let name = next_table.name().to_string(); + for next_timeseries in next_table.into_iter() { + let new_types = + next_timeseries.points.metric_types().collect::>(); + ensure_all_metric_types(new_types.iter().copied())?; + anyhow::ensure!( + metric_types == new_types, + "Input tables do not all share the same metric types" + ); + + let key = next_timeseries.key(); + let Some(timeseries) = out.iter_mut().find(|t| t.key() == key) + else { + anyhow::bail!( + "Join failed, input table {} does not \ + contain a timeseries with key {}", + i, + key, + ); + }; + + // Joining the timeseries is done by stacking together the + // values that have the same timestamp. + // + // If two value arrays have different timestamps, which is + // possible if they're derived from two separately-aligned + // tables, then we need to correctly ensure that: + // + // 1. They have the same alignment, and + // 2. We merge the timepoints rather than simply creating a + // ragged array of points. + timeseries.points = inner_join_point_arrays( + ×eries.points, + &next_timeseries.points, + )?; + } + // We'll also update the name, to indicate the joined data. + out.name.push(','); + out.name.push_str(&name); + } + Ok(vec![out]) + } +} + +// Given two arrays of points, stack them together at matching timepoints. +// +// For time points in either which do not have a corresponding point in the +// other, the entire time point is elided. +fn inner_join_point_arrays( + left: &Points, + right: &Points, +) -> Result { + // Create an output array with roughly the right capacity, and double the + // number of dimensions. We're trying to stack output value arrays together + // along the dimension axis. + let data_types = + left.data_types().chain(right.data_types()).collect::>(); + let metric_types = + left.metric_types().chain(right.metric_types()).collect::>(); + let mut out = Points::with_capacity( + left.len().max(right.len()), + data_types.iter().copied(), + metric_types.iter().copied(), + )?; + + // Iterate through each array until one is exhausted. We're only inserting + // values from both arrays where the timestamps actually match, since this + // is an inner join. We may want to insert missing values where timestamps + // do not match on either side, when we support an outer join of some kind. + let n_left_dim = left.values.len(); + let mut left_ix = 0; + let mut right_ix = 0; + while left_ix < left.len() && right_ix < right.len() { + let left_timestamp = left.timestamps[left_ix]; + let right_timestamp = right.timestamps[right_ix]; + if left_timestamp == right_timestamp { + out.timestamps.push(left_timestamp); + push_concrete_values( + &mut out.values[..n_left_dim], + &left.values, + left_ix, + ); + push_concrete_values( + &mut out.values[n_left_dim..], + &right.values, + right_ix, + ); + left_ix += 1; + right_ix += 1; + } else if left_timestamp < right_timestamp { + left_ix += 1; + } else { + right_ix += 1; + } + } + Ok(out) +} + +// Push the `i`th value from each dimension of `from` onto `to`. 
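+//
+// Both slices must have the same length and matching data types in each
+// dimension; each branch below appends a single sample from the input array
+// onto the end of the corresponding output array.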
+fn push_concrete_values(to: &mut [Values], from: &[Values], i: usize) { + assert_eq!(to.len(), from.len()); + for (output, input) in to.iter_mut().zip(from.iter()) { + let input_array = &input.values; + let output_array = &mut output.values; + assert_eq!(input_array.data_type(), output_array.data_type()); + if let Ok(ints) = input_array.as_integer() { + output_array.as_integer_mut().unwrap().push(ints[i]); + continue; + } + if let Ok(doubles) = input_array.as_double() { + output_array.as_double_mut().unwrap().push(doubles[i]); + continue; + } + if let Ok(bools) = input_array.as_boolean() { + output_array.as_boolean_mut().unwrap().push(bools[i]); + continue; + } + if let Ok(strings) = input_array.as_string() { + output_array.as_string_mut().unwrap().push(strings[i].clone()); + continue; + } + if let Ok(dists) = input_array.as_integer_distribution() { + output_array + .as_integer_distribution_mut() + .unwrap() + .push(dists[i].clone()); + continue; + } + if let Ok(dists) = input_array.as_double_distribution() { + output_array + .as_double_distribution_mut() + .unwrap() + .push(dists[i].clone()); + continue; + } + unreachable!(); + } +} + +// Return an error if any metric types are not suitable for joining. +fn ensure_all_metric_types( + mut metric_types: impl ExactSizeIterator, +) -> Result<(), Error> { + anyhow::ensure!( + metric_types + .all(|mt| matches!(mt, MetricType::Gauge | MetricType::Delta)), + "Join operation requires timeseries with gauge or \ + delta metric types", + ); + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::oxql::point::DataType; + use crate::oxql::point::Datum; + use crate::oxql::point::ValueArray; + use chrono::Utc; + use std::time::Duration; + + #[test] + fn test_push_concrete_values() { + let mut points = Points::with_capacity( + 2, + [DataType::Integer, DataType::Double].into_iter(), + [MetricType::Gauge, MetricType::Gauge].into_iter(), + ) + .unwrap(); + + // Push a concrete value for the integer dimension + let from_ints = vec![Values { + values: ValueArray::Integer(vec![Some(1)]), + metric_type: MetricType::Gauge, + }]; + push_concrete_values(&mut points.values[..1], &from_ints, 0); + + // And another for the double dimension. + let from_doubles = vec![Values { + values: ValueArray::Double(vec![Some(2.0)]), + metric_type: MetricType::Gauge, + }]; + push_concrete_values(&mut points.values[1..], &from_doubles, 0); + + assert_eq!( + points.dimensionality(), + 2, + "Points should have 2 dimensions", + ); + let ints = points.values[0].values.as_integer().unwrap(); + assert_eq!( + ints.len(), + 1, + "Should have pushed one point in the first dimension" + ); + assert_eq!( + ints[0], + Some(1), + "Should have pushed 1 onto the first dimension" + ); + let doubles = points.values[1].values.as_double().unwrap(); + assert_eq!( + doubles.len(), + 1, + "Should have pushed one point in the second dimension" + ); + assert_eq!( + doubles[0], + Some(2.0), + "Should have pushed 2.0 onto the second dimension" + ); + } + + #[test] + fn test_join_point_arrays() { + let now = Utc::now(); + + // Create a set of integer points to join with. + // + // This will have two timestamps, one of which will match the points + // below that are merged in. 
+ let int_points = Points { + start_times: None, + timestamps: vec![ + now - Duration::from_secs(3), + now - Duration::from_secs(2), + now, + ], + values: vec![Values { + values: ValueArray::Integer(vec![Some(1), Some(2), Some(3)]), + metric_type: MetricType::Gauge, + }], + }; + + // Create an additional set of double points. + // + // This also has two timepoints, one of which matches with the above, + // and one of which does not. + let double_points = Points { + start_times: None, + timestamps: vec![ + now - Duration::from_secs(3), + now - Duration::from_secs(1), + now, + ], + values: vec![Values { + values: ValueArray::Double(vec![ + Some(4.0), + Some(5.0), + Some(6.0), + ]), + metric_type: MetricType::Gauge, + }], + }; + + // Merge the arrays. + let merged = + inner_join_point_arrays(&int_points, &double_points).unwrap(); + + // Basic checks that we merged in the right values and have the right + // types and dimensions. + assert_eq!( + merged.dimensionality(), + 2, + "Should have appended the dimensions from each input array" + ); + assert_eq!(merged.len(), 2, "Should have merged two common points",); + assert_eq!( + merged.data_types().collect::>(), + &[DataType::Integer, DataType::Double], + "Should have combined the data types of the input arrays" + ); + assert_eq!( + merged.metric_types().collect::>(), + &[MetricType::Gauge, MetricType::Gauge], + "Should have combined the metric types of the input arrays" + ); + + // Check the actual values of the array. + let mut points = merged.iter_points(); + + // The first and last timepoint overlapped between the two arrays, so we + // should have both of them as concrete samples. + let pt = points.next().unwrap(); + assert_eq!(pt.start_time, None, "Gauges don't have a start time"); + assert_eq!( + *pt.timestamp, int_points.timestamps[0], + "Should have taken the first input timestamp from both arrays", + ); + assert_eq!( + *pt.timestamp, double_points.timestamps[0], + "Should have taken the first input timestamp from both arrays", + ); + let values = pt.values; + assert_eq!(values.len(), 2, "Should have 2 dimensions"); + assert_eq!( + &values[0], + &(Datum::Integer(Some(&1)), MetricType::Gauge), + "Should have pulled value from first integer array." + ); + assert_eq!( + &values[1], + &(Datum::Double(Some(&4.0)), MetricType::Gauge), + "Should have pulled value from second double array." + ); + + // And the next point + let pt = points.next().unwrap(); + assert_eq!(pt.start_time, None, "Gauges don't have a start time"); + assert_eq!( + *pt.timestamp, int_points.timestamps[2], + "Should have taken the input timestamp from both arrays", + ); + assert_eq!( + *pt.timestamp, double_points.timestamps[2], + "Should have taken the input timestamp from both arrays", + ); + let values = pt.values; + assert_eq!(values.len(), 2, "Should have 2 dimensions"); + assert_eq!( + &values[0], + &(Datum::Integer(Some(&3)), MetricType::Gauge), + "Should have pulled value from first integer array." + ); + assert_eq!( + &values[1], + &(Datum::Double(Some(&6.0)), MetricType::Gauge), + "Should have pulled value from second double array." + ); + + // And there should be no other values. 
+ assert!(points.next().is_none(), "There should be no more points"); + } +} diff --git a/oximeter/db/src/oxql/ast/table_ops/mod.rs b/oximeter/db/src/oxql/ast/table_ops/mod.rs new file mode 100644 index 0000000000..d9930962f8 --- /dev/null +++ b/oximeter/db/src/oxql/ast/table_ops/mod.rs @@ -0,0 +1,76 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! AST nodes for table operations. + +// Copyright 2024 Oxide Computer Company + +pub mod align; +pub mod filter; +pub mod get; +pub mod group_by; +pub mod join; + +use self::align::Align; +use self::filter::Filter; +use self::group_by::GroupBy; +use self::join::Join; +use crate::oxql::ast::Query; +use crate::oxql::Error; +use crate::oxql::Table; +use chrono::DateTime; +use chrono::Utc; +use oximeter::TimeseriesName; + +/// A basic table operation, the atoms of an OxQL query. +#[derive(Clone, Debug, PartialEq)] +pub enum BasicTableOp { + Get(TimeseriesName), + Filter(Filter), + GroupBy(GroupBy), + Join(Join), + Align(Align), +} + +impl BasicTableOp { + pub(crate) fn apply( + &self, + tables: &[Table], + query_end: &DateTime, + ) -> Result, Error> { + match self { + BasicTableOp::Get(_) => panic!("Should not apply get table ops"), + BasicTableOp::Filter(f) => f.apply(tables), + BasicTableOp::GroupBy(g) => g.apply(tables), + BasicTableOp::Join(j) => j.apply(tables), + BasicTableOp::Align(a) => a.apply(tables, query_end), + } + } +} + +/// A grouped table operation is a subquery in OxQL. +#[derive(Clone, Debug, PartialEq)] +pub struct GroupedTableOp { + pub ops: Vec, +} + +/// Any kind of OxQL table operation. +#[derive(Clone, Debug, PartialEq)] +pub enum TableOp { + Basic(BasicTableOp), + Grouped(GroupedTableOp), +} + +impl TableOp { + pub(crate) fn apply( + &self, + tables: &[Table], + query_end: &DateTime, + ) -> Result, Error> { + let TableOp::Basic(basic) = self else { + panic!("Should not apply grouped table ops"); + }; + basic.apply(tables, query_end) + } +} diff --git a/oximeter/db/src/oxql/mod.rs b/oximeter/db/src/oxql/mod.rs new file mode 100644 index 0000000000..b93d75b859 --- /dev/null +++ b/oximeter/db/src/oxql/mod.rs @@ -0,0 +1,39 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! The Oximeter Query Language, OxQL. + +// Copyright 2024 Oxide Computer Company + +use peg::error::ParseError as PegError; +use peg::str::LineCol; + +pub mod ast; +pub mod point; +pub mod query; +pub mod table; + +pub use self::query::Query; +pub use self::table::Table; +pub use self::table::Timeseries; +pub use anyhow::Error; + +// Format a PEG parsing error into a nice anyhow error. +fn fmt_parse_error(source: &str, err: PegError) -> Error { + use std::fmt::Write; + let mut out = + format!("Error at {}:{}", err.location.line, err.location.column); + const CONTEXT: usize = 24; + let start = err.location.offset.saturating_sub(CONTEXT); + let end = err.location.offset.saturating_add(CONTEXT).min(source.len()); + if let Some(context) = source.get(start..end) { + let prefix_len = out.len() + 2; + writeln!(out, ": .. 
{context} ..").unwrap(); + let left_pad = err.location.offset - start + 3 + prefix_len; + let right_pad = end - err.location.offset + 3 + prefix_len; + writeln!(out, "{:right_pad$}", ' ', ' ').unwrap(); + } + writeln!(out, "Expected: {}", err).unwrap(); + anyhow::anyhow!(out) +} diff --git a/oximeter/db/src/oxql/point.rs b/oximeter/db/src/oxql/point.rs new file mode 100644 index 0000000000..e12214aaf0 --- /dev/null +++ b/oximeter/db/src/oxql/point.rs @@ -0,0 +1,2040 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Definition of data points for OxQL. + +// Copyright 2024 Oxide Computer Company + +use super::Error; +use anyhow::Context; +use chrono::DateTime; +use chrono::Utc; +use num::ToPrimitive; +use oximeter::DatumType; +use oximeter::Measurement; +use schemars::JsonSchema; +use serde::Deserialize; +use serde::Serialize; +use std::fmt; + +/// The type of each individual data point's value in a timeseries. +#[derive( + Clone, Copy, Debug, Deserialize, Hash, JsonSchema, PartialEq, Serialize, +)] +#[serde(rename_all = "snake_case")] +pub enum DataType { + /// A 64-bit integer. + Integer, + /// A 64-bit float. + Double, + /// A boolean. + Boolean, + /// A string. + String, + /// A distribution, a sequence of integer bins and counts. + IntegerDistribution, + /// A distribution, a sequence of double bins and integer counts. + DoubleDistribution, +} + +impl DataType { + /// True if this is a numeric scalar type. + pub fn is_numeric(&self) -> bool { + matches!(self, DataType::Integer | DataType::Double) + } +} + +impl TryFrom for DataType { + type Error = Error; + + fn try_from(datum_type: DatumType) -> Result { + let data_type = match datum_type { + DatumType::Bool => DataType::Boolean, + DatumType::I8 + | DatumType::U8 + | DatumType::I16 + | DatumType::U16 + | DatumType::I32 + | DatumType::U32 + | DatumType::I64 + | DatumType::U64 + | DatumType::CumulativeI64 + | DatumType::CumulativeU64 => DataType::Integer, + DatumType::F32 + | DatumType::F64 + | DatumType::CumulativeF32 + | DatumType::CumulativeF64 => DataType::Double, + DatumType::String => DataType::String, + DatumType::HistogramI8 + | DatumType::HistogramU8 + | DatumType::HistogramI16 + | DatumType::HistogramU16 + | DatumType::HistogramI32 + | DatumType::HistogramU32 + | DatumType::HistogramI64 + | DatumType::HistogramU64 => DataType::IntegerDistribution, + DatumType::HistogramF32 | DatumType::HistogramF64 => { + DataType::DoubleDistribution + } + DatumType::Bytes => { + anyhow::bail!("Unsupported datum type: {}", datum_type) + } + }; + Ok(data_type) + } +} + +impl fmt::Display for DataType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self) + } +} + +/// The type of the metric itself, indicating what its values represent. +#[derive( + Clone, Copy, Debug, Deserialize, Hash, JsonSchema, PartialEq, Serialize, +)] +#[serde(rename_all = "snake_case")] +pub enum MetricType { + /// The value represents an instantaneous measurement in time. + Gauge, + /// The value represents a difference between two points in time. + Delta, + /// The value represents an accumulation between two points in time. + Cumulative, +} + +impl fmt::Display for MetricType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self) + } +} + +// A converted oximeter datum, used internally. 
+// +// This is used when computing deltas between cumulative measurements, and so +// only represents the possible cumulative types. +#[derive(Clone, Debug, PartialEq)] +enum CumulativeDatum { + Integer(i64), + Double(f64), + IntegerDistribution(Distribution), + DoubleDistribution(Distribution), +} + +impl CumulativeDatum { + // Construct a datum from a cumulative type, failing if the measurement is + // not cumulative. + fn from_cumulative(meas: &Measurement) -> Result { + let datum = match meas.datum() { + oximeter::Datum::CumulativeI64(val) => { + CumulativeDatum::Integer(val.value()) + } + oximeter::Datum::CumulativeU64(val) => { + let int = val + .value() + .try_into() + .context("Overflow converting u64 to i64")?; + CumulativeDatum::Integer(int) + } + oximeter::Datum::CumulativeF32(val) => { + CumulativeDatum::Double(val.value().into()) + } + oximeter::Datum::CumulativeF64(val) => { + CumulativeDatum::Double(val.value()) + } + oximeter::Datum::HistogramI8(hist) => hist.into(), + oximeter::Datum::HistogramU8(hist) => hist.into(), + oximeter::Datum::HistogramI16(hist) => hist.into(), + oximeter::Datum::HistogramU16(hist) => hist.into(), + oximeter::Datum::HistogramI32(hist) => hist.into(), + oximeter::Datum::HistogramU32(hist) => hist.into(), + oximeter::Datum::HistogramI64(hist) => hist.into(), + oximeter::Datum::HistogramU64(hist) => hist.try_into()?, + oximeter::Datum::HistogramF32(hist) => hist.into(), + oximeter::Datum::HistogramF64(hist) => hist.into(), + other => anyhow::bail!( + "Input datum of type {} is not cumulative", + other.datum_type(), + ), + }; + Ok(datum) + } +} + +/// A single list of values, for one dimension of a timeseries. +#[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] +pub struct Values { + // The data values. + pub(super) values: ValueArray, + // The type of this metric. + pub(super) metric_type: MetricType, +} + +impl Values { + // Construct an empty array of values to hold the provided types. + fn with_capacity( + size: usize, + data_type: DataType, + metric_type: MetricType, + ) -> Self { + Self { values: ValueArray::with_capacity(size, data_type), metric_type } + } + + fn len(&self) -> usize { + self.values.len() + } +} + +/// Reference type describing a single point in a `Points` array. +/// +/// The `Points` type is column-major, in that the timestamps and each data +/// value (one for each dimension) are stored in separate arrays, of the same +/// length. This type holds references to the relevant items in each array that +/// constitutes a single point. +#[derive(Clone, Debug, PartialEq)] +pub struct Point<'a> { + /// The start time of this point, if any. + pub start_time: Option<&'a DateTime>, + /// The timestamp for this point. + pub timestamp: &'a DateTime, + /// One datum and its metric type, for each dimension in the point. + /// + /// The datum itself is optional, and will be `None` if the point is missing + /// a value at the corresponding point and dimension. 
+ pub values: Vec<(Datum<'a>, MetricType)>, +} + +impl<'a> fmt::Display for Point<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + const TIMESTAMP_FMT: &str = "%Y-%m-%d %H:%M:%S.%f"; + match &self.start_time { + Some(start_time) => write!( + f, + "[{}, {}]: ", + start_time.format(TIMESTAMP_FMT), + self.timestamp.format(TIMESTAMP_FMT) + )?, + None => write!(f, "{}: ", self.timestamp.format(TIMESTAMP_FMT))?, + } + let values = self + .values + .iter() + .map(|(datum, _)| datum.to_string()) + .collect::>() + .join(","); + write!(f, "[{}]", values) + } +} + +impl<'a> Point<'a> { + /// Return the dimensionality of this point. + pub fn dimensionality(&self) -> usize { + self.values.len() + } +} + +/// A reference to a single datum of a multidimensional value. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Datum<'a> { + Boolean(Option), + Integer(Option<&'a i64>), + Double(Option<&'a f64>), + String(Option<&'a str>), + IntegerDistribution(Option<&'a Distribution>), + DoubleDistribution(Option<&'a Distribution>), +} + +impl<'a> fmt::Display for Datum<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Datum::Boolean(Some(inner)) => write!(f, "{}", inner), + Datum::Integer(Some(inner)) => write!(f, "{}", inner), + Datum::Double(Some(inner)) => write!(f, "{}", inner), + Datum::String(Some(inner)) => write!(f, "{}", inner), + Datum::IntegerDistribution(Some(inner)) => write!(f, "{}", inner), + Datum::DoubleDistribution(Some(inner)) => write!(f, "{}", inner), + Datum::Boolean(None) + | Datum::Integer(None) + | Datum::Double(None) + | Datum::String(None) + | Datum::IntegerDistribution(None) + | Datum::DoubleDistribution(None) => { + write!(f, "-") + } + } + } +} + +/// Timepoints and values for one timeseries. +// +// Invariants: +// +// The start_time and timestamp arrays must be the same length, or start_times +// must be None. +// +// The length of timestamps (and possibly start_times) must be the same as the +// length of _each element_ of the `values` array. That is, there are as many +// timestamps as data values. +// +// The length of `values` is the number of dimensions, and is always at least 1. +#[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] +pub struct Points { + // The start time points for cumulative or delta metrics. + pub(super) start_times: Option>>, + // The timestamp of each value. + pub(super) timestamps: Vec>, + // The array of data values, one for each dimension. + pub(super) values: Vec, +} + +impl Points { + /// Construct an empty array of points to hold data of the provided type. + pub fn empty(data_type: DataType, metric_type: MetricType) -> Self { + Self::with_capacity( + 0, + std::iter::once(data_type), + std::iter::once(metric_type), + ) + .unwrap() + } + + // Return a mutable reference to the value array of the specified dimension, if any. + pub(super) fn values_mut(&mut self, dim: usize) -> Option<&mut ValueArray> { + self.values.get_mut(dim).map(|val| &mut val.values) + } + + /// Return a reference to the value array of the specified dimension, if any. + pub fn values(&self, dim: usize) -> Option<&ValueArray> { + self.values.get(dim).map(|val| &val.values) + } + + /// Return the dimensionality of the data points, i.e., the number of values + /// at each timestamp. + pub fn dimensionality(&self) -> usize { + self.values.len() + } + + /// Return the number of points in self. 
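+ ///
+ /// Every value dimension holds exactly as many entries as the timestamp
+ /// array, so the length of any one dimension is the number of points.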
+ pub fn len(&self) -> usize { + self.values[0].len() + } + + /// Construct an empty array of points to hold size data points of the + /// provided types. + /// + /// The type information may have length > 1 to reserve space for + /// multi-dimensional values. + pub fn with_capacity( + size: usize, + data_types: D, + metric_types: M, + ) -> Result + where + D: ExactSizeIterator, + M: ExactSizeIterator, + { + anyhow::ensure!( + data_types.len() == metric_types.len(), + "Data and metric type iterators must have the same length", + ); + let timestamps = Vec::with_capacity(size); + let mut start_times = None; + let mut values = Vec::with_capacity(data_types.len()); + for (data_type, metric_type) in data_types.zip(metric_types) { + if matches!(metric_type, MetricType::Delta | MetricType::Cumulative) + && start_times.is_none() + { + start_times.replace(Vec::with_capacity(size)); + } + values.push(Values::with_capacity(size, data_type, metric_type)); + } + Ok(Self { start_times, timestamps, values }) + } + + /// Return the data types of self. + pub fn data_types(&self) -> impl ExactSizeIterator + '_ { + self.values.iter().map(|val| val.values.data_type()) + } + + /// Return the metric types of self. + pub fn metric_types( + &self, + ) -> impl ExactSizeIterator + '_ { + self.values.iter().map(|val| val.metric_type) + } + + /// Return the single metric type of all values in self, it they are all the + /// same. + pub fn metric_type(&self) -> Option { + let mut types = self.metric_types(); + let Some(first_type) = types.next() else { + unreachable!(); + }; + if types.all(|ty| ty == first_type) { + Some(first_type) + } else { + None + } + } + + /// Construct a list of gauge points from a list of gauge measurements. + /// + /// An error is returned if the provided input measurements are not gauges, + /// or do not all have the same datum type. + pub fn gauge_from_gauge( + measurements: &[Measurement], + ) -> Result { + let Some(first) = measurements.first() else { + anyhow::bail!( + "Cannot construct points from empty measurements array" + ); + }; + let datum_type = first.datum_type(); + anyhow::ensure!( + !datum_type.is_cumulative(), + "Measurements are not gauges" + ); + let data_type = DataType::try_from(datum_type)?; + let mut self_ = Self::with_capacity( + measurements.len(), + std::iter::once(data_type), + std::iter::once(MetricType::Gauge), + )?; + + // Since we're directly pushing gauges, each measurement is independent + // of the others. Simply translate types and push the data. + for measurement in measurements.iter() { + anyhow::ensure!( + measurement.datum_type() == datum_type, + "Measurements must all have the same datum type", + ); + self_ + .values_mut(0) + .unwrap() + .push_value_from_datum(measurement.datum())?; + self_.timestamps.push(measurement.timestamp()); + } + Ok(self_) + } + + /// Construct a list of delta points from a list of cumulative measurements. + /// + /// An error is returned if the provided measurements are not of the same + /// type or not cumulative. 
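+ ///
+ /// Each output point is the difference between a measurement and the
+ /// previous non-missing measurement within the same epoch, i.e. with the
+ /// same start time. A measurement that begins a new epoch is pushed as-is
+ /// rather than differenced, and samples with overlapping time ranges
+ /// produce an error.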
+ pub fn delta_from_cumulative( + measurements: &[Measurement], + ) -> Result { + let mut iter = measurements.iter(); + let Some(first) = iter.next() else { + anyhow::bail!( + "Cannot construct points from empty measurements array" + ); + }; + let datum_type = first.datum_type(); + anyhow::ensure!( + datum_type.is_cumulative(), + "Measurements are not cumulative", + ); + let data_type = DataType::try_from(datum_type)?; + let mut self_ = Self::with_capacity( + measurements.len(), + std::iter::once(data_type), + std::iter::once(MetricType::Delta), + )?; + + // Construct the first point, which directly uses the start / end time + // of the first measurement itself. + self_.values_mut(0).unwrap().push_value_from_datum(first.datum())?; + self_.start_times.as_mut().unwrap().push(first.start_time().unwrap()); + self_.timestamps.push(first.timestamp()); + + // We need to keep track of the last cumulative measurement that's not + // _missing_, to compute successive differences between neighboring + // points. Note that we only need the datum from the measurement, + // because even missing samples have valid timestamp information. So we + // can always generate the timestamp for each delta, even if the datum + // is missing. + let mut last_datum = if first.is_missing() { + None + } else { + // Safety: We're confirming above the measurement is cumulative, and + // in this block if the datum is missing. So we know this conversion + // should succeed. + Some(CumulativeDatum::from_cumulative(first).unwrap()) + }; + + // We also need to keep track of the start time of this "epoch", periods + // where the cumulative data has the same start time. If there are jumps + // forward in this, and thus gaps in the records, we need to update the + // start_time of the epoch and also the last datum. + let mut epoch_start_time = first.start_time().unwrap(); + + // Push the remaining values. + for measurement in iter { + anyhow::ensure!( + measurement.datum_type() == datum_type, + "Measurements must all have the same datum type" + ); + + // For the time ranges we must have either: + // + // 1. Either the start time of the _first_ and new points must be + // equal, with the timestamp of the new strictly later than the + // timestamp of the last, OR + // 2. Both the start time and timestamp of the new point must be + // strictly later than the timestamp (and thus start time) of the + // last point. In this case, we effectively have a _gap_ in the + // timeseries, and so we need to update `first_start_time` to + // reflect this new epoch. + let last_start_time = + *self_.start_times.as_ref().unwrap().last().unwrap(); + let last_timestamp = *self_.timestamps.last().unwrap(); + let new_start_time = measurement.start_time().unwrap(); + let new_timestamp = measurement.timestamp(); + + if epoch_start_time == new_start_time + && last_timestamp < new_timestamp + { + // Push the timestamps to reflect this interval, from the end of + // the last sample to the end of this one. + self_.start_times.as_mut().unwrap().push(last_timestamp); + self_.timestamps.push(new_timestamp); + + // The data value is the difference between the last non-missing + // datum and the new datum. + self_.values_mut(0).unwrap().push_diff_from_last_to_datum( + &last_datum, + measurement.datum(), + data_type, + )?; + } else if new_start_time > last_timestamp + && new_timestamp > last_timestamp + { + // Push the new start time directly, since it begins a new + // epoch. 
+ self_.start_times.as_mut().unwrap().push(new_start_time); + self_.timestamps.push(new_timestamp); + + // Update the epoch start time, and also simply push the datum + // directly. The difference with the previous is not meaningful, + // since we've begun a new epoch. + epoch_start_time = new_start_time; + self_ + .values_mut(0) + .unwrap() + .push_value_from_datum(measurement.datum())?; + } else { + // Print as useful a message as we can here. + anyhow::bail!( + "Cannot compute a delta, the timestamp of the next \ + sample has a new start time, or overlaps with the \ + last processed sample. \n \ + epoch start time = {epoch_start_time}\n \ + last timestamp = [{last_start_time}, {last_timestamp}]\n \ + new timestamp = [{new_start_time}, {new_timestamp}]" + ); + } + + // If the new datum is _not_ missing, we'll update the last one. + if !measurement.is_missing() { + last_datum.replace( + CumulativeDatum::from_cumulative(measurement).unwrap(), + ); + } + } + Ok(self_) + } + + /// Iterate over each point in self. + pub fn iter_points(&self) -> impl Iterator> + '_ { + (0..self.len()).map(|i| Point { + start_time: self.start_times.as_ref().map(|s| &s[i]), + timestamp: &self.timestamps[i], + values: self + .values + .iter() + .map(|val| (val.values.get(i), val.metric_type)) + .collect(), + }) + } + + // Filter points in self to those where `to_keep` is true. + pub(crate) fn filter(&self, to_keep: Vec) -> Result { + anyhow::ensure!( + to_keep.len() == self.len(), + "Filter array must be the same length as self", + ); + + // Compute the indices of values we're keeping. + let indices: Vec<_> = to_keep + .iter() + .enumerate() + .filter(|(_ix, to_keep)| **to_keep) + .map(|(ix, _)| ix) + .collect(); + let n_true = indices.len(); + let mut out = Self::with_capacity( + n_true, + self.data_types(), + self.metric_types(), + )?; + + // Push the compressed start times, if any. + if let Some(start_times) = self.start_times.as_ref() { + let Some(new_start_times) = out.start_times.as_mut() else { + unreachable!(); + }; + for ix in indices.iter().copied() { + new_start_times.push(start_times[ix]); + } + } + + // Push the compressed timestamps. + for ix in indices.iter().copied() { + out.timestamps.push(self.timestamps[ix]); + } + + // Push each dimension of the data values themselves. + for (new_values, existing_values) in + out.values.iter_mut().zip(self.values.iter()) + { + match (&mut new_values.values, &existing_values.values) { + (ValueArray::Integer(new), ValueArray::Integer(existing)) => { + for ix in indices.iter().copied() { + new.push(existing[ix]); + } + } + (ValueArray::Double(new), ValueArray::Double(existing)) => { + for ix in indices.iter().copied() { + new.push(existing[ix]); + } + } + (ValueArray::Boolean(new), ValueArray::Boolean(existing)) => { + for ix in indices.iter().copied() { + new.push(existing[ix]); + } + } + (ValueArray::String(new), ValueArray::String(existing)) => { + for ix in indices.iter().copied() { + new.push(existing[ix].clone()); + } + } + ( + ValueArray::IntegerDistribution(new), + ValueArray::IntegerDistribution(existing), + ) => { + for ix in indices.iter().copied() { + new.push(existing[ix].clone()); + } + } + ( + ValueArray::DoubleDistribution(new), + ValueArray::DoubleDistribution(existing), + ) => { + for ix in indices.iter().copied() { + new.push(existing[ix].clone()); + } + } + (_, _) => unreachable!(), + } + } + Ok(out) + } + + // Return a new set of points, with the values casted to the provided types. 
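+ //
+ // Scalar types can generally be converted among one another: numeric casts
+ // go through `ToPrimitive` and fail on out-of-range values, strings are
+ // parsed, and booleans follow the truthiness rules used below (non-zero
+ // numbers and non-empty strings are true). Distributions can only be
+ // "cast" to their own type, and missing values stay missing. `types` must
+ // contain one entry per dimension. For example (hypothetical data):
+ //
+ //   // Turn a boolean gauge into 0/1 integers.
+ //   let as_ints = points.cast(&[DataType::Integer])?;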
+ pub(crate) fn cast(&self, types: &[DataType]) -> Result { + anyhow::ensure!( + types.len() == self.dimensionality(), + "Cannot cast to {} types, the data has dimensionality {}", + types.len(), + self.dimensionality(), + ); + let start_times = self.start_times.clone(); + let timestamps = self.timestamps.clone(); + let mut new_values = Vec::with_capacity(self.dimensionality()); + for (new_type, existing_values) in types.iter().zip(self.values.iter()) + { + let values = match (new_type, &existing_values.values) { + // "Cast" from i64 -> i64 + (DataType::Integer, ValueArray::Integer(vals)) => { + ValueArray::Integer(vals.clone()) + } + + // Cast f64 -> i64 + (DataType::Integer, ValueArray::Double(doubles)) => { + let mut new = Vec::with_capacity(doubles.len()); + for maybe_double in doubles.iter().copied() { + if let Some(d) = maybe_double { + let as_int = d + .to_i64() + .context("Cannot cast double {d} to i64")?; + new.push(Some(as_int)); + } else { + new.push(None); + } + } + ValueArray::Integer(new) + } + + // Cast bool -> i64 + (DataType::Integer, ValueArray::Boolean(bools)) => { + ValueArray::Integer( + bools + .iter() + .copied() + .map(|b| b.map(i64::from)) + .collect(), + ) + } + + // Cast string -> i64, by parsing. + (DataType::Integer, ValueArray::String(strings)) => { + let mut new = Vec::with_capacity(strings.len()); + for maybe_str in strings.iter() { + if let Some(s) = maybe_str { + let as_int = s + .parse() + .context("Cannot cast string '{s}' to i64")?; + new.push(Some(as_int)); + } else { + new.push(None); + } + } + ValueArray::Integer(new) + } + + // Cast i64 -> f64 + (DataType::Double, ValueArray::Integer(ints)) => { + let mut new = Vec::with_capacity(ints.len()); + for maybe_int in ints.iter().copied() { + if let Some(int) = maybe_int { + let as_double = int.to_f64().context( + "Cannot cast integer {int} as double", + )?; + new.push(Some(as_double)); + } else { + new.push(None); + } + } + ValueArray::Double(new) + } + + // "Cast" f64 -> f64 + (DataType::Double, ValueArray::Double(vals)) => { + ValueArray::Double(vals.clone()) + } + + // Cast bool -> f64 + (DataType::Double, ValueArray::Boolean(bools)) => { + ValueArray::Double( + bools + .iter() + .copied() + .map(|b| b.map(f64::from)) + .collect(), + ) + } + + // Cast string -> f64, by parsing. + (DataType::Double, ValueArray::String(strings)) => { + let mut new = Vec::with_capacity(strings.len()); + for maybe_str in strings.iter() { + if let Some(s) = maybe_str { + let as_double = s + .parse() + .context("Cannot cast string '{s}' to f64")?; + new.push(Some(as_double)); + } else { + new.push(None); + } + } + ValueArray::Double(new) + } + + // Cast i64 -> bool + // + // Any non-zero value is considered truthy. + (DataType::Boolean, ValueArray::Integer(ints)) => { + let mut new = Vec::with_capacity(ints.len()); + for maybe_int in ints.iter().copied() { + match maybe_int { + Some(0) => new.push(Some(false)), + Some(_) => new.push(Some(true)), + None => new.push(None), + } + } + ValueArray::Boolean(new) + } + + // Cast f64 -> bool + // + // Any non-zero value is considered truthy. 
+ (DataType::Boolean, ValueArray::Double(doubles)) => { + let mut new = Vec::with_capacity(doubles.len()); + for maybe_double in doubles.iter().copied() { + match maybe_double { + Some(d) if d == 0.0 => new.push(Some(false)), + Some(_) => new.push(Some(true)), + None => new.push(None), + } + } + ValueArray::Boolean(new) + } + + // "Cast" bool -> bool + (DataType::Boolean, ValueArray::Boolean(vals)) => { + ValueArray::Boolean(vals.clone()) + } + + // Cast string -> bool. + // + // Any non-empty string is considered truthy + (DataType::Boolean, ValueArray::String(strings)) => { + let mut new = Vec::with_capacity(strings.len()); + for maybe_str in strings.iter() { + match maybe_str { + Some(s) if s.is_empty() => new.push(Some(false)), + Some(_) => new.push(Some(true)), + None => new.push(None), + } + } + ValueArray::Boolean(new) + } + + // Cast i64 -> string + (DataType::String, ValueArray::Integer(ints)) => { + ValueArray::String( + ints.iter().map(|x| x.map(|x| x.to_string())).collect(), + ) + } + + // Cast f64 -> string + (DataType::String, ValueArray::Double(doubles)) => { + ValueArray::String( + doubles + .iter() + .map(|x| x.map(|x| x.to_string())) + .collect(), + ) + } + + // Cast bool -> string + (DataType::String, ValueArray::Boolean(bools)) => { + ValueArray::String( + bools + .iter() + .map(|x| x.map(|x| x.to_string())) + .collect(), + ) + } + + // "Cast" string -> string + (DataType::String, ValueArray::String(vals)) => { + ValueArray::String(vals.clone()) + } + + // "Cast" distributions to the same type of distribution + ( + DataType::IntegerDistribution, + ValueArray::IntegerDistribution(vals), + ) => ValueArray::IntegerDistribution(vals.clone()), + ( + DataType::DoubleDistribution, + ValueArray::DoubleDistribution(vals), + ) => ValueArray::DoubleDistribution(vals.clone()), + + // All other casts are invalid + (_, vals) => anyhow::bail!( + "Cannot cast {} -> {}", + new_type, + vals.data_type(), + ), + }; + new_values.push(Values { + values, + metric_type: existing_values.metric_type, + }); + } + Ok(Self { start_times, timestamps, values: new_values }) + } + + /// Return true if self contains no data points. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +/// List of data values for one timeseries. +/// +/// Each element is an option, where `None` represents a missing sample. +#[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] +#[serde(rename_all = "snake_case", tag = "type", content = "values")] +pub enum ValueArray { + Integer(Vec>), + Double(Vec>), + Boolean(Vec>), + String(Vec>), + IntegerDistribution(Vec>>), + DoubleDistribution(Vec>>), +} + +impl ValueArray { + // Create an empty array with capacity `size` of the provided data type. + fn with_capacity(size: usize, data_type: DataType) -> Self { + match data_type { + DataType::Integer => Self::Integer(Vec::with_capacity(size)), + DataType::Double => Self::Double(Vec::with_capacity(size)), + DataType::Boolean => Self::Boolean(Vec::with_capacity(size)), + DataType::String => Self::String(Vec::with_capacity(size)), + DataType::IntegerDistribution => { + Self::IntegerDistribution(Vec::with_capacity(size)) + } + DataType::DoubleDistribution => { + Self::DoubleDistribution(Vec::with_capacity(size)) + } + } + } + + // Return the data type in self. 
+ pub(super) fn data_type(&self) -> DataType { + match self { + ValueArray::Integer(_) => DataType::Integer, + ValueArray::Double(_) => DataType::Double, + ValueArray::Boolean(_) => DataType::Boolean, + ValueArray::String(_) => DataType::String, + ValueArray::IntegerDistribution(_) => DataType::IntegerDistribution, + ValueArray::DoubleDistribution(_) => DataType::DoubleDistribution, + } + } + + // Access the inner array of booleans, if possible. + pub(super) fn as_boolean_mut( + &mut self, + ) -> Result<&mut Vec>, Error> { + let ValueArray::Boolean(inner) = self else { + anyhow::bail!( + "Cannot access value array as boolean type, it has type {}", + self.data_type(), + ); + }; + Ok(inner) + } + + /// Access the values as an array of bools, if they have that type. + pub fn as_boolean(&self) -> Result<&Vec>, Error> { + let ValueArray::Boolean(inner) = self else { + anyhow::bail!( + "Cannot access value array as boolean type, it has type {}", + self.data_type(), + ); + }; + Ok(inner) + } + + /// Access the values as an array of integers, if they have that type. + pub fn as_integer(&self) -> Result<&Vec>, Error> { + let ValueArray::Integer(inner) = self else { + anyhow::bail!( + "Cannot access value array as integer type, it has type {}", + self.data_type(), + ); + }; + Ok(inner) + } + + // Access the inner array of integers, if possible. + pub(super) fn as_integer_mut( + &mut self, + ) -> Result<&mut Vec>, Error> { + let ValueArray::Integer(inner) = self else { + anyhow::bail!( + "Cannot access value array as integer type, it has type {}", + self.data_type(), + ); + }; + Ok(inner) + } + + /// Access the values as an array of doubles, if they have that type. + pub fn as_double(&self) -> Result<&Vec>, Error> { + let ValueArray::Double(inner) = self else { + anyhow::bail!( + "Cannot access value array as double type, it has type {}", + self.data_type(), + ); + }; + Ok(inner) + } + + // Access the inner array of doubles, if possible. + pub(super) fn as_double_mut( + &mut self, + ) -> Result<&mut Vec>, Error> { + let ValueArray::Double(inner) = self else { + anyhow::bail!( + "Cannot access value array as double type, it has type {}", + self.data_type(), + ); + }; + Ok(inner) + } + + /// Access the values as an array of strings, if they have that type. + pub fn as_string(&self) -> Result<&Vec>, Error> { + let ValueArray::String(inner) = self else { + anyhow::bail!( + "Cannot access value array as string type, it has type {}", + self.data_type(), + ); + }; + Ok(inner) + } + + // Access the inner array of strings, if possible. + pub(super) fn as_string_mut( + &mut self, + ) -> Result<&mut Vec>, Error> { + let ValueArray::String(inner) = self else { + anyhow::bail!( + "Cannot access value array as string type, it has type {}", + self.data_type(), + ); + }; + Ok(inner) + } + + /// Access the values as an array of integer distribution, if they have that + /// type. + pub fn as_integer_distribution( + &self, + ) -> Result<&Vec>>, Error> { + let ValueArray::IntegerDistribution(inner) = self else { + anyhow::bail!( + "Cannot access value array as integer \ + distribution type, it has type {}", + self.data_type(), + ); + }; + Ok(inner) + } + + // Access the inner array of integer distribution, if possible. 
+ pub(super) fn as_integer_distribution_mut( + &mut self, + ) -> Result<&mut Vec>>, Error> { + let ValueArray::IntegerDistribution(inner) = self else { + anyhow::bail!( + "Cannot access value array as integer \ + distribution type, it has type {}", + self.data_type(), + ); + }; + Ok(inner) + } + + /// Access the values as an array of double distribution, if they have that + /// type. + pub fn as_double_distribution( + &self, + ) -> Result<&Vec>>, Error> { + let ValueArray::DoubleDistribution(inner) = self else { + anyhow::bail!( + "Cannot access value array as double \ + distribution type, it has type {}", + self.data_type(), + ); + }; + Ok(inner) + } + + // Access the inner array of double distributions, if possible. + pub(super) fn as_double_distribution_mut( + &mut self, + ) -> Result<&mut Vec>>, Error> { + let ValueArray::DoubleDistribution(inner) = self else { + anyhow::bail!( + "Cannot access value array as double \ + distribution type, it has type {}", + self.data_type(), + ); + }; + Ok(inner) + } + + fn push_missing(&mut self, datum_type: DatumType) -> Result<(), Error> { + match datum_type { + DatumType::Bool => self.as_boolean_mut()?.push(None), + DatumType::I8 + | DatumType::U8 + | DatumType::I16 + | DatumType::U16 + | DatumType::I32 + | DatumType::U32 + | DatumType::I64 + | DatumType::U64 + | DatumType::CumulativeI64 + | DatumType::CumulativeU64 => self.as_integer_mut()?.push(None), + DatumType::F32 + | DatumType::F64 + | DatumType::CumulativeF32 + | DatumType::CumulativeF64 => self.as_double_mut()?.push(None), + DatumType::String => self.as_string_mut()?.push(None), + DatumType::Bytes => { + anyhow::bail!("Bytes data types are not yet supported") + } + DatumType::HistogramI8 + | DatumType::HistogramU8 + | DatumType::HistogramI16 + | DatumType::HistogramU16 + | DatumType::HistogramI32 + | DatumType::HistogramU32 + | DatumType::HistogramI64 + | DatumType::HistogramU64 => { + self.as_integer_distribution_mut()?.push(None) + } + DatumType::HistogramF32 | DatumType::HistogramF64 => { + self.as_double_distribution_mut()?.push(None) + } + } + Ok(()) + } + + // Push a value directly from a datum, without modification. 
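+ //
+ // "Without modification" means no differencing is applied; narrower integer
+ // types are still widened to i64, `u64` and `CumulativeU64` values are
+ // checked into i64 (failing on overflow), `f32` is widened to f64, and a
+ // missing datum pushes `None` for the corresponding type.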
+ fn push_value_from_datum( + &mut self, + datum: &oximeter::Datum, + ) -> Result<(), Error> { + match datum { + oximeter::Datum::Bool(b) => self.as_boolean_mut()?.push(Some(*b)), + oximeter::Datum::I8(i) => { + self.as_integer_mut()?.push(Some(i64::from(*i))) + } + oximeter::Datum::U8(i) => { + self.as_integer_mut()?.push(Some(i64::from(*i))) + } + oximeter::Datum::I16(i) => { + self.as_integer_mut()?.push(Some(i64::from(*i))) + } + oximeter::Datum::U16(i) => { + self.as_integer_mut()?.push(Some(i64::from(*i))) + } + oximeter::Datum::I32(i) => { + self.as_integer_mut()?.push(Some(i64::from(*i))) + } + oximeter::Datum::U32(i) => { + self.as_integer_mut()?.push(Some(i64::from(*i))) + } + oximeter::Datum::I64(i) => self.as_integer_mut()?.push(Some(*i)), + oximeter::Datum::U64(i) => { + let i = + i.to_i64().context("Failed to convert u64 datum to i64")?; + self.as_integer_mut()?.push(Some(i)); + } + oximeter::Datum::F32(f) => { + self.as_double_mut()?.push(Some(f64::from(*f))) + } + oximeter::Datum::F64(f) => self.as_double_mut()?.push(Some(*f)), + oximeter::Datum::String(s) => { + self.as_string_mut()?.push(Some(s.clone())) + } + oximeter::Datum::Bytes(_) => { + anyhow::bail!("Bytes data types are not yet supported") + } + oximeter::Datum::CumulativeI64(c) => { + self.as_integer_mut()?.push(Some(c.value())) + } + oximeter::Datum::CumulativeU64(c) => { + let c = c + .value() + .to_i64() + .context("Failed to convert u64 datum to i64")?; + self.as_integer_mut()?.push(Some(c)); + } + oximeter::Datum::CumulativeF32(c) => { + self.as_double_mut()?.push(Some(f64::from(c.value()))) + } + oximeter::Datum::CumulativeF64(c) => { + self.as_double_mut()?.push(Some(c.value())) + } + oximeter::Datum::HistogramI8(h) => self + .as_integer_distribution_mut()? + .push(Some(Distribution::from(h))), + oximeter::Datum::HistogramU8(h) => self + .as_integer_distribution_mut()? + .push(Some(Distribution::from(h))), + oximeter::Datum::HistogramI16(h) => self + .as_integer_distribution_mut()? + .push(Some(Distribution::from(h))), + oximeter::Datum::HistogramU16(h) => self + .as_integer_distribution_mut()? + .push(Some(Distribution::from(h))), + oximeter::Datum::HistogramI32(h) => self + .as_integer_distribution_mut()? + .push(Some(Distribution::from(h))), + oximeter::Datum::HistogramU32(h) => self + .as_integer_distribution_mut()? + .push(Some(Distribution::from(h))), + oximeter::Datum::HistogramI64(h) => self + .as_integer_distribution_mut()? + .push(Some(Distribution::from(h))), + oximeter::Datum::HistogramU64(h) => self + .as_integer_distribution_mut()? + .push(Some(Distribution::try_from(h)?)), + oximeter::Datum::HistogramF32(h) => self + .as_double_distribution_mut()? + .push(Some(Distribution::from(h))), + oximeter::Datum::HistogramF64(h) => self + .as_double_distribution_mut()? + .push(Some(Distribution::from(h))), + oximeter::Datum::Missing(missing) => { + self.push_missing(missing.datum_type())? + } + } + Ok(()) + } + + // Push a delta from the last valid datum and a new one. + // + // This takes the last valid datum, if any, and a new one. It computes the + // delta between the the values of the datum, if possible, and pushes it + // onto the correct value array inside `self`. + // + // If both the last datum and new one exist (are not missing), the normal + // diff is pushed. If the last datum is missing, but the new one exists, + // then the new value is pushed directly. If the last datum exists but the + // new one does not, then a missing datum is pushed. 
If both are missing, + // then a missing one is pushed as well. + // + // In other words, the diff is always between the new datum and the last + // non-None value. If such a last value does not exist, the datum is + // inserted directly. + fn push_diff_from_last_to_datum( + &mut self, + last_datum: &Option, + new_datum: &oximeter::Datum, + data_type: DataType, + ) -> Result<(), Error> { + match (last_datum.as_ref(), new_datum.is_missing()) { + (None, true) | (Some(_), true) => { + // In this case, either both values are missing, or just the new + // one is. In either case, we cannot compute a new value, and + // need to insert None to represent the new missing datum. + match data_type { + DataType::Integer => self.as_integer_mut()?.push(None), + DataType::Double => self.as_double_mut()?.push(None), + DataType::Boolean => self.as_boolean_mut()?.push(None), + DataType::String => self.as_string_mut()?.push(None), + DataType::IntegerDistribution => { + self.as_integer_distribution_mut()?.push(None) + } + DataType::DoubleDistribution => { + self.as_double_distribution_mut()?.push(None) + } + } + } + (None, false) => { + // The last datum was missing, but the new one is not. We cannot + // compute the difference, since we have no previous point. + // However, we can still push some value by inserting the datum + // directly. + self.push_value_from_datum(new_datum)?; + } + (Some(last_datum), false) => { + // Both values exist, so we can compute the difference between + // them and insert that. + // + // Note that we're asserting both are the same _datum_ type, + // which is guaranteed by a check in the caller. + match (last_datum, new_datum) { + ( + CumulativeDatum::Integer(last), + oximeter::Datum::I8(new), + ) => { + let new = i64::from(*new); + self.as_integer_mut()?.push(Some(new - last)); + } + ( + CumulativeDatum::Integer(last), + oximeter::Datum::U8(new), + ) => { + let new = i64::from(*new); + self.as_integer_mut()?.push(Some(new - last)); + } + ( + CumulativeDatum::Integer(last), + oximeter::Datum::I16(new), + ) => { + let new = i64::from(*new); + self.as_integer_mut()?.push(Some(new - last)); + } + ( + CumulativeDatum::Integer(last), + oximeter::Datum::U16(new), + ) => { + let new = i64::from(*new); + self.as_integer_mut()?.push(Some(new - last)); + } + ( + CumulativeDatum::Integer(last), + oximeter::Datum::I32(new), + ) => { + let new = i64::from(*new); + self.as_integer_mut()?.push(Some(new - last)); + } + ( + CumulativeDatum::Integer(last), + oximeter::Datum::U32(new), + ) => { + let new = i64::from(*new); + self.as_integer_mut()?.push(Some(new - last)); + } + ( + CumulativeDatum::Integer(last), + oximeter::Datum::I64(new), + ) => { + let diff = new + .checked_sub(*last) + .context("Overflow computing deltas")?; + self.as_integer_mut()?.push(Some(diff)); + } + ( + CumulativeDatum::Integer(last), + oximeter::Datum::U64(new), + ) => { + let new = new + .to_i64() + .context("Failed to convert u64 datum to i64")?; + let diff = new + .checked_sub(*last) + .context("Overflow computing deltas")?; + self.as_integer_mut()?.push(Some(diff)); + } + ( + CumulativeDatum::Double(last), + oximeter::Datum::F32(new), + ) => { + self.as_double_mut()? 
+ .push(Some(f64::from(*new) - last)); + } + ( + CumulativeDatum::Double(last), + oximeter::Datum::F64(new), + ) => { + self.as_double_mut()?.push(Some(new - last)); + } + ( + CumulativeDatum::Integer(last), + oximeter::Datum::CumulativeI64(new), + ) => { + let new = new.value(); + let diff = new + .checked_sub(*last) + .context("Overflow computing deltas")?; + self.as_integer_mut()?.push(Some(diff)); + } + ( + CumulativeDatum::Integer(last), + oximeter::Datum::CumulativeU64(new), + ) => { + let new = new + .value() + .to_i64() + .context("Failed to convert u64 datum to i64")?; + let diff = new + .checked_sub(*last) + .context("Overflow computing deltas")?; + self.as_integer_mut()?.push(Some(diff)); + } + ( + CumulativeDatum::Double(last), + oximeter::Datum::CumulativeF32(new), + ) => { + self.as_double_mut()? + .push(Some(f64::from(new.value()) - last)); + } + ( + CumulativeDatum::Double(last), + oximeter::Datum::CumulativeF64(new), + ) => { + self.as_double_mut()?.push(Some(new.value() - last)); + } + ( + CumulativeDatum::IntegerDistribution(last), + oximeter::Datum::HistogramI8(new), + ) => { + let new = Distribution::from(new); + self.as_integer_distribution_mut()? + .push(Some(new.checked_sub(&last)?)); + } + ( + CumulativeDatum::IntegerDistribution(last), + oximeter::Datum::HistogramU8(new), + ) => { + let new = Distribution::from(new); + self.as_integer_distribution_mut()? + .push(Some(new.checked_sub(&last)?)); + } + ( + CumulativeDatum::IntegerDistribution(last), + oximeter::Datum::HistogramI16(new), + ) => { + let new = Distribution::from(new); + self.as_integer_distribution_mut()? + .push(Some(new.checked_sub(&last)?)); + } + ( + CumulativeDatum::IntegerDistribution(last), + oximeter::Datum::HistogramU16(new), + ) => { + let new = Distribution::from(new); + self.as_integer_distribution_mut()? + .push(Some(new.checked_sub(&last)?)); + } + ( + CumulativeDatum::IntegerDistribution(last), + oximeter::Datum::HistogramI32(new), + ) => { + let new = Distribution::from(new); + self.as_integer_distribution_mut()? + .push(Some(new.checked_sub(&last)?)); + } + ( + CumulativeDatum::IntegerDistribution(last), + oximeter::Datum::HistogramU32(new), + ) => { + let new = Distribution::from(new); + self.as_integer_distribution_mut()? + .push(Some(new.checked_sub(&last)?)); + } + ( + CumulativeDatum::IntegerDistribution(last), + oximeter::Datum::HistogramI64(new), + ) => { + let new = Distribution::from(new); + self.as_integer_distribution_mut()? + .push(Some(new.checked_sub(&last)?)); + } + ( + CumulativeDatum::IntegerDistribution(last), + oximeter::Datum::HistogramU64(new), + ) => { + let new = Distribution::try_from(new)?; + self.as_integer_distribution_mut()? + .push(Some(new.checked_sub(&last)?)); + } + ( + CumulativeDatum::DoubleDistribution(last), + oximeter::Datum::HistogramF32(new), + ) => { + let new = Distribution::from(new); + self.as_double_distribution_mut()? + .push(Some(new.checked_sub(&last)?)); + } + ( + CumulativeDatum::DoubleDistribution(last), + oximeter::Datum::HistogramF64(new), + ) => { + let new = Distribution::from(new); + self.as_double_distribution_mut()? + .push(Some(new.checked_sub(&last)?)); + } + (_, _) => unreachable!(), + } + } + } + Ok(()) + } + + // Return the number of samples in self. 
+ fn len(&self) -> usize { + match self { + ValueArray::Boolean(inner) => inner.len(), + ValueArray::Integer(inner) => inner.len(), + ValueArray::Double(inner) => inner.len(), + ValueArray::String(inner) => inner.len(), + ValueArray::IntegerDistribution(inner) => inner.len(), + ValueArray::DoubleDistribution(inner) => inner.len(), + } + } + + // Return a reference to the i-th value in the array. + // + // This panics if `i >= self.len()`. + fn get(&self, i: usize) -> Datum<'_> { + match self { + ValueArray::Boolean(inner) => Datum::Boolean(inner[i]), + ValueArray::Integer(inner) => { + Datum::Integer(inner.get(i).unwrap().as_ref()) + } + ValueArray::Double(inner) => { + Datum::Double(inner.get(i).unwrap().as_ref()) + } + ValueArray::String(inner) => { + Datum::String(inner.get(i).unwrap().as_deref()) + } + ValueArray::IntegerDistribution(inner) => { + Datum::IntegerDistribution(inner.get(i).unwrap().as_ref()) + } + ValueArray::DoubleDistribution(inner) => { + Datum::DoubleDistribution(inner.get(i).unwrap().as_ref()) + } + } + } + + // Swap the value in self with other, asserting they're the same type. + pub(crate) fn swap(&mut self, mut values: ValueArray) { + use std::mem::swap; + match (self, &mut values) { + (ValueArray::Integer(x), ValueArray::Integer(y)) => swap(x, y), + (ValueArray::Double(x), ValueArray::Double(y)) => swap(x, y), + (ValueArray::Boolean(x), ValueArray::Boolean(y)) => swap(x, y), + (ValueArray::String(x), ValueArray::String(y)) => swap(x, y), + ( + ValueArray::IntegerDistribution(x), + ValueArray::IntegerDistribution(y), + ) => swap(x, y), + ( + ValueArray::DoubleDistribution(x), + ValueArray::DoubleDistribution(y), + ) => swap(x, y), + (_, _) => panic!("Cannot swap values of different types"), + } + } +} + +mod private { + pub trait Sealed {} + impl Sealed for i64 {} + impl Sealed for f64 {} +} + +pub trait DistributionSupport: + fmt::Display + Clone + Copy + fmt::Debug + PartialEq + private::Sealed +{ +} +impl DistributionSupport for i64 {} +impl DistributionSupport for f64 {} + +/// A distribution is a sequence of bins and counts in those bins. +#[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] +#[schemars(rename = "Distribution{T}")] +pub struct Distribution { + bins: Vec, + counts: Vec, +} + +impl fmt::Display for Distribution { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let elems = self + .bins + .iter() + .zip(self.counts.iter()) + .map(|(bin, count)| format!("{bin}: {count}")) + .collect::>() + .join(", "); + write!(f, "{}", elems) + } +} + +impl Distribution { + // Subtract two distributions, checking that they have the same bins. + fn checked_sub( + &self, + rhs: &Distribution, + ) -> Result, Error> { + anyhow::ensure!( + self.bins == rhs.bins, + "Cannot subtract distributions with different bins", + ); + let counts = self + .counts + .iter() + .zip(rhs.counts.iter().copied()) + .map(|(x, y)| x.checked_sub(y)) + .collect::>() + .context("Underflow subtracting distributions values")?; + Ok(Self { bins: self.bins.clone(), counts }) + } + + /// Return the slice of bins. + pub fn bins(&self) -> &[T] { + &self.bins + } + + /// Return the slice of counts. + pub fn counts(&self) -> &[u64] { + &self.counts + } + + /// Return an iterator over each bin and count. + pub fn iter(&self) -> impl ExactSizeIterator + '_ { + self.bins.iter().zip(self.counts.iter()) + } +} + +macro_rules! 
i64_dist_from { + ($t:ty) => { + impl From<&oximeter::histogram::Histogram<$t>> for Distribution { + fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + let (bins, counts) = hist.to_arrays(); + Self { bins: bins.into_iter().map(i64::from).collect(), counts } + } + } + + impl From<&oximeter::histogram::Histogram<$t>> for CumulativeDatum { + fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + CumulativeDatum::IntegerDistribution(hist.into()) + } + } + }; +} + +i64_dist_from!(i8); +i64_dist_from!(u8); +i64_dist_from!(i16); +i64_dist_from!(u16); +i64_dist_from!(i32); +i64_dist_from!(u32); +i64_dist_from!(i64); + +impl TryFrom<&oximeter::histogram::Histogram> for Distribution { + type Error = Error; + fn try_from( + hist: &oximeter::histogram::Histogram, + ) -> Result { + let (bins, counts) = hist.to_arrays(); + let bins = bins + .into_iter() + .map(i64::try_from) + .collect::>() + .context("Overflow converting u64 to i64")?; + Ok(Self { bins, counts }) + } +} + +impl TryFrom<&oximeter::histogram::Histogram> for CumulativeDatum { + type Error = Error; + fn try_from( + hist: &oximeter::histogram::Histogram, + ) -> Result { + hist.try_into().map(CumulativeDatum::IntegerDistribution) + } +} + +macro_rules! f64_dist_from { + ($t:ty) => { + impl From<&oximeter::histogram::Histogram<$t>> for Distribution { + fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + let (bins, counts) = hist.to_arrays(); + Self { bins: bins.into_iter().map(f64::from).collect(), counts } + } + } + + impl From<&oximeter::histogram::Histogram<$t>> for CumulativeDatum { + fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + CumulativeDatum::DoubleDistribution(hist.into()) + } + } + }; +} + +f64_dist_from!(f32); +f64_dist_from!(f64); + +#[cfg(test)] +mod tests { + use crate::oxql::point::{DataType, ValueArray}; + + use super::{Distribution, MetricType, Points, Values}; + use chrono::{DateTime, Utc}; + use oximeter::types::Cumulative; + use oximeter::Measurement; + use std::time::Duration; + + #[test] + fn test_point_delta_between() { + let mut datum = Cumulative::new(2i64); + let now = Utc::now(); + let meas0 = Measurement::new(now + Duration::from_secs(1), datum); + datum.set(10i64); + let meas1 = Measurement::new(now + Duration::from_secs(2), datum); + let measurements = vec![meas0.clone(), meas1.clone()]; + let points = Points::delta_from_cumulative(&measurements).unwrap(); + + assert_eq!(points.len(), 2); + assert_eq!( + points.values(0).unwrap().as_integer().unwrap(), + &[Some(2i64), Some(8)], + ); + assert_eq!( + Duration::from_secs(1), + (points.timestamps[1] - points.timestamps[0]).to_std().unwrap(), + ); + let expected = vec![now, meas0.timestamp()]; + let actual = points.start_times.as_ref().unwrap(); + assert_eq!(expected.len(), actual.len()); + for (x, y) in expected.into_iter().zip(actual.into_iter()) { + assert!((*y - x).num_nanoseconds().unwrap() <= 1); + } + } + + #[test] + fn test_point_delta_between_with_new_epoch() { + let datum = Cumulative::new(2i64); + let now = Utc::now(); + let meas0 = Measurement::new(now + Duration::from_secs(1), datum); + + // Create a new datum, with a completely new start time, representing a + // new epoch. 
+ let now = Utc::now() + Duration::from_secs(10); + let datum = Cumulative::with_start_time(now, 10i64); + let meas1 = Measurement::new(now + Duration::from_secs(2), datum); + let measurements = vec![meas0.clone(), meas1.clone()]; + let points = Points::delta_from_cumulative(&measurements).unwrap(); + + // The second point should not be referenced to the first, because + // they're in different epochs. + assert_eq!(points.len(), 2); + assert_eq!( + points.values(0).unwrap().as_integer().unwrap(), + &[Some(2i64), Some(10)], + ); + + // The start times should be the start times of the measurements + // themselves as well. Same for timestamps. + assert_eq!( + points.timestamps, + vec![meas0.timestamp(), meas1.timestamp()], + ); + assert_eq!( + points.start_times.as_ref().unwrap(), + &[meas0.start_time().unwrap(), meas1.start_time().unwrap()], + ); + } + + #[test] + fn test_point_delta_between_overlapping_time_ranges() { + // These data points start at `T` and `T + 100ms` respectively, and end + // at those times + 1s. That means their time ranges overlap, and so we + // can't compute a delta from them. + let start_time = Utc::now() - Duration::from_secs(1); + let datum1 = Cumulative::with_start_time(start_time, 1i64); + let datum2 = Cumulative::with_start_time( + start_time + Duration::from_millis(100), + 10i64, + ); + let meas1 = Measurement::new( + datum1.start_time() + Duration::from_secs(1), + datum1, + ); + let meas2 = Measurement::new( + datum2.start_time() + Duration::from_secs(1), + datum2, + ); + + assert!( + Points::delta_from_cumulative(&[meas1.clone(), meas2.clone()]) + .is_err(), + "Should not be able to compute a delta point \ + between two measuremenst with overlapping start \ + times: [{}, {}] and [{}, {}]", + meas1.start_time().unwrap(), + meas1.timestamp(), + meas2.start_time().unwrap(), + meas2.timestamp(), + ); + } + + fn timestamps(n: usize) -> Vec> { + let now = Utc::now(); + let mut out = Vec::with_capacity(n); + for i in 0..n { + out.push(now - Duration::from_secs(i as _)); + } + out.into_iter().rev().collect() + } + + #[test] + fn test_cast_points_from_bool() { + let points = Points { + start_times: None, + timestamps: timestamps(2), + values: vec![Values { + values: ValueArray::Boolean(vec![Some(false), Some(true)]), + metric_type: MetricType::Gauge, + }], + }; + + let as_same = points.cast(&[DataType::Boolean]).unwrap(); + let vals = as_same.values[0].values.as_boolean().unwrap(); + assert_eq!(vals, points.values[0].values.as_boolean().unwrap()); + + let as_int = points.cast(&[DataType::Integer]).unwrap(); + let vals = as_int.values[0].values.as_integer().unwrap(); + assert_eq!(vals, &vec![Some(0), Some(1)]); + + let as_double = points.cast(&[DataType::Double]).unwrap(); + let vals = as_double.values[0].values.as_double().unwrap(); + assert_eq!(vals, &vec![Some(0.0), Some(1.0)]); + + let as_string = points.cast(&[DataType::String]).unwrap(); + let vals = as_string.values[0].values.as_string().unwrap(); + assert_eq!( + vals, + &vec![Some("false".to_string()), Some("true".to_string())] + ); + + for ty in [DataType::IntegerDistribution, DataType::DoubleDistribution] + { + assert!( + points.cast(&[ty]).is_err(), + "Should not be able to cast bool array to distributions" + ); + } + assert!(points.cast(&[]).is_err(), "Should fail to cast with no types"); + assert!( + points.cast(&[DataType::Boolean, DataType::Boolean]).is_err(), + "Should fail to cast to the wrong number of types" + ); + } + + #[test] + fn test_cast_points_from_integer() { + let points = Points { + 
start_times: None, + timestamps: timestamps(2), + values: vec![Values { + values: ValueArray::Integer(vec![Some(0), Some(10)]), + metric_type: MetricType::Gauge, + }], + }; + + let as_same = points.cast(&[DataType::Integer]).unwrap(); + let vals = as_same.values[0].values.as_integer().unwrap(); + assert_eq!(vals, points.values[0].values.as_integer().unwrap()); + + let as_bools = points.cast(&[DataType::Boolean]).unwrap(); + let vals = as_bools.values[0].values.as_boolean().unwrap(); + assert_eq!(vals, &vec![Some(false), Some(true)]); + + let as_double = points.cast(&[DataType::Double]).unwrap(); + let vals = as_double.values[0].values.as_double().unwrap(); + assert_eq!(vals, &vec![Some(0.0), Some(10.0)]); + + let as_string = points.cast(&[DataType::String]).unwrap(); + let vals = as_string.values[0].values.as_string().unwrap(); + assert_eq!(vals, &vec![Some("0".to_string()), Some("10".to_string())]); + + for ty in [DataType::IntegerDistribution, DataType::DoubleDistribution] + { + assert!( + points.cast(&[ty]).is_err(), + "Should not be able to cast int array to distributions" + ); + } + assert!(points.cast(&[]).is_err(), "Should fail to cast with no types"); + assert!( + points.cast(&[DataType::Boolean, DataType::Boolean]).is_err(), + "Should fail to cast to the wrong number of types" + ); + } + + #[test] + fn test_cast_points_from_double() { + let points = Points { + start_times: None, + timestamps: timestamps(2), + values: vec![Values { + values: ValueArray::Double(vec![Some(0.0), Some(10.5)]), + metric_type: MetricType::Gauge, + }], + }; + + let as_same = points.cast(&[DataType::Double]).unwrap(); + let vals = as_same.values[0].values.as_double().unwrap(); + assert_eq!(vals, points.values[0].values.as_double().unwrap()); + + let as_bools = points.cast(&[DataType::Boolean]).unwrap(); + let vals = as_bools.values[0].values.as_boolean().unwrap(); + assert_eq!(vals, &vec![Some(false), Some(true)]); + + let as_ints = points.cast(&[DataType::Integer]).unwrap(); + let vals = as_ints.values[0].values.as_integer().unwrap(); + assert_eq!(vals, &vec![Some(0), Some(10)]); + + let as_string = points.cast(&[DataType::String]).unwrap(); + let vals = as_string.values[0].values.as_string().unwrap(); + assert_eq!( + vals, + &vec![Some("0".to_string()), Some("10.5".to_string())] + ); + + let points = Points { + start_times: None, + timestamps: timestamps(2), + values: vec![Values { + values: ValueArray::Double(vec![Some(0.0), Some(f64::MAX)]), + metric_type: MetricType::Gauge, + }], + }; + assert!( + points.cast(&[DataType::Integer]).is_err(), + "Should fail to cast out-of-range doubles to integer" + ); + + for ty in [DataType::IntegerDistribution, DataType::DoubleDistribution] + { + assert!( + points.cast(&[ty]).is_err(), + "Should not be able to cast double array to distributions" + ); + } + assert!(points.cast(&[]).is_err(), "Should fail to cast with no types"); + assert!( + points.cast(&[DataType::Boolean, DataType::Boolean]).is_err(), + "Should fail to cast to the wrong number of types" + ); + } + + #[test] + fn test_cast_points_from_string() { + fn make_points(strings: &[&str]) -> Points { + Points { + start_times: None, + timestamps: timestamps(strings.len()), + values: vec![Values { + values: ValueArray::String( + strings.iter().map(|&s| Some(s.into())).collect(), + ), + metric_type: MetricType::Gauge, + }], + } + } + + let points = make_points(&["some", "strings"]); + let as_same = points.cast(&[DataType::String]).unwrap(); + assert_eq!(as_same, points); + + // Any non-empty string is truthy, 
even "false". + let points = make_points(&["", "false", "true"]); + let as_bools = points.cast(&[DataType::Boolean]).unwrap(); + let vals = as_bools.values[0].values.as_boolean().unwrap(); + assert_eq!(vals, &vec![Some(false), Some(true), Some(true)]); + + // Conversion to integers happens by parsing. + let points = make_points(&["0", "1"]); + let as_ints = points.cast(&[DataType::Integer]).unwrap(); + let vals = as_ints.values[0].values.as_integer().unwrap(); + assert_eq!(vals, &vec![Some(0), Some(1)]); + for bad in ["1.0", "", "foo", "[]"] { + assert!( + make_points(&[bad]).cast(&[DataType::Integer]).is_err(), + "Should fail to cast non-int string '{}' to integers", + bad, + ); + } + + // Conversion to doubles happens by parsing. + let points = make_points(&["0", "1.1"]); + let as_doubles = points.cast(&[DataType::Double]).unwrap(); + let vals = as_doubles.values[0].values.as_double().unwrap(); + assert_eq!(vals, &vec![Some(0.0), Some(1.1)]); + for bad in ["", "foo", "[]"] { + assert!( + make_points(&[bad]).cast(&[DataType::Double]).is_err(), + "Should fail to cast non-double string '{}' to double", + bad, + ); + } + + // Checks for invalid casts + for ty in [DataType::IntegerDistribution, DataType::DoubleDistribution] + { + assert!( + points.cast(&[ty]).is_err(), + "Should not be able to cast double array to distributions" + ); + } + assert!(points.cast(&[]).is_err(), "Should fail to cast with no types"); + assert!( + points.cast(&[DataType::Boolean, DataType::Boolean]).is_err(), + "Should fail to cast to the wrong number of types" + ); + } + + #[test] + fn test_cast_points_from_int_distribution() { + // We can only "cast" to the same type here. + let points = Points { + start_times: None, + timestamps: timestamps(1), + values: vec![Values { + values: ValueArray::IntegerDistribution(vec![Some( + Distribution { bins: vec![0, 1, 2], counts: vec![0; 3] }, + )]), + metric_type: MetricType::Gauge, + }], + }; + let as_same = points.cast(&[DataType::IntegerDistribution]).unwrap(); + assert_eq!(points, as_same); + + for ty in [ + DataType::Boolean, + DataType::String, + DataType::Integer, + DataType::Double, + DataType::DoubleDistribution, + ] { + assert!( + points.cast(&[ty]).is_err(), + "Should not be able to cast distributions to anything other than itself" + ); + } + assert!(points.cast(&[]).is_err()); + assert!(points + .cast(&[ + DataType::IntegerDistribution, + DataType::IntegerDistribution + ]) + .is_err()); + } + + #[test] + fn test_cast_points_from_double_distribution() { + // We can only "cast" to the same type here. 
+ let points = Points { + start_times: None, + timestamps: timestamps(1), + values: vec![Values { + values: ValueArray::DoubleDistribution(vec![Some( + Distribution { + bins: vec![0.0, 1.0, 2.0], + counts: vec![0; 3], + }, + )]), + metric_type: MetricType::Gauge, + }], + }; + let as_same = points.cast(&[DataType::DoubleDistribution]).unwrap(); + assert_eq!(points, as_same); + + for ty in [ + DataType::Boolean, + DataType::String, + DataType::Integer, + DataType::Double, + DataType::IntegerDistribution, + ] { + assert!( + points.cast(&[ty]).is_err(), + "Should not be able to cast distributions to anything other than itself" + ); + } + assert!(points.cast(&[]).is_err()); + assert!(points + .cast(&[DataType::DoubleDistribution, DataType::DoubleDistribution]) + .is_err()); + } +} diff --git a/oximeter/db/src/oxql/query/mod.rs b/oximeter/db/src/oxql/query/mod.rs new file mode 100644 index 0000000000..bb1c0986fe --- /dev/null +++ b/oximeter/db/src/oxql/query/mod.rs @@ -0,0 +1,837 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A single OxQL query. + +// Copyright 2024 Oxide Computer Company + +use super::ast::ident::Ident; +use super::ast::logical_op::LogicalOp; +use super::ast::table_ops::filter::CompoundFilter; +use super::ast::table_ops::filter::FilterExpr; +use super::ast::table_ops::group_by::GroupBy; +use super::ast::table_ops::BasicTableOp; +use super::ast::table_ops::TableOp; +use super::ast::SplitQuery; +use crate::oxql::ast::grammar; +use crate::oxql::ast::table_ops::filter::Filter; +use crate::oxql::ast::Query as QueryNode; +use crate::oxql::fmt_parse_error; +use crate::oxql::Error; +use crate::TimeseriesName; +use chrono::DateTime; +use chrono::Utc; +use std::time::Duration; + +/// Special identifiers for column names or other widely-used values. +pub mod special_idents { + use oximeter::DatumType; + + pub const TIMESTAMP: &str = "timestamp"; + pub const START_TIME: &str = "start_time"; + pub const DATUM: &str = "datum"; + pub const BINS: &str = "bins"; + pub const COUNTS: &str = "counts"; + pub const DATETIME64: &str = "DateTime64"; + pub const ARRAYU64: &str = "Array[u64]"; + + pub fn array_type_name_from_histogram_type( + type_: DatumType, + ) -> Option<String> { + if !type_.is_histogram() { + return None; + } + Some(format!( + "Array[{}]", + type_.to_string().strip_prefix("Histogram").unwrap().to_lowercase(), + )) + } +} + +/// A parsed OxQL query. +#[derive(Clone, Debug, PartialEq)] +pub struct Query { + pub(super) parsed: QueryNode, + pub(super) end_time: DateTime<Utc>, +} + +impl Query { + /// Construct a query written in OxQL. + pub fn new(query: impl AsRef<str>) -> Result<Self, Error> { + let raw = query.as_ref().trim(); + const MAX_LEN: usize = 4096; + anyhow::ensure!( + raw.len() <= MAX_LEN, + "Queries must be <= {} characters", + MAX_LEN, + ); + let parsed = grammar::query_parser::query(raw) + .map_err(|e| fmt_parse_error(raw, e))?; + + // Fetch the latest query end time referred to in the parsed query, or + // use now if there isn't one. + let query_end_time = parsed.query_end_time().unwrap_or_else(Utc::now); + Ok(Self { parsed, end_time: query_end_time }) + } + + /// Return the end time of the query. + pub fn end_time(&self) -> &DateTime<Utc> { + &self.end_time + } + + /// Return the next referenced timeseries name.
+ /// + /// Queries always start with either a single `get` operation, which refers + /// to one timeseries; or a subquery, each component of which is a query. So + /// it is always true that there is exactly one next timeseries name, since + /// that comes from the current query, or the next subquery. + pub fn timeseries_name(&self) -> &TimeseriesName { + self.parsed.timeseries_name() + } + + /// Return the transformation table ops, i.e., everything after the initial + /// get operation or subquery. + pub fn transformations(&self) -> &[TableOp] { + self.parsed.transformations() + } + + /// Return the set of all predicates in the query, coalesced. + /// + /// Query optimization is a large topic. There are few rules, and many + /// heuristics. However, one of those is extremely useful for our case: + /// predicate pushdown. This is where one moves predicates as close as + /// possible to the data, filtering out unused data as early as possible in + /// query processing. + /// + /// In our case, _currently_, we can implement this pretty easily. Filtering + /// operations can usually be coalesced into a single item. That means: + /// + /// - successive filtering operations are merged: `filter a | filter b -> + /// `filter (a) && (b)`. + /// - filtering operations are "pushed down", to just after the initial + /// `get` operation in the query. + /// + /// # Group by + /// + /// While filters can be combined and pushed down through many operations, + /// special care is taken for `group_by`. Specifically, the filter must only + /// name columns explicitly named in the `group_by`. If we pushed through + /// filters which named one of the columns _within_ the group (one not + /// named), then that would change the set of data in a group, and thus the + /// result. + /// + /// # Datum filters + /// + /// We currently only push down filters on the timestamps, and that is only + /// because we do _not_ support aggregations across time, only values. If + /// and when we do support that, then filters which reference time also + /// cannot be pushed down. + /// + /// # No predicates + /// + /// Note that this may return `None`, in the case where there are zero + /// predicates of any kind. + // + // Pushing filters through a group by. Consider the following data: + // + // a b timestamp datum + // 0 0 0 0 + // 0 0 1 1 + // 0 1 0 2 + // 0 1 1 3 + // 1 0 0 4 + // 1 0 1 5 + // 1 1 0 6 + // 1 1 1 7 + // + // So there are two groups for a and b columns each with two samples. + // + // Consider `get a:b | group_by [a] | filter a == 0`. + // + // After the group by, the result is: + // + // a timestamp datum + // 0 0 avg([0, 2]) -> 1 + // 0 1 avg([1, 3]) -> 2 + // 1 0 avg([4, 6]) -> 5 + // 1 1 avg([5, 7]) -> 6 + // + // Then after the filter, it becomes: + // + // a timestamp datum + // 0 0 avg([0, 2]) -> 1 + // 0 1 avg([1, 3]) -> 2 + // + // Now, let's do the filter first, as if we pushed that down. + // i.e., `get a:b | filter a == 0 | group_by [a]`. After the filter, we get: + // + // a b timestamp datum + // 0 0 0 0 + // 0 0 1 1 + // 0 1 0 2 + // 0 1 1 3 + // + // Then we apply the group by: + // + // a timestamp datum + // 0 0 avg([0, 2]) -> 1 + // 0 1 avg([1, 3]) -> 2 + // + // So we get the same result. Let's suppose we had a filter on the column + // `b` instead. Doing the group_by first, we get the exact same result as + // the first one above. Or we really get an error, because the resulting + // table does not have a `b` column. 
+ // + // If instead we did the filter first, we'd get a different result. Starting + // from: + // + // a b timestamp datum + // 0 0 0 0 + // 0 0 1 1 + // 0 1 0 2 + // 0 1 1 3 + // 1 0 0 4 + // 1 0 1 5 + // 1 1 0 6 + // 1 1 1 7 + // + // Apply `filter b == 0`: + // + // + // a b timestamp datum + // 0 0 0 0 + // 0 0 1 1 + // 1 0 0 4 + // 1 0 1 5 + // + // Then apply group_by [a] + // + // a timestamp datum + // 0 0 avg([0, 1]) -> 0.5 + // 0 1 avg([4, 5]) -> 4.5 + // + // So we get something very different. + // + // What about filtering by timestamp? Starting from the raw data again: + // + // a b timestamp datum + // 0 0 0 0 + // 0 0 1 1 + // 0 1 0 2 + // 0 1 1 3 + // 1 0 0 4 + // 1 0 1 5 + // 1 1 0 6 + // 1 1 1 7 + // + // Let's add a `filter timestamp >= 1`. After the `group_by [a]`, we get: + // + // a timestamp datum + // 0 0 avg([0, 2]) -> 1 + // 0 1 avg([1, 3]) -> 2 + // 1 0 avg([4, 6]) -> 5 + // 1 1 avg([5, 7]) -> 6 + // + // Then after `filter timestamp >= 1`: + // + // a timestamp datum + // 0 1 avg([1, 3]) -> 2 + // 1 1 avg([5, 7]) -> 6 + // + // Now, filtering the timestamps first, after that we get: + // + // a b timestamp datum + // 0 0 1 1 + // 0 1 1 3 + // 1 0 1 5 + // 1 1 1 7 + // + // Then grouping: + // + // a timestamp datum + // 0 1 avg([1, 3]) -> 2 + // 1 1 avg([5, 7]) -> 6 + // + // So that also works fine. + pub(crate) fn coalesced_predicates( + &self, + mut outer: Option<Filter>, + ) -> Option<Filter> { + let maybe_filter = self.transformations().iter().rev().fold( + None, + |maybe_filter, next_tr| { + // Transformations only return basic ops, since all the + // subqueries must be at the prefix of the query. + let TableOp::Basic(op) = next_tr else { + unreachable!(); + }; + + match op { + BasicTableOp::GroupBy(GroupBy { identifiers, .. }) => { + // We may have been passed predicates from an outer + // query. Those also need to be restricted, if we're + // trying to push them through a group_by operation. + outer = outer.as_ref().and_then(|outer| { + restrict_filter_idents(outer, identifiers) + }); + + // Only push through columns referred to in the group by + // itself, which replaces the current filter. + maybe_filter.as_ref().and_then(|current| { + restrict_filter_idents(current, identifiers) + }) + } + BasicTableOp::Filter(filter) => { + // Merge with any existing filter. + if let Some(left) = maybe_filter { + Some(left.merge(&filter, LogicalOp::And)) + } else { + Some(filter.clone()) + } + } + _ => maybe_filter, + } + }, + ); + + // Merge in any predicates passed from an outer query, which may have + // been restricted as we moved through group_by operations.
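+ // Three cases below: there is no outer predicate, so we return whatever + // we coalesced from this query (possibly nothing); there is only an outer + // predicate, which is passed through unchanged; or there are both, which + // are merged with a logical AND.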
+ match (outer, maybe_filter) { + (None, any) => any, + (Some(outer), None) => Some(outer), + (Some(outer), Some(inner)) => { + Some(outer.merge(&inner, LogicalOp::And)) + } + } + } + + pub(crate) fn split(&self) -> SplitQuery { + self.parsed.split(self.end_time) + } +} + +// Return a new filter containing only parts that refer to either: +// +// - a `timestamp` column +// - a column listed in `identifiers` +fn restrict_filter_idents( + current_filter: &Filter, + identifiers: &[Ident], +) -> Option<Filter> { + match &current_filter.expr { + FilterExpr::Simple(inner) => { + let ident = inner.ident.as_str(); + if ident == "timestamp" + || identifiers.iter().map(Ident::as_str).any(|id| id == ident) + { + Some(current_filter.clone()) + } else { + None + } + } + FilterExpr::Compound(CompoundFilter { left, op, right }) => { + let maybe_left = restrict_filter_idents(left, identifiers); + let maybe_right = restrict_filter_idents(right, identifiers); + match (maybe_left, maybe_right) { + (Some(left), Some(right)) => Some(Filter { + negated: current_filter.negated, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(left), + op: *op, + right: Box::new(right), + }), + }), + (Some(single), None) | (None, Some(single)) => Some(single), + (None, None) => None, + } + } + } +} + +/// Describes the time alignment for an OxQL query. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Alignment { + /// The end time of the query, which is the temporal reference point. + pub end_time: DateTime<Utc>, + /// The alignment period, the interval on which values are produced. + pub period: Duration, +} + +#[cfg(test)] +mod tests { + use super::Filter; + use super::Ident; + use super::Query; + use crate::oxql::ast::cmp::Comparison; + use crate::oxql::ast::literal::Literal; + use crate::oxql::ast::logical_op::LogicalOp; + use crate::oxql::ast::table_ops::filter::CompoundFilter; + use crate::oxql::ast::table_ops::filter::FilterExpr; + use crate::oxql::ast::table_ops::filter::SimpleFilter; + use crate::oxql::ast::table_ops::join::Join; + use crate::oxql::ast::table_ops::BasicTableOp; + use crate::oxql::ast::table_ops::TableOp; + use crate::oxql::ast::SplitQuery; + use crate::oxql::query::restrict_filter_idents; + use chrono::NaiveDateTime; + use chrono::Utc; + use std::time::Duration; + + #[test] + fn test_restrict_filter_idents_single_atom() { + let ident = Ident("foo".into()); + let filter = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: ident.clone(), + cmp: Comparison::Eq, + value: Literal::Boolean(false), + }), + }; + assert_eq!( + restrict_filter_idents(&filter, &[ident.clone()]).unwrap(), + filter + ); + assert_eq!(restrict_filter_idents(&filter, &[]), None); + } + + #[test] + fn test_restrict_filter_idents_single_atom_with_timestamp() { + let filter = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("timestamp".into()), + cmp: Comparison::Eq, + value: Literal::Boolean(false), + }), + }; + assert_eq!(restrict_filter_idents(&filter, &[]).unwrap(), filter); + } + + #[test] + fn test_restrict_filter_idents_expr() { + let idents = [Ident("foo".into()), Ident("bar".into())]; + let left = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: idents[0].clone(), + cmp: Comparison::Eq, + value: Literal::Boolean(false), + }), + }; + let right = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: idents[1].clone(), + cmp: Comparison::Eq, + value: Literal::Boolean(false), + }), + }; + let filter = Filter { + negated:
false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(left.clone()), + op: LogicalOp::And, + right: Box::new(right.clone()), + }), + }; + assert_eq!(restrict_filter_idents(&filter, &idents).unwrap(), filter); + + // This should remove the right filter. + assert_eq!( + restrict_filter_idents(&filter, &idents[..1]).unwrap(), + left + ); + + // And both + assert_eq!(restrict_filter_idents(&filter, &[]), None); + } + + #[test] + fn test_split_query() { + let q = Query::new("get a:b").unwrap(); + let split = q.split(); + assert_eq!(split, SplitQuery::Flat(q)); + + let q = Query::new("get a:b | filter x == 0").unwrap(); + let split = q.split(); + assert_eq!(split, SplitQuery::Flat(q)); + + let q = Query::new("{ get a:b } | join").unwrap(); + let split = q.split(); + let mut inner = Query::new("get a:b").unwrap(); + inner.end_time = q.end_time; + assert_eq!( + split, + SplitQuery::Nested { + subqueries: vec![inner], + transformations: vec![TableOp::Basic(BasicTableOp::Join(Join))], + } + ); + + let q = Query::new("{ get a:b | filter x == 0 } | join").unwrap(); + let split = q.split(); + let mut inner = Query::new("get a:b | filter x == 0").unwrap(); + inner.end_time = q.end_time; + assert_eq!( + split, + SplitQuery::Nested { + subqueries: vec![inner], + transformations: vec![TableOp::Basic(BasicTableOp::Join(Join))], + } + ); + + let q = Query::new("{ get a:b ; get a:b } | join").unwrap(); + let split = q.split(); + let mut inner = Query::new("get a:b").unwrap(); + inner.end_time = q.end_time; + assert_eq!( + split, + SplitQuery::Nested { + subqueries: vec![inner; 2], + transformations: vec![TableOp::Basic(BasicTableOp::Join(Join))], + } + ); + + let q = Query::new("{ { get a:b ; get a:b } | join } | join").unwrap(); + let split = q.split(); + let mut subqueries = + vec![Query::new("{ get a:b; get a:b } | join").unwrap()]; + subqueries[0].end_time = q.end_time; + let expected = SplitQuery::Nested { + subqueries: subqueries.clone(), + transformations: vec![TableOp::Basic(BasicTableOp::Join(Join))], + }; + assert_eq!(split, expected); + let split = subqueries[0].split(); + let mut inner = Query::new("get a:b").unwrap(); + inner.end_time = q.end_time; + assert_eq!( + split, + SplitQuery::Nested { + subqueries: vec![inner; 2], + transformations: vec![TableOp::Basic(BasicTableOp::Join(Join))], + } + ); + } + + #[test] + fn test_coalesce_predicates() { + // Passed through group-by unchanged. + let q = Query::new("get a:b | group_by [a] | filter a == 0").unwrap(); + let preds = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".to_string()), + cmp: Comparison::Eq, + value: Literal::Integer(0), + }), + }; + assert_eq!(q.coalesced_predicates(None), Some(preds)); + + // Merge the first two, then pass through group by. + let q = Query::new( + "get a:b | group_by [a] | filter a == 0 | filter a == 0", + ) + .unwrap(); + let atom = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".to_string()), + cmp: Comparison::Eq, + value: Literal::Integer(0), + }), + }; + let preds = Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(atom.clone()), + op: LogicalOp::And, + right: Box::new(atom.clone()), + }), + }; + assert_eq!(q.coalesced_predicates(None), Some(preds)); + + // These are also merged, even though they're on different sides of the + // group by. 
+ let q = Query::new( + "get a:b | filter a == 0 | group_by [a] | filter a == 0", + ) + .unwrap(); + let atom = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".to_string()), + cmp: Comparison::Eq, + value: Literal::Integer(0), + }), + }; + let preds = Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(atom.clone()), + op: LogicalOp::And, + right: Box::new(atom.clone()), + }), + }; + assert_eq!(q.coalesced_predicates(None), Some(preds)); + + // Second filter is _not_ passed through, because it refers to columns + // not in the group by. We have only the first filter. + let q = Query::new( + "get a:b | filter a == 0 | group_by [a] | filter b == 0", + ) + .unwrap(); + let preds = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".to_string()), + cmp: Comparison::Eq, + value: Literal::Integer(0), + }), + }; + assert_eq!(q.coalesced_predicates(None), Some(preds)); + } + + #[test] + fn test_coalesce_predicates_into_subqueries() { + let q = "{ get a:b; get a:b } | join | filter foo == 'bar'"; + let query = Query::new(q).unwrap(); + let preds = query.coalesced_predicates(None).unwrap(); + let expected_predicate = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("foo".to_string()), + cmp: Comparison::Eq, + value: Literal::String("bar".into()), + }), + }; + assert_eq!(preds, expected_predicate); + + // Split the query, which should give us a list of two subqueries, + // followed by the join and filter. + let SplitQuery::Nested { subqueries, .. } = query.split() else { + panic!(); + }; + for subq in subqueries.iter() { + let inner = subq + .coalesced_predicates(Some(expected_predicate.clone())) + .unwrap(); + assert_eq!( + inner, expected_predicate, + "Predicates passed into an inner subquery should be preserved" + ); + } + } + + #[test] + fn test_coalesce_predicates_into_subqueries_with_group_by() { + let q = "{ get a:b | group_by [baz]; get a:b | group_by [foo] } | \ + join | filter foo == 'bar'"; + let query = Query::new(q).unwrap(); + let preds = query.coalesced_predicates(None).unwrap(); + let expected_predicate = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("foo".to_string()), + cmp: Comparison::Eq, + value: Literal::String("bar".into()), + }), + }; + assert_eq!(preds, expected_predicate); + + // Split the query, which should give us a list of two subqueries, + // followed by the join and filter. + let SplitQuery::Nested { subqueries, .. } = query.split() else { + panic!(); + }; + + // The first subquery groups by a field "baz", which isn't in the outer + // filter. It should have that outer predicate removed, and have no + // predicates at all. + let subq = &subqueries[0]; + assert!( + subq.coalesced_predicates(Some(expected_predicate.clone())) + .is_none(), + "Should not push an outer predicate into a subquery, when that \ + subquery includes a group_by that does not name a field in the \ + outer predicate" + ); + + // The second subquery should include the expected predicate, since the + // group_by includes the field named in the filter itself. 
+ let subq = &subqueries[1]; + let inner = subq + .coalesced_predicates(Some(expected_predicate.clone())) + .unwrap(); + assert_eq!( + inner, expected_predicate, + "Predicates passed into an inner subquery should be preserved, \ + when that inner subquery includes a group_by that names the \ + ident in the outer filter" + ); + } + + #[test] + fn test_coalesce_predicates_merged_into_subqueries() { + let q = "{ get a:b | filter baz == 0; get a:b | filter baz == 0 } \ + | join | filter foo == 'bar'"; + let query = Query::new(q).unwrap(); + let preds = query.coalesced_predicates(None).unwrap(); + let expected_predicate = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("foo".to_string()), + cmp: Comparison::Eq, + value: Literal::String("bar".into()), + }), + }; + assert_eq!(preds, expected_predicate); + let expected_inner_predicate = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("baz".to_string()), + cmp: Comparison::Eq, + value: Literal::Integer(0), + }), + }; + + // Split the query, which should give us a list of two subqueries, + // followed by the join and filter. + let SplitQuery::Nested { subqueries, .. } = query.split() else { + panic!(); + }; + for subq in subqueries.iter() { + let inner = subq + .coalesced_predicates(Some(expected_predicate.clone())) + .unwrap(); + assert_eq!( + inner, + expected_predicate.merge(&expected_inner_predicate, LogicalOp::And), + "Predicates passed into an inner subquery should be preserved, \ + and merged with any subquery predicates", + ); + } + } + + #[test] + fn test_query_end_time() { + const MAX_DIFF: i64 = 1_000; + let q = Query::new("get a:b").unwrap(); + assert!( + (q.end_time - Utc::now()).num_nanoseconds().unwrap() < MAX_DIFF, + "Query which does not explicitly name an end time should \ + use now as the end time", + ); + + let q = Query::new("get a:b | filter timestamp > @now() - 1s").unwrap(); + assert!( + (q.end_time - Utc::now()).num_nanoseconds().unwrap() < MAX_DIFF, + "Query which does not explicitly name an end time should \ + use now as the end time", + ); + + let then = Utc::now() - Duration::from_secs(60); + let as_str = then.format("%Y-%m-%dT%H:%M:%S.%f"); + let q = Query::new(&format!("get a:b | filter timestamp < @{as_str}")) + .unwrap(); + assert_eq!( + q.end_time, then, + "Query with a less-than filter and a timestamp should \ + set the query end time" + ); + + let q = Query::new(&format!("get a:b | filter timestamp <= @{as_str}")) + .unwrap(); + assert_eq!( + q.end_time, then, + "Query with a less-than-or-equal filter and a timestamp should \ + set the query end time" + ); + + let q = Query::new(&format!("get a:b | filter timestamp > @{as_str}")) + .unwrap(); + assert!( + (q.end_time - Utc::now()).num_nanoseconds().unwrap() < MAX_DIFF, + "Query with a greater-than timestamp filter should not set an \ + explicit query end time, and so use now" + ); + + let q = Query::new("get a:b | filter timestamp > @now() - 1d").unwrap(); + assert!( + (q.end_time - Utc::now()).num_nanoseconds().unwrap() < MAX_DIFF, + "Query which does not explicitly name an end time should \ + use now as the end time", + ); + + let q = Query::new(&format!( + "get a:b | filter timestamp > @now() - 1d && timestamp < @{as_str}" + )) + .unwrap(); + assert_eq!( + q.end_time, + then, + "Query with a compound less-than-or-equal filter and a timestamp should \ + set the query end time" + ); + + let then = Utc::now() - Duration::from_secs(60); + let then_as_str = then.format("%Y-%m-%dT%H:%M:%S.%f"); + 
let even_earlier = then - Duration::from_secs(10); + let even_earlier_as_str = even_earlier.format("%Y-%m-%dT%H:%M:%S.%f"); + let q = Query::new(&format!( + "get a:b | filter timestamp < @{then_as_str} || timestamp < @{even_earlier_as_str}" + )) + .unwrap(); + assert_eq!( + q.end_time, + then, + "Query with two less-than timestamp filters should use the later timestamp" + ); + + let expected = NaiveDateTime::parse_from_str( + "2024-03-13T06:24:00", + "%Y-%m-%dT%H:%M:%S%.f", + ) + .unwrap() + .and_utc(); + let q = "{ \ + get physical_data_link:bytes_sent ; \ + get physical_data_link:bytes_received \ + } | filter timestamp > @2024-03-13T06:20:00 && timestamp < @2024-03-13T06:24:00"; + let query = Query::new(q).unwrap(); + assert_eq!(query.end_time, expected); + } + + #[test] + fn test_query_end_time_across_subqueries() { + let now = Utc::now(); + const FMT: &str = "%Y-%m-%dT%H:%M:%S.%f"; + let first = now - Duration::from_secs(1); + let second = now - Duration::from_secs_f64(1e-3); + let q = format!( + "{{ \ + get a:b | filter timestamp > @{}; \ + get a:b | filter timestamp > @{} \ + }}", + first.format(FMT), + second.format(FMT), + ); + let query = Query::new(q).unwrap(); + assert!( + query.end_time > second, + "This nested query should have used Utc::now() as the end time" + ); + let end_time = query.end_time; + let SplitQuery::Nested { subqueries, .. } = query.split() else { + unreachable!(); + }; + for subq in subqueries.iter() { + assert_eq!( + subq.end_time, end_time, + "All subqueries should have the same end time." + ); + } + } +} diff --git a/oximeter/db/src/oxql/table.rs b/oximeter/db/src/oxql/table.rs new file mode 100644 index 0000000000..025935090b --- /dev/null +++ b/oximeter/db/src/oxql/table.rs @@ -0,0 +1,293 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Definitions of timeseries and groups of them, a [`Table`]. + +// Copyright 2024 Oxide Computer Company + +use super::point::DataType; +use super::point::MetricType; +use super::point::Points; +use super::query::Alignment; +use super::Error; +use crate::TimeseriesKey; +use highway::HighwayHasher; +use oximeter::FieldValue; +use schemars::JsonSchema; +use serde::Deserialize; +use serde::Serialize; +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; +use std::hash::Hash; +use std::hash::Hasher; + +/// A timeseries contains a timestamped set of values from one source. +/// +/// This includes the typed key-value pairs that uniquely identify it, and the +/// set of timestamps and data values from it. +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct Timeseries { + pub fields: BTreeMap<String, FieldValue>, + pub points: Points, + #[serde(skip)] + pub(crate) alignment: Option<Alignment>, +} + +impl Timeseries { + /// Construct a new timeseries, from its fields. + /// + /// It holds no points or type information. That will be enforced by the + /// points type as they are added. + pub fn new( + fields: impl Iterator<Item = (String, FieldValue)>, + data_type: DataType, + metric_type: MetricType, + ) -> Result<Self, Error> { + let fields: BTreeMap<_, _> = fields.collect(); + anyhow::ensure!(!fields.is_empty(), "Fields cannot be empty"); + Ok(Self { + fields, + points: Points::empty(data_type, metric_type), + alignment: None, + }) + } + + pub fn key(&self) -> TimeseriesKey { + // NOTE: The key here is _not_ stable, like the one used in the database + // itself to identify timeseries.
That's OK, however, because we do not + // serialize this value anywhere -- it's used entirely for the lifetime + // of one query, and then thrown away, and only needs to be consistent + // for that long. + let mut hasher = HighwayHasher::default(); + for (name, value) in self.fields.iter() { + name.hash(&mut hasher); + value.hash(&mut hasher); + } + hasher.finish() + } + + /// Return a copy of the timeseries, keeping only the provided fields. + /// + /// An error is returned if the timeseries does not contain those fields. + pub(crate) fn copy_with_fields( + &self, + kept_fields: &[&str], + ) -> Result<Self, Error> { + let mut fields = BTreeMap::new(); + for field in kept_fields { + let Some(f) = self.fields.get(*field) else { + anyhow::bail!("Timeseries does not contain field '{}'", field); + }; + fields.insert(field.to_string(), f.clone()); + } + Ok(Self { + fields, + points: self.points.clone(), + alignment: self.alignment, + }) + } + + // Return `true` if the schema in `other` matches that of `self`. + fn matches_schema(&self, other: &Timeseries) -> bool { + if self.fields.len() != other.fields.len() { + return false; + } + for (f0, f1) in self.fields.iter().zip(other.fields.iter()) { + // Check the field names. + if f0.0 != f1.0 { + return false; + } + // And types. + if f0.1.field_type() != f1.1.field_type() { + return false; + } + } + + // And the type info is the same as well. + if !self + .points + .data_types() + .zip(other.points.data_types()) + .all(|(x, y)| x == y) + { + return false; + } + self.points + .metric_types() + .zip(other.points.metric_types()) + .all(|(x, y)| x == y) + } + + /// Return a new timeseries, with the points cast to the provided list of + /// data types. + /// + /// This returns an error if the points cannot be so cast, or the + /// dimensionality of the types requested differs from the dimensionality of + /// the points themselves. + pub(crate) fn cast(&self, types: &[DataType]) -> Result<Self, Error> { + let fields = self.fields.clone(); + Ok(Self { + fields, + points: self.points.cast(types)?, + alignment: self.alignment, + }) + } +} + +/// A table represents one or more timeseries with the same schema. +/// +/// A table is the result of an OxQL query. It contains a name, usually the name +/// of the timeseries schema from which the data is derived, and any number of +/// timeseries, which contain the actual data. +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct Table { + // The name of the table. + // + // This starts as the name of the timeseries schema the data is derived + // from, but can be modified as operations are done. + pub(super) name: String, + // The set of timeseries in the table, ordered by key. + timeseries: BTreeMap<TimeseriesKey, Timeseries>, +} + +impl Table { + /// Create a new table, with no timeseries. + pub fn new(name: impl AsRef<str>) -> Self { + Self { name: name.as_ref().to_string(), timeseries: BTreeMap::new() } + } + + /// Create a table from a set of timeseries. + pub fn from_timeseries( + name: impl AsRef<str>, + t: impl Iterator<Item = Timeseries>, + ) -> Result<Self, Error> { + let mut out = Self::new(name); + for each in t { + out.insert(each)?; + } + Ok(out) + } + + /// Return the name of the table. + pub fn name(&self) -> &str { + self.name.as_str() + } + + /// Return the number of timeseries in this table. + pub fn n_timeseries(&self) -> usize { + self.timeseries.len() + } + + /// Return the list of timeseries in this table, ordered by key. + pub fn timeseries(&self) -> impl ExactSizeIterator<Item = &Timeseries> { + self.timeseries.values() + } + + // Check that the schema of `other` matches `self`.
+ // + // That means the fields have the same names and types, and the timeseries + // have the same type info. + fn matches_schema(&self, other: &Timeseries) -> bool { + if let Some((_, first)) = self.timeseries.first_key_value() { + first.matches_schema(other) + } else { + // Table is empty. + true + } + } + + /// Get a timeseries matching the provided key, if any. + pub fn get_mut(&mut self, key: TimeseriesKey) -> Option<&mut Timeseries> { + self.timeseries.get_mut(&key) + } + + /// Insert a new timeseries into the table. + /// + /// If the timeseries already exists, an error is returned. Use + /// [`Table::replace()`] to replace an existing timeseries. + /// + /// It is an error if the timeseries does not have the same schema as the + /// others in the table (if any). + pub fn insert(&mut self, timeseries: Timeseries) -> Result<(), Error> { + anyhow::ensure!( + self.matches_schema(&timeseries), + "Timeseries in a table must have the same schema", + ); + let key = timeseries.key(); + let Entry::Vacant(e) = self.timeseries.entry(key) else { + return Err(anyhow::anyhow!( + "Timeseries with key {} already exists", + key, + )); + }; + e.insert(timeseries); + Ok(()) + } + + /// Replace a timeseries in the table. + pub fn replace(&mut self, timeseries: Timeseries) { + let key = timeseries.key(); + let _ = self.timeseries.insert(key, timeseries); + } + + /// Add multiple timeseries to the table. + /// + /// An error is returned if any timeseries already exist. + pub fn extend( + &mut self, + timeseries: impl Iterator<Item = Timeseries>, + ) -> Result<(), Error> { + for t in timeseries { + self.insert(t)?; + } + Ok(()) + } + + /// Return the number of timeseries in the table. + pub fn len(&self) -> usize { + self.timeseries.len() + } + + /// Return a mutable iterator over timeseries in the table. + pub fn iter_mut(&mut self) -> impl Iterator<Item = &mut Timeseries> { + self.timeseries.values_mut() + } + + /// Return an iterator over timeseries in the table. + pub fn iter(&self) -> impl Iterator<Item = &Timeseries> { + self.timeseries.values() + } + + /// Consume the table and return an iterator over its timeseries. + pub fn into_iter(self) -> impl Iterator<Item = Timeseries> { + self.timeseries.into_values() + } + + /// Return `true` if all the timeseries in this table are aligned, with the + /// same alignment information. + /// + /// If there are no timeseries, `false` is returned. + pub fn is_aligned(&self) -> bool { + let mut timeseries = self.timeseries.values(); + let Some(t) = timeseries.next() else { + return false; + }; + let Some(alignment) = t.alignment else { + return false; + }; + timeseries.all(|t| t.alignment == Some(alignment)) + } + + /// Return the alignment of this table, if all timeseries are aligned with + /// the same alignment. + pub fn alignment(&self) -> Option<Alignment> { + if self.is_aligned() { + Some( + self.timeseries.first_key_value().unwrap().1.alignment.unwrap(), + ) + } else { + None + } + } +} diff --git a/oximeter/db/src/query.rs b/oximeter/db/src/query.rs index 9212769573..e14dfbbc55 100644 --- a/oximeter/db/src/query.rs +++ b/oximeter/db/src/query.rs @@ -576,33 +576,32 @@ impl SelectQuery { match self.field_selectors.len() { 0 => None, n => { - // Select timeseries key for first column, plus field name and field value for - // all columns. - const SELECTED_COLUMNS: &[&str] = - &["field_name", "field_value"]; + // Select timeseries key for first column, plus the field value + // for all columns, aliased to the field name.
const JOIN_COLUMNS: &[&str] = &["timeseries_name", "timeseries_key"]; - let mut top_level_columns = - Vec::with_capacity(1 + SELECTED_COLUMNS.len() * n); + let mut top_level_columns = Vec::with_capacity(2 + n); top_level_columns.push(String::from( "filter0.timeseries_key as timeseries_key", )); let mut from_statements = String::new(); - for (i, subquery) in self + for (i, (field_name, subquery)) in self .field_selectors - .values() - .map(|sel| { - sel.as_query(&self.timeseries_schema.timeseries_name) + .iter() + .map(|(field_schema, selector)| { + ( + &field_schema.name, + selector.as_query( + &self.timeseries_schema.timeseries_name, + ), + ) }) .enumerate() { - for column in SELECTED_COLUMNS { - top_level_columns.push(format!( - "filter{i}.{column}", - i = i, - column = column - )); - } + top_level_columns.push(format!( + "filter{}.field_value AS {}", + i, field_name, )); if i == 0 { from_statements.push_str(&format!( @@ -1028,8 +1027,8 @@ mod tests { concat!( "SELECT ", "filter0.timeseries_key as timeseries_key, ", - "filter0.field_name, filter0.field_value, ", - "filter1.field_name, filter1.field_value ", + "filter0.field_value AS f0, ", + "filter1.field_value AS f1 ", "FROM (", "SELECT * FROM oximeter.fields_i64 ", "WHERE timeseries_name = 'foo:bar' ", @@ -1095,8 +1094,8 @@ mod tests { concat!( "SELECT ", "filter0.timeseries_key as timeseries_key, ", - "filter0.field_name, filter0.field_value, ", - "filter1.field_name, filter1.field_value ", + "filter0.field_value AS f0, ", + "filter1.field_value AS f1 ", "FROM (", "SELECT * FROM oximeter.fields_i64 ", "WHERE timeseries_name = 'foo:bar' AND field_name = 'f0' AND field_value = 0", @@ -1152,8 +1151,8 @@ mod tests { query.field_query().unwrap(), concat!( "SELECT filter0.timeseries_key as timeseries_key, ", - "filter0.field_name, filter0.field_value, ", - "filter1.field_name, filter1.field_value ", + "filter0.field_value AS f0, ", + "filter1.field_value AS f1 ", "FROM (", "SELECT * FROM oximeter.fields_i64 ", "WHERE timeseries_name = 'foo:bar' AND field_name = 'f0' AND field_value = 0", diff --git a/oximeter/db/src/sql/mod.rs b/oximeter/db/src/sql/mod.rs index 5d9685d19f..8a5bd20bde 100644 --- a/oximeter/db/src/sql/mod.rs +++ b/oximeter/db/src/sql/mod.rs @@ -32,6 +32,7 @@ use crate::query::measurement_table_name; use crate::DatumType; use crate::Error as OxdbError; use crate::FieldType; +use crate::QuerySummary; use crate::TimeseriesName; use crate::TimeseriesSchema; use indexmap::IndexSet; @@ -131,6 +132,31 @@ macro_rules! unsupported { }; } +/// A tabular result from a SQL query against a timeseries. +#[derive(Clone, Debug, Default, serde::Serialize)] +pub struct Table { + /// The name of each column in the result set. + pub column_names: Vec<String>, + /// The rows of the result set, each with one entry per column. + pub rows: Vec<Vec<serde_json::Value>>, +} + +/// The full result of running a SQL query against a timeseries. +#[derive(Clone, Debug)] +pub struct QueryResult { + /// The query as written by the client. + pub original_query: String, + /// The rewritten query, run against the JOINed representation of the + /// timeseries. + /// + /// This is the query that is actually run in the database itself. + pub rewritten_query: String, + /// Summary of the resource usage of the query. + pub summary: QuerySummary, + /// The result of the query, with column names and rows. + pub table: Table, +} + +/// A helper type to preprocess any ClickHouse-specific SQL, and present a /// known-safe version of it to the main `sqlparser` code.
/// diff --git a/oximeter/oximeter/src/types.rs b/oximeter/oximeter/src/types.rs index eff5c399e3..04289a7297 100644 --- a/oximeter/oximeter/src/types.rs +++ b/oximeter/oximeter/src/types.rs @@ -311,7 +311,7 @@ pub enum DatumType { impl DatumType { /// Return `true` if this datum type is cumulative, and `false` otherwise. - pub fn is_cumulative(&self) -> bool { + pub const fn is_cumulative(&self) -> bool { matches!( self, DatumType::CumulativeI64 @@ -331,9 +331,26 @@ impl DatumType { ) } + /// Return `true` if this datum type is a scalar, and `false` otherwise. + pub const fn is_scalar(&self) -> bool { + !self.is_histogram() + } + /// Return `true` if this datum type is a histogram, and `false` otherwise. pub const fn is_histogram(&self) -> bool { - matches!(self, DatumType::HistogramF64 | DatumType::HistogramI64) + matches!( + self, + DatumType::HistogramI8 + | DatumType::HistogramU8 + | DatumType::HistogramI16 + | DatumType::HistogramU16 + | DatumType::HistogramI32 + | DatumType::HistogramU32 + | DatumType::HistogramI64 + | DatumType::HistogramU64 + | DatumType::HistogramF32 + | DatumType::HistogramF64 + ) } } @@ -450,6 +467,11 @@ impl Datum { Datum::Missing(ref inner) => inner.start_time(), } } + + /// Return true if this datum is missing. + pub fn is_missing(&self) -> bool { + matches!(self, Datum::Missing(_)) + } } // Helper macro to generate `From` and `From<&T>` for the datum types. @@ -580,7 +602,7 @@ impl Measurement { /// Return true if this measurement represents a missing datum. pub fn is_missing(&self) -> bool { - matches!(self.datum, Datum::Missing(_)) + self.datum.is_missing() } /// Return the datum for this measurement diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index a2d853ac23..659b10c721 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -74,6 +74,7 @@ num-integer = { version = "0.1.46", features = ["i128"] } num-iter = { version = "0.1.44", default-features = false, features = ["i128"] } num-traits = { version = "0.2.18", features = ["i128", "libm"] } openapiv3 = { version = "2.0.0", default-features = false, features = ["skip_serializing_defaults"] } +peg-runtime = { version = "0.8.2", default-features = false, features = ["std"] } pem-rfc7468 = { version = "0.7.0", default-features = false, features = ["std"] } petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } @@ -179,6 +180,7 @@ num-integer = { version = "0.1.46", features = ["i128"] } num-iter = { version = "0.1.44", default-features = false, features = ["i128"] } num-traits = { version = "0.2.18", features = ["i128", "libm"] } openapiv3 = { version = "2.0.0", default-features = false, features = ["skip_serializing_defaults"] } +peg-runtime = { version = "0.8.2", default-features = false, features = ["std"] } pem-rfc7468 = { version = "0.7.0", default-features = false, features = ["std"] } petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] }
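For reference, a minimal sketch of how the pieces added above fit together. It is not part of the diff: the function name and the assumption that it lives inside the oximeter-db crate (under the `crate::oxql::query` module path, so the crate-private `coalesced_predicates` is visible) are illustrative, and the timeseries name is taken from the tests above. Only APIs introduced in this change are used.

// A usage sketch, assuming it lives somewhere inside oximeter-db.
use crate::oxql::query::Query;

fn oxql_query_sketch() {
    // Parsing infers the query end time from any `timestamp` filters; with
    // no upper bound named, it falls back to `Utc::now()`.
    let q = Query::new(
        "get physical_data_link:bytes_sent | filter timestamp > @now() - 1d",
    )
    .expect("a valid OxQL query");
    println!("query end time: {}", q.end_time());

    // Coalesce all filters so they can be pushed down toward the initial
    // `get`; with no outer predicate this returns the query's own filter,
    // which here names only `timestamp` and so survives intact.
    let pushed_down = q.coalesced_predicates(None);
    assert!(pushed_down.is_some());
}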