diff --git a/Cargo.lock b/Cargo.lock index a667cacef7..a6ac143c3a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7901,6 +7901,7 @@ dependencies = [ "strum", "tabled", "tempfile", + "termtree 0.5.1", "thiserror", "tokio", "tokio-util", @@ -8789,7 +8790,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41b740d195ed3166cd147c8047ec98db0e22ec019eb8eeb76d343b795304fb13" dependencies = [ "predicates-core", - "termtree", + "termtree 0.4.1", ] [[package]] @@ -11588,6 +11589,12 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" +[[package]] +name = "termtree" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" + [[package]] name = "test-strategy" version = "0.3.1" diff --git a/Cargo.toml b/Cargo.toml index 8f4ba04ad1..1e64cb538d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -629,6 +629,7 @@ tar = "0.4" tempfile = "3.10" term = "0.7" termios = "0.3" +termtree = "0.5.1" textwrap = "0.16.1" test-strategy = "0.3.1" thiserror = "1.0" diff --git a/oximeter/db/Cargo.toml b/oximeter/db/Cargo.toml index 180f9738d2..df341ceeb0 100644 --- a/oximeter/db/Cargo.toml +++ b/oximeter/db/Cargo.toml @@ -43,6 +43,7 @@ slog-async.workspace = true slog-dtrace.workspace = true slog-term.workspace = true strum.workspace = true +termtree.workspace = true thiserror.workspace = true tokio-util.workspace = true usdt.workspace = true diff --git a/oximeter/db/src/client/oxql.rs b/oximeter/db/src/client/oxql.rs index 52be9ae81c..ebfa24ac59 100644 --- a/oximeter/db/src/client/oxql.rs +++ b/oximeter/db/src/client/oxql.rs @@ -15,6 +15,7 @@ use crate::oxql::ast::table_ops::filter; use crate::oxql::ast::table_ops::filter::Filter; use crate::oxql::ast::table_ops::limit::Limit; use crate::oxql::ast::table_ops::limit::LimitKind; +use crate::oxql::Query; use crate::query::field_table_name; use crate::Error; use crate::Metric; @@ -113,6 +114,40 @@ struct ConsistentKeyGroup { } impl Client { + /// Build a query plan for the OxQL query. + pub async fn plan_oxql_query( + &self, + query: impl AsRef, + ) -> Result { + let query = query.as_ref(); + let parsed_query = oxql::Query::new(query)?; + self.build_query_plan(&parsed_query).await + } + + /// Build a query plan for the OxQL query. + async fn build_query_plan( + &self, + query: &Query, + ) -> Result { + let referenced_timeseries = query.all_timeseries_names(); + let schema = self + .schema + .lock() + .await + .iter() + .filter_map(|(name, schema)| { + if referenced_timeseries.contains(name) { + Some((name.clone(), schema.clone())) + } else { + None + } + }) + .collect(); + let plan = + oxql::plan::Plan::new(query.parsed_query().clone(), &schema)?; + Ok(plan) + } + /// Run a OxQL query. pub async fn oxql_query( &self, @@ -132,6 +167,15 @@ impl Client { // See https://github.com/oxidecomputer/omicron/issues/5298. let query = query.as_ref(); let parsed_query = oxql::Query::new(query)?; + let plan = self.build_query_plan(&parsed_query).await?; + if plan.requires_full_table_scan() { + return Err(Error::Oxql(anyhow::anyhow!( + "This query requires at least one full table scan. \ + Please rewrite the query to filter either the fields \ + or timestamps, in order to reduce the amount of data \ + fetched from the database." 
+ ))); + } let query_id = Uuid::new_v4(); let query_log = self.log.new(slog::o!("query_id" => query_id.to_string())); @@ -837,12 +881,12 @@ impl Client { // return. // // This is used to ensure that we never go above the limit in - // `MAX_RESULT_SIZE`. That restricts the _total_ number of rows we want - // to retch from the database. So we set our limit to be one more than - // the remainder on our allotment. If we get exactly as many as we set - // in the limit, then we fail the query because there are more rows that - // _would_ be returned. We don't know how many more, but there is at - // least 1 that pushes us over the limit. This prevents tricky + // `MAX_DATABASE_ROWS`. That restricts the _total_ number of rows we + // want to retch from the database. So we set our limit to be one more + // than the remainder on our allotment. If we get exactly as many as we + // set in the limit, then we fail the query because there are more row + // that _would_ be returned. We don't know how many more, but there is + // at least 1 that pushes us over the limit. This prevents tricky // TOCTOU-like bugs where we need to check the limit twice, and improves // performance, since we don't return much more than we could possibly // handle. @@ -1293,7 +1337,9 @@ mod tests { #[tokio::test] async fn test_get_entire_table() { let ctx = setup_oxql_test("test_get_entire_table").await; - let query = "get some_target:some_metric"; + // We need _some_ filter here to avoid a provable full-table scan. + let query = + "get some_target:some_metric | filter timestamp > @2020-01-01"; let result = ctx .client .oxql_query(query) diff --git a/oximeter/db/src/oxql/ast/literal.rs b/oximeter/db/src/oxql/ast/literal.rs index d80977fe49..4420a09fa6 100644 --- a/oximeter/db/src/oxql/ast/literal.rs +++ b/oximeter/db/src/oxql/ast/literal.rs @@ -13,6 +13,7 @@ use chrono::DateTime; use chrono::Utc; use oximeter::FieldType; use oximeter::FieldValue; +use oxql_types::point::DataType; use regex::Regex; use std::borrow::Borrow; use std::fmt; @@ -35,6 +36,20 @@ pub enum Literal { } impl Literal { + // Return the name of this literal's type as a string. + pub(crate) fn type_name(&self) -> &'static str { + match self { + Literal::Integer(_) => "Integer", + Literal::Double(_) => "Double", + Literal::String(_) => "String", + Literal::Boolean(_) => "Boolean", + Literal::Uuid(_) => "Uuid", + Literal::Duration(_) => "Duration", + Literal::Timestamp(_) => "Timestamp", + Literal::IpAddr(_) => "IpAddr", + } + } + // Format the literal as a safe, typed string for ClickHouse. pub(crate) fn as_db_safe_string(&self) -> String { match self { @@ -93,6 +108,22 @@ impl Literal { } } + // Return true if this literal can be compared to a datum of the provided + // type. + pub(crate) fn is_compatible_with_datum(&self, data_type: DataType) -> bool { + match (self, data_type) { + (Literal::Integer(_), DataType::Integer) + | (Literal::Double(_), DataType::Double) + | (Literal::String(_), DataType::String) + | (Literal::Boolean(_), DataType::Boolean) + | (Literal::Duration(_), DataType::Integer) + | (Literal::Duration(_), DataType::Double) + | (Literal::Timestamp(_), DataType::Integer) + | (Literal::Timestamp(_), DataType::Double) => true, + (_, _) => false, + } + } + /// Apply the comparison op between self and the provided field. 
/// /// Return None if the comparison cannot be applied, either because the type diff --git a/oximeter/db/src/oxql/ast/mod.rs b/oximeter/db/src/oxql/ast/mod.rs index 21fe5b0387..8d09bb4096 100644 --- a/oximeter/db/src/oxql/ast/mod.rs +++ b/oximeter/db/src/oxql/ast/mod.rs @@ -6,6 +6,9 @@ // Copyright 2024 Oxide Computer Company +use std::collections::BTreeSet; +use std::fmt; + use chrono::DateTime; use chrono::Utc; use oximeter::TimeseriesName; @@ -26,12 +29,32 @@ pub struct Query { ops: Vec, } +impl fmt::Display for Query { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let n_ops = self.ops.len(); + for (i, op) in self.ops.iter().enumerate() { + write!(f, "{op}")?; + if i < n_ops - 1 { + write!(f, " | ")?; + } + } + Ok(()) + } +} + impl Query { // Return the first operation in the query, which is always a form of `get`. fn first_op(&self) -> &TableOp { self.ops.first().expect("Should have parsed at least 1 operation") } + /// Iterate over the table operations. + pub(crate) fn table_ops( + &self, + ) -> impl ExactSizeIterator + '_ { + self.ops.iter() + } + pub(crate) fn timeseries_name(&self) -> &TimeseriesName { match self.first_op() { TableOp::Basic(BasicTableOp::Get(n)) => n, @@ -42,6 +65,32 @@ impl Query { } } + /// Return _all_ timeseries names referred to by get table operations. + pub(crate) fn all_timeseries_names(&self) -> BTreeSet<&TimeseriesName> { + let mut set = BTreeSet::new(); + self.all_timeseries_names_impl(&mut set); + set + } + + fn all_timeseries_names_impl<'a>( + &'a self, + set: &mut BTreeSet<&'a TimeseriesName>, + ) { + for op in self.ops.iter() { + match op { + TableOp::Basic(BasicTableOp::Get(name)) => { + set.insert(name); + } + TableOp::Basic(_) => {} + TableOp::Grouped(GroupedTableOp { ops }) => { + for query in ops.iter() { + query.all_timeseries_names_impl(set); + } + } + } + } + } + // Check that this query (and any subqueries) start with a get table op, and // that there are no following get operations. I.e., we have: // diff --git a/oximeter/db/src/oxql/ast/table_ops/align.rs b/oximeter/db/src/oxql/ast/table_ops/align.rs index b0cd7d80f1..3606af689c 100644 --- a/oximeter/db/src/oxql/ast/table_ops/align.rs +++ b/oximeter/db/src/oxql/ast/table_ops/align.rs @@ -19,6 +19,7 @@ use oxql_types::point::Values; use oxql_types::Alignment; use oxql_types::Table; use oxql_types::Timeseries; +use std::fmt; use std::time::Duration; // The maximum factor by which an alignment operation may upsample data. @@ -68,7 +69,7 @@ fn verify_max_upsampling_ratio( /// /// Alignment is used to produce data at the defined timestamps, so that samples /// from multiple timeseries may be combined or correlated in meaningful ways. -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Copy, Debug, PartialEq)] pub struct Align { /// The alignment method, used to describe how data over the input period /// is used to generate an output sample. @@ -87,6 +88,16 @@ pub struct Align { pub period: Duration, } +impl std::fmt::Display for Align { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let name = match self.method { + AlignmentMethod::Interpolate => "interpolate", + AlignmentMethod::MeanWithin => "mean_within", + }; + write!(f, "{}({:?})", name, self.period) + } +} + impl Align { // Apply the alignment function to the set of tables. pub(crate) fn apply( @@ -108,7 +119,7 @@ impl Align { } /// An alignment method. 
-#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Copy, Debug, PartialEq)] pub enum AlignmentMethod { /// Alignment is done by interpolating the output data at the specified /// period. @@ -118,6 +129,15 @@ pub enum AlignmentMethod { MeanWithin, } +impl fmt::Display for AlignmentMethod { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + AlignmentMethod::Interpolate => write!(f, "interpolate"), + AlignmentMethod::MeanWithin => write!(f, "mean_within"), + } + } +} + // Align the timeseries in a table by computing the average within each output // period. fn align_mean_within( diff --git a/oximeter/db/src/oxql/ast/table_ops/filter.rs b/oximeter/db/src/oxql/ast/table_ops/filter/mod.rs similarity index 91% rename from oximeter/db/src/oxql/ast/table_ops/filter.rs rename to oximeter/db/src/oxql/ast/table_ops/filter/mod.rs index ad398da983..e125e0505c 100644 --- a/oximeter/db/src/oxql/ast/table_ops/filter.rs +++ b/oximeter/db/src/oxql/ast/table_ops/filter/mod.rs @@ -12,6 +12,7 @@ use crate::oxql::ast::literal::Literal; use crate::oxql::ast::logical_op::LogicalOp; use crate::oxql::ast::table_ops::limit::Limit; use crate::oxql::ast::table_ops::limit::LimitKind; +use crate::oxql::schema::TableSchema; use crate::oxql::Error; use crate::shells::special_idents; use chrono::DateTime; @@ -27,6 +28,9 @@ use oxql_types::Timeseries; use regex::Regex; use std::collections::BTreeSet; use std::fmt; +use std::time::Duration; + +pub(crate) mod visit; /// An AST node for the `filter` table operation. /// @@ -80,6 +84,105 @@ impl Filter { out } + // Recursively apply the visitor to all the filter nodes in self. + fn visit_fields(&self, visitor: &V) -> Result, Error> + where + V: visit::Visit, + { + match &self.expr { + FilterExpr::Simple(simple) => { + visitor.visit_simple(self.negated, &simple) + } + FilterExpr::Compound(compound) => { + let left = compound.left.visit_fields(visitor)?; + let right = compound.right.visit_fields(visitor)?; + let out = match (left, right) { + (None, None) => None, + (None, Some(single)) | (Some(single), None) => Some(single), + (Some(left), Some(right)) => Some(visitor.combine( + self.negated, + left, + right, + compound.op, + )), + }; + Ok(out) + } + } + } + + /// Remove any predicates on the datum of a table. + pub fn remove_datum( + &self, + schema: &TableSchema, + ) -> Result, Error> { + let visitor = visit::RemoveDatum { schema }; + self.visit_fields(&visitor) + } + + /// Remove any predicates _not_ on the datum of a table. + pub fn only_datum( + &self, + schema: &TableSchema, + ) -> Result, Error> { + let visitor = visit::OnlyDatum { schema }; + self.visit_fields(&visitor) + } + + /// Restrict predicates so that they apply only to the schema's fields. + pub fn restrict_to_fields( + &self, + schema: &TableSchema, + ) -> Result, Error> { + let visitor = visit::RestrictToFields { schema }; + self.visit_fields(&visitor) + } + + /// Restrict predicates so that they apply only to the schema's + /// measurements. + pub fn restrict_to_measurements( + &self, + schema: &TableSchema, + ) -> Result, Error> { + anyhow::ensure!( + schema.data_types.len() == 1, + "Rewriting measurement filters is only valid \ + for 1-dimensional tables" + ); + let visitor = visit::RestrictToMeasurements { schema }; + self.visit_fields(&visitor) + } + + /// Rewrite predicates so that they apply only to the schema's field tables. + /// + /// This returns the _database_ filters that can be applied in a `WHERE` + /// clause to implement the corresponding filter. 
+ pub fn rewrite_for_field_tables( + &self, + schema: &TableSchema, + ) -> Result, Error> { + let visitor = visit::RewriteForFieldTables { schema }; + self.visit_fields(&visitor) + } + + /// Rewrite predicates so that they apply only to the schema's measurement + /// table. + /// + /// This returns the _database_ filters that can be applied in a `WHERE` + /// clause to implement the corresponding filter. + pub fn rewrite_for_measurement_table( + &self, + schema: &TableSchema, + ) -> Result, Error> { + anyhow::ensure!( + schema.data_types.len() == 1, + "Rewriting measurement filters is only valid \ + for 1-dimensional tables" + ); + let visitor = visit::RewriteForMeasurementTable { schema }; + self.visit_fields(&visitor) + } + fn flatten_disjunctions_inner(&self, dis: &mut Vec) { // Recursion is only needed if this is an OR expression. In that case, // we split the left and push it, and then recurse on the right. @@ -231,6 +334,11 @@ impl Filter { Ok(Self { negated: self.negated, ..new }) } + // Helper method to combine this filter with another, using an AND operator. + pub(crate) fn and(&self, other: &Filter) -> Self { + self.merge(other, LogicalOp::And) + } + // Merge this filter with another one, using the provided operator. pub(crate) fn merge(&self, other: &Filter, op: LogicalOp) -> Self { Self { @@ -301,7 +409,7 @@ impl Filter { // There are extra, implied names that depend on the data type of the // timeseries itself, check those as well. - let extras = implicit_field_names(first_timeseries); + let extras = implicit_field_names_for_timeseries(first_timeseries); let ident_names = self.ident_names(); let not_valid = ident_names .iter() @@ -483,21 +591,51 @@ impl Filter { } } } + + /// Shift timestamps in any filters forward or backwards by this period, + /// depending on the direction implied by their comparison. + /// + /// E.g., `timestamp > t0` is shifted _backwards_ by `period`, so that it + /// ensures it captures `t0 - period`. + pub(crate) fn shift_timestamp_by(&self, period: Duration) -> Self { + let visitor = visit::ShiftTimestamps { period }; + self.visit_fields(&visitor).unwrap().unwrap() + } } /// Return the names of the implicit fields / columns that a filter can apply /// to, based on the metric types of the contained data points. -fn implicit_field_names( +fn implicit_field_names_for_timeseries( first_timeseries: &Timeseries, ) -> BTreeSet<&'static str> { - let mut out = BTreeSet::new(); - - // Everything has a timestamp! - out.insert(special_idents::TIMESTAMP); let type_info = first_timeseries .points .metric_types() .zip(first_timeseries.points.data_types()); + implicit_field_names_for_types(type_info) +} + +/// Return the implicit field names a filter can apply to, from the given table +/// schema. +pub fn implicit_field_names_for_table_schema( + schema: &TableSchema, +) -> BTreeSet<&'static str> { + implicit_field_names_for_types( + schema + .metric_types + .iter() + .copied() + .zip(schema.data_types.iter().copied()), + ) +} + +fn implicit_field_names_for_types( + type_info: impl Iterator, +) -> BTreeSet<&'static str> { + let mut out = BTreeSet::new(); + + // Everything has a timestamp! + out.insert(special_idents::TIMESTAMP); for (metric_type, data_type) in type_info { match (metric_type, data_type) { // Scalar gauges. 
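The filter-rewriting methods added above (`remove_datum`, `only_datum`, `restrict_to_fields`, `rewrite_for_field_tables`, and friends) all share one traversal: `visit_fields` walks the boolean filter tree, asks a visitor to map each leaf predicate to `Some(output)` or `None`, and recombines surviving outputs with the original logical operator, letting dropped subtrees collapse away. Below is a minimal, self-contained sketch of that idea using simplified stand-in types (`Expr`, `KeepMatching`), not the crate's real `Filter`/`SimpleFilter` definitions, and handling only AND nodes rather than the full set of logical operators and negation.

```rust
// Simplified stand-ins for the filter AST; the real types carry comparisons,
// literals, and negation in addition to the identifier.
#[derive(Clone, Debug)]
enum Expr {
    Leaf { ident: String },
    And(Box<Expr>, Box<Expr>),
}

// A visitor maps each leaf to Some(rewritten leaf) or None (drop it).
trait Visit {
    fn visit_leaf(&self, ident: &str) -> Option<Expr>;
}

// Keep only leaves whose identifier passes the provided predicate.
struct KeepMatching<F: Fn(&str) -> bool>(F);

impl<F: Fn(&str) -> bool> Visit for KeepMatching<F> {
    fn visit_leaf(&self, ident: &str) -> Option<Expr> {
        if (self.0)(ident) {
            Some(Expr::Leaf { ident: ident.to_string() })
        } else {
            None
        }
    }
}

// Recursively apply the visitor, collapsing subtrees whose leaves were all
// dropped and keeping a lone surviving side as-is.
fn visit(expr: &Expr, visitor: &impl Visit) -> Option<Expr> {
    match expr {
        Expr::Leaf { ident } => visitor.visit_leaf(ident),
        Expr::And(left, right) => {
            match (visit(left, visitor), visit(right, visitor)) {
                (None, None) => None,
                (Some(one), None) | (None, Some(one)) => Some(one),
                (Some(l), Some(r)) => Some(Expr::And(Box::new(l), Box::new(r))),
            }
        }
    }
}

fn main() {
    // Restricting `project_id == ... && datum > ...` to field predicates
    // drops the datum leaf and keeps the field leaf.
    let expr = Expr::And(
        Box::new(Expr::Leaf { ident: "project_id".into() }),
        Box::new(Expr::Leaf { ident: "datum".into() }),
    );
    let only_fields = visit(&expr, &KeepMatching(|ident: &str| ident != "datum"));
    println!("{only_fields:?}");
}
```

The `None` case is what lets the planner detect full table scans: when a rewrite such as `rewrite_for_field_tables` produces no predicate at all, the `Get` plan node has nothing to push into the field queries and reports the key group as a full scan.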
diff --git a/oximeter/db/src/oxql/ast/table_ops/filter/visit.rs b/oximeter/db/src/oxql/ast/table_ops/filter/visit.rs new file mode 100644 index 0000000000..f68198c77c --- /dev/null +++ b/oximeter/db/src/oxql/ast/table_ops/filter/visit.rs @@ -0,0 +1,494 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Methods for walking a tree of filter nodes and applying a function to them. + +// Copyright 2024 Oxide Computer Company + +use std::time::Duration; + +use super::CompoundFilter; +use super::Filter; +use super::FilterExpr; +use super::SimpleFilter; +use crate::oxql::ast::cmp::Comparison; +use crate::oxql::ast::literal::Literal; +use crate::oxql::ast::logical_op::LogicalOp; +use crate::oxql::schema::TableSchema; +use crate::oxql::Error; +use crate::shells::special_idents; + +/// A trait for visiting a tree of filter nodes. +/// +/// This is used to walk a tree of filter nodes and produce some oputput based +/// on them. For example, this can be used to convert the tree into a string +/// representation for use in the database. +pub trait Visit { + /// The result of applying the visitor to a simple filter node. + type Output; + + /// Visit one simple filter node, returning the output. + fn visit_simple( + &self, + negated: bool, + filter: &SimpleFilter, + ) -> Result, Error>; + + /// Combine the output of previously-visited filter nodes. + fn combine( + &self, + negated: bool, + left: Self::Output, + right: Self::Output, + op: LogicalOp, + ) -> Self::Output; +} + +/// A visitor that shifts timestamps. +pub struct ShiftTimestamps { + pub period: Duration, +} + +impl Visit for ShiftTimestamps { + type Output = Filter; + + fn visit_simple( + &self, + negated: bool, + filter: &SimpleFilter, + ) -> Result, Error> { + let SimpleFilter { ident, cmp, value } = filter; + if ident.as_str() == special_idents::TIMESTAMP { + let Literal::Timestamp(timestamp) = value else { + anyhow::bail!("Filter expression is not a timestamp"); + }; + let new_timestamp = match cmp { + Comparison::Eq + | Comparison::Ne + | Comparison::Lt + | Comparison::Le => *timestamp, + Comparison::Gt | Comparison::Ge => *timestamp - self.period, + Comparison::Like => unreachable!(), + }; + Ok(Some(Filter { + negated, + expr: FilterExpr::Simple(SimpleFilter { + value: Literal::Timestamp(new_timestamp), + ..filter.clone() + }), + })) + } else { + Ok(Some(Filter { + negated, + expr: FilterExpr::Simple(filter.clone()), + })) + } + } + + fn combine( + &self, + negated: bool, + left: Self::Output, + right: Self::Output, + op: LogicalOp, + ) -> Self::Output { + Filter { + negated, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(left), + op, + right: Box::new(right), + }), + } + } +} + +/// A visitor that _keeps_ filter expressions that refer to the datum. +pub struct OnlyDatum<'a> { + pub schema: &'a TableSchema, +} + +impl<'a> Visit for OnlyDatum<'a> { + type Output = Filter; + + fn visit_simple( + &self, + negated: bool, + filter: &SimpleFilter, + ) -> Result, Error> { + // It should be a datum. Check and remove it. 
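+            // Note: in `OnlyDatum` a matching datum predicate is *kept*, not
+            // removed; any non-datum predicate is dropped by returning `None`
+            // below.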
+ if filter.ident.as_str() == special_idents::DATUM { + for ty in self.schema.data_types.iter().copied() { + anyhow::ensure!( + filter.value.is_compatible_with_datum(ty), + "Expression for datum on table {} is not \ + compatible with its type {}", + self.schema.name, + ty, + ); + } + return Ok(Some(Filter { + negated, + expr: FilterExpr::Simple(filter.clone()), + })); + } + + // Anything else is outta here. + Ok(None) + } + + fn combine( + &self, + negated: bool, + left: Self::Output, + right: Self::Output, + op: LogicalOp, + ) -> Self::Output { + Filter { + negated, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(left), + op, + right: Box::new(right), + }), + } + } +} + +/// A visitor that removes filter expressions that refer to the datum. +pub struct RemoveDatum<'a> { + pub schema: &'a TableSchema, +} + +impl<'a> Visit for RemoveDatum<'a> { + type Output = Filter; + + fn visit_simple( + &self, + negated: bool, + filter: &SimpleFilter, + ) -> Result, Error> { + if let Some(field_type) = self.schema.field_type(filter.ident.as_str()) + { + if !filter.value_type_is_compatible_with_field(*field_type) { + return Err(anyhow::anyhow!( + "Expression for field {} is not compatible with \ + its type {}", + filter.ident, + field_type, + )); + } + return Ok(Some(Filter { + negated, + expr: FilterExpr::Simple(filter.clone()), + })); + } + + // The relevant columns on which we filter depend on the datum + // type of the table. All tables support "timestamp". + let ident = filter.ident.as_str(); + if ident == special_idents::TIMESTAMP { + if matches!(filter.value, Literal::Timestamp(_)) { + return Ok(Some(Filter { + negated, + expr: FilterExpr::Simple(filter.clone()), + })); + } + return Err(anyhow::anyhow!( + "Literal cannot be compared with a timestamp" + )); + } + + // Check for comparison against the start time, which only works + // for cumulative tables. + if ident == special_idents::START_TIME { + if !self.schema.metric_types[0].is_cumulative() { + return Err(anyhow::anyhow!( + "Start time can only be compared if the metric \ + is cumulative, but table '{}' has metric type {}", + self.schema.name, + &self.schema.metric_types[0], + )); + } + if matches!(filter.value, Literal::Timestamp(_)) { + return Ok(Some(Filter { + negated, + expr: FilterExpr::Simple(filter.clone()), + })); + } + return Err(anyhow::anyhow!( + "Literal cannot be compared with a timestamp" + )); + } + + // It should be a datum. Check and remove it. + if ident == special_idents::DATUM { + return Ok(None); + } + + // Anything else is a bug. + unreachable!("Filter identifier '{}' is not valid", filter.ident,); + } + + fn combine( + &self, + negated: bool, + left: Self::Output, + right: Self::Output, + op: LogicalOp, + ) -> Self::Output { + Filter { + negated, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(left), + op, + right: Box::new(right), + }), + } + } +} + +/// A visitor that restricts filter expressions to those that apply to the +/// _fields_ of a table schema. 
+pub struct RestrictToFields<'a> { + pub schema: &'a TableSchema, +} + +impl<'a> Visit for RestrictToFields<'a> { + type Output = Filter; + + fn visit_simple( + &self, + negated: bool, + filter: &SimpleFilter, + ) -> Result, Error> { + let Some(field_type) = self.schema.field_type(filter.ident.as_str()) + else { + return Ok(None); + }; + if !filter.value_type_is_compatible_with_field(*field_type) { + return Err(anyhow::anyhow!( + "Expression for field {} is not compatible with \ + its type {}", + filter.ident, + field_type, + )); + } + Ok(Some(Filter { negated, expr: FilterExpr::Simple(filter.clone()) })) + } + + fn combine( + &self, + negated: bool, + left: Self::Output, + right: Self::Output, + op: LogicalOp, + ) -> Self::Output { + Filter { + negated, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(left), + op, + right: Box::new(right), + }), + } + } +} + +/// A visitor that restricts filter expressions to those that apply to the +/// _measurements_ of a table schema. +pub struct RestrictToMeasurements<'a> { + pub schema: &'a TableSchema, +} + +impl<'a> Visit for RestrictToMeasurements<'a> { + type Output = Filter; + + fn visit_simple( + &self, + negated: bool, + filter: &SimpleFilter, + ) -> Result, Error> { + // The relevant columns on which we filter depend on the datum + // type of the table. All tables support "timestamp". + let ident = filter.ident.as_str(); + if ident == special_idents::TIMESTAMP { + if matches!(filter.value, Literal::Timestamp(_)) { + return Ok(Some(Filter { + negated, + expr: FilterExpr::Simple(filter.clone()), + })); + } + return Err(anyhow::anyhow!( + "Literal cannot be compared with a timestamp" + )); + } + + // Check for comparison against the start time, which only works + // for cumulative tables. + if ident == special_idents::START_TIME { + if !self.schema.metric_types[0].is_cumulative() { + return Err(anyhow::anyhow!( + "Start time can only be compared if the metric \ + is cumulative, but table '{}' has metric type {}", + self.schema.name, + &self.schema.metric_types[0], + )); + } + if matches!(filter.value, Literal::Timestamp(_)) { + return Ok(Some(Filter { + negated, + expr: FilterExpr::Simple(filter.clone()), + })); + } + return Err(anyhow::anyhow!( + "Literal cannot be compared with a timestamp" + )); + } + + // We'll delegate to the actual table op to filter on any of the + // data columns. + // + // TODO-completeness: We should allow filtering here once we + // push deltas into the database. + Ok(None) + } + + fn combine( + &self, + negated: bool, + left: Self::Output, + right: Self::Output, + op: LogicalOp, + ) -> Self::Output { + Filter { + negated, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(left), + op, + right: Box::new(right), + }), + } + } +} + +/// Rewrite the filters so that they apply to the database field tables only. +pub struct RewriteForFieldTables<'a> { + pub schema: &'a TableSchema, +} + +impl<'a> Visit for RewriteForFieldTables<'a> { + type Output = String; + + fn visit_simple( + &self, + negated: bool, + filter: &SimpleFilter, + ) -> Result, Error> { + // If the predicate names a field in this table schema, + // return that predicate printed as a string. If not, we return + // None. 
+ let Some(field_type) = self.schema.field_type(filter.ident.as_str()) + else { + return Ok(None); + }; + if !filter.value_type_is_compatible_with_field(*field_type) { + return Err(anyhow::anyhow!( + "Expression for field {} is not compatible with \ + its type {}", + filter.ident, + field_type, + )); + } + let maybe_not = if negated { "NOT " } else { "" }; + Ok(Some(format!("{}{}", maybe_not, filter.as_db_safe_string()))) + } + + fn combine( + &self, + negated: bool, + left: Self::Output, + right: Self::Output, + op: LogicalOp, + ) -> Self::Output { + let maybe_not = if negated { "NOT " } else { "" }; + format!("{}{}({left}, {right})", maybe_not, op.as_db_function_name()) + } +} + +/// Rewrite filters so that they apply only to the database measurement table. +pub struct RewriteForMeasurementTable<'a> { + pub schema: &'a TableSchema, +} + +impl<'a> Visit for RewriteForMeasurementTable<'a> { + type Output = String; + + fn visit_simple( + &self, + negated: bool, + filter: &SimpleFilter, + ) -> Result, Error> { + let maybe_not = if negated { "NOT " } else { "" }; + // The relevant columns on which we filter depend on the datum + // type of the table. All tables support "timestamp". + let ident = filter.ident.as_str(); + if ident == special_idents::TIMESTAMP { + if matches!(filter.value, Literal::Timestamp(_)) { + return Ok(Some(format!( + "{}{}", + maybe_not, + filter.as_db_safe_string() + ))); + } + return Err(anyhow::anyhow!( + "Literal cannot be compared with a timestamp" + )); + } + + // Check for comparison against the start time, which only works + // for cumulative tables. + if ident == special_idents::START_TIME { + if !self.schema.metric_types[0].is_cumulative() { + return Err(anyhow::anyhow!( + "Start time can only be compared if the metric \ + is cumulative, but table '{}' has metric type {}", + self.schema.name, + &self.schema.metric_types[0], + )); + } + if matches!(filter.value, Literal::Timestamp(_)) { + return Ok(Some(format!( + "{}{}", + maybe_not, + filter.as_db_safe_string() + ))); + } + return Err(anyhow::anyhow!( + "Literal cannot be compared with a timestamp" + )); + } + + // We'll delegate to the actual table op to filter on any of the data + // columns. + // + // TODO-completeness: We should allow pushing the filters here once we + // push deltas into the database. + Ok(None) + } + + fn combine( + &self, + negated: bool, + left: Self::Output, + right: Self::Output, + op: LogicalOp, + ) -> Self::Output { + let maybe_not = if negated { "NOT " } else { "" }; + format!("{}{}({left}, {right})", maybe_not, op.as_db_function_name()) + } +} + +#[cfg(test)] +mod tests {} diff --git a/oximeter/db/src/oxql/ast/table_ops/group_by.rs b/oximeter/db/src/oxql/ast/table_ops/group_by.rs index c48804a788..aeadc828ea 100644 --- a/oximeter/db/src/oxql/ast/table_ops/group_by.rs +++ b/oximeter/db/src/oxql/ast/table_ops/group_by.rs @@ -19,6 +19,7 @@ use oxql_types::Table; use oxql_types::Timeseries; use std::collections::btree_map::Entry; use std::collections::BTreeMap; +use std::fmt; /// A table operation for grouping data by fields, apply a reducer to the /// remaining. 
@@ -28,6 +29,20 @@ pub struct GroupBy { pub reducer: Reducer, } +impl fmt::Display for GroupBy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "group_by [")?; + let n_idents = self.identifiers.len(); + for (i, ident) in self.identifiers.iter().enumerate() { + write!(f, "{}", ident.as_str())?; + if i < n_idents - 1 { + write!(f, ", ")?; + } + } + write!(f, "], {}", self.reducer) + } +} + impl GroupBy { // Apply the group_by table operation. pub(crate) fn apply(&self, tables: &[Table]) -> Result, Error> { @@ -423,6 +438,15 @@ pub enum Reducer { Sum, } +impl fmt::Display for Reducer { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Reducer::Mean => write!(f, "mean"), + Reducer::Sum => write!(f, "sum"), + } + } +} + #[cfg(test)] mod tests { use super::{GroupBy, Reducer}; diff --git a/oximeter/db/src/oxql/ast/table_ops/limit.rs b/oximeter/db/src/oxql/ast/table_ops/limit.rs index 89afb31a7c..e093246d68 100644 --- a/oximeter/db/src/oxql/ast/table_ops/limit.rs +++ b/oximeter/db/src/oxql/ast/table_ops/limit.rs @@ -8,7 +8,7 @@ use anyhow::Error; use oxql_types::Table; -use std::num::NonZeroUsize; +use std::{fmt, num::NonZeroUsize}; /// The kind of limiting operation #[derive(Clone, Copy, Debug, PartialEq)] @@ -27,6 +27,17 @@ pub struct Limit { /// The number of points the timeseries is limited to. pub count: NonZeroUsize, } + +impl fmt::Display for Limit { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let kind = match self.kind { + LimitKind::First => "first", + LimitKind::Last => "last", + }; + write!(f, "{kind} {}", self.count) + } +} + impl Limit { /// Apply the limit operation to the input tables. pub(crate) fn apply(&self, tables: &[Table]) -> Result, Error> { diff --git a/oximeter/db/src/oxql/ast/table_ops/mod.rs b/oximeter/db/src/oxql/ast/table_ops/mod.rs index 8b8d4cbe1b..5222bb4b19 100644 --- a/oximeter/db/src/oxql/ast/table_ops/mod.rs +++ b/oximeter/db/src/oxql/ast/table_ops/mod.rs @@ -13,6 +13,8 @@ pub mod group_by; pub mod join; pub mod limit; +use std::fmt; + use self::align::Align; use self::filter::Filter; use self::group_by::GroupBy; @@ -36,6 +38,19 @@ pub enum BasicTableOp { Limit(Limit), } +impl fmt::Display for BasicTableOp { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + BasicTableOp::Get(name) => write!(f, "get {name}"), + BasicTableOp::Filter(filter) => write!(f, "filter {filter}"), + BasicTableOp::GroupBy(group_by) => write!(f, "{group_by}"), + BasicTableOp::Join(_) => write!(f, "join"), + BasicTableOp::Align(align) => write!(f, "align {align}"), + BasicTableOp::Limit(limit) => write!(f, "{limit}"), + } + } +} + impl BasicTableOp { pub(crate) fn apply( &self, @@ -59,6 +74,22 @@ pub struct GroupedTableOp { pub ops: Vec, } +impl fmt::Display for GroupedTableOp { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{{ ")?; + let n_queries = self.ops.len(); + for (i, query) in self.ops.iter().enumerate() { + write!(f, "{query}")?; + if i < n_queries - 1 { + write!(f, "; ")?; + } else { + write!(f, "")?; + } + } + write!(f, " }}") + } +} + /// Any kind of OxQL table operation. 
#[derive(Clone, Debug, PartialEq)] pub enum TableOp { @@ -66,6 +97,15 @@ pub enum TableOp { Grouped(GroupedTableOp), } +impl fmt::Display for TableOp { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TableOp::Basic(basic) => write!(f, "{basic}"), + TableOp::Grouped(grouped) => write!(f, "{grouped}"), + } + } +} + impl TableOp { pub(crate) fn apply( &self, diff --git a/oximeter/db/src/oxql/mod.rs b/oximeter/db/src/oxql/mod.rs index fcdfb783c5..3e57e03821 100644 --- a/oximeter/db/src/oxql/mod.rs +++ b/oximeter/db/src/oxql/mod.rs @@ -10,13 +10,15 @@ use peg::error::ParseError as PegError; use peg::str::LineCol; pub mod ast; +pub mod plan; pub mod query; +pub mod schema; pub use self::query::Query; pub use anyhow::Error; /// Format a PEG parsing error into a nice anyhow error. -fn fmt_parse_error(source: &str, err: PegError) -> Error { +pub fn fmt_parse_error(source: &str, err: PegError) -> Error { use std::fmt::Write; let mut out = format!("Error at {}:{}", err.location.line, err.location.column); diff --git a/oximeter/db/src/oxql/plan/align.rs b/oximeter/db/src/oxql/plan/align.rs new file mode 100644 index 0000000000..8221728142 --- /dev/null +++ b/oximeter/db/src/oxql/plan/align.rs @@ -0,0 +1,90 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! OxQL query plan node for aligning tables. + +// Copyright 2024 Oxide Computer Company + +use oxql_types::point::DataType; +use oxql_types::point::MetricType; + +use crate::oxql::ast::table_ops::align; +use crate::oxql::plan::plan::TableOpData; +use crate::oxql::plan::plan::TableOpInput; +use crate::oxql::plan::plan::TableOpOutput; +use crate::oxql::schema::TableSchema; + +/// A node that aligns its input tables. +#[derive(Clone, Debug, PartialEq)] +pub struct Align { + pub output: TableOpOutput, + pub alignment: align::Align, +} + +impl Align { + /// Plan the application of the alignment operation to the input tables + pub fn new( + alignment: align::Align, + input: TableOpInput, + ) -> anyhow::Result { + let tables = input + .tables + .into_iter() + .map(|TableOpData { schema, .. }| { + align_input_schema(schema, alignment.method).map(|schema| { + TableOpData { schema, alignment: Some(alignment) } + }) + }) + .collect::, _>>()?; + let output = TableOpOutput { tables }; + Ok(Align { output, alignment }) + } + + /// Print this plan node as a plan tree entry. + pub fn plan_tree_entry(&self) -> termtree::Tree { + termtree::Tree::new(format!( + "align: method={}, period={:?}", + self.alignment.method, self.alignment.period + )) + } +} + +// Align the input schema, returning the output schema it will produce. 
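+// Cumulative inputs are rejected here (they must be converted to deltas
+// first); `mean_within` always yields a double-typed output, and alignment by
+// interpolation is not yet implemented.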
+fn align_input_schema( + schema: TableSchema, + method: align::AlignmentMethod, +) -> anyhow::Result { + for metric_type in schema.metric_types.iter() { + anyhow::ensure!( + metric_type != &MetricType::Cumulative, + "Only gauge or delta metric types may be aligned, \ + but table '{}' has cumulative metric type", + schema.name, + ); + } + let mut data_types = Vec::with_capacity(schema.data_types.len()); + for data_type in schema.data_types.iter() { + match (data_type, method) { + ( + DataType::Integer | DataType::Double, + align::AlignmentMethod::MeanWithin, + ) => { + data_types.push(DataType::Double); + } + ( + DataType::Integer | DataType::Double, + align::AlignmentMethod::Interpolate, + ) => { + anyhow::bail!( + "Alignment via interpolation is not yet implemented" + ); + } + (_, _) => anyhow::bail!( + "Tables with '{}' data types cannot be aligned", + data_type, + ), + } + } + Ok(TableSchema { data_types, ..schema }) +} diff --git a/oximeter/db/src/oxql/plan/delta.rs b/oximeter/db/src/oxql/plan/delta.rs new file mode 100644 index 0000000000..f3c22b8aca --- /dev/null +++ b/oximeter/db/src/oxql/plan/delta.rs @@ -0,0 +1,49 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Query plan node for constructing a delta from a cumulative timeseries. + +// Copyright 2024 Oxide Computer Company + +use oxql_types::point::MetricType; + +use crate::oxql::schema::TableSchema; + +/// A plan node for computing deltas from a cumulative timeseries. +#[derive(Clone, Debug, PartialEq)] +pub struct Delta { + pub output: TableSchema, +} + +impl Delta { + pub fn new(schema: &TableSchema) -> anyhow::Result { + anyhow::ensure!( + schema.metric_types.len() == 1, + "Deltas can only be applied to 1-dimensional tables", + ); + anyhow::ensure!( + schema.metric_types[0] == MetricType::Cumulative, + "Deltas can only be applied to cumulative tables", + ); + anyhow::ensure!( + schema.data_types[0].is_numeric() + || schema.data_types[0].is_distribution(), + "Deltas can only be applied to numeric or distribution \ + data types, not {}", + schema.data_types[0], + ); + let output = TableSchema { + metric_types: vec![MetricType::Delta], + ..schema.clone() + }; + Ok(Self { output }) + } + + pub fn plan_tree_entry(&self) -> termtree::Tree { + termtree::Tree::new(format!( + "delta: cumulative -> delta ({})", + self.output.data_types[0] + )) + } +} diff --git a/oximeter/db/src/oxql/plan/filter.rs b/oximeter/db/src/oxql/plan/filter.rs new file mode 100644 index 0000000000..d6d840c48e --- /dev/null +++ b/oximeter/db/src/oxql/plan/filter.rs @@ -0,0 +1,216 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! OxQL query plan node for representing a filtering operation. + +// Copyright 2024 Oxide Computer Company + +use std::collections::BTreeSet; + +use crate::oxql::ast::literal::Literal; +use crate::oxql::ast::table_ops::filter::{ + self, implicit_field_names_for_table_schema, +}; +use crate::oxql::ast::table_ops::limit; +use crate::oxql::plan::plan::TableOpInput; +use crate::oxql::plan::predicates::Predicates; +use crate::oxql::schema::TableSchema; +use crate::shells::special_idents; + +/// A node that filters its input tables. 
+#[derive(Clone, Debug, PartialEq)] +pub struct Filter { + /// The input tables to the filter. + /// + /// Note that these are also the output tables, since a filter does not + /// change the schema of its inputs. + pub input: TableOpInput, + + /// The predicates associated with this filter plan node. + /// + /// See the documentation of `Predicates` for details. + pub predicates: Predicates, +} + +impl Filter { + /// Plan the application of the filter to the input tables. + pub fn new( + filter: &filter::Filter, + input: TableOpInput, + ) -> anyhow::Result { + Self::new_owned(filter.clone(), input) + } + + /// Same a `Self::new`, but from an owned filter + pub fn new_owned( + filter: filter::Filter, + input: TableOpInput, + ) -> anyhow::Result { + for table in input.tables.iter() { + Self::ensure_filter_expr_application_is_valid( + &filter.expr, + &table.schema, + )?; + } + Ok(Filter { input, predicates: Predicates::Single(filter) }) + } + + pub fn plan_tree_entry(&self) -> termtree::Tree { + let mut out = + termtree::Tree::new(String::from("filter")).with_multiline(true); + out.extend(self.predicates.plan_tree_entries()); + out + } + + pub fn from_predicates( + predicates: Predicates, + input: TableOpInput, + ) -> anyhow::Result { + for table in input.tables.iter() { + match &predicates { + Predicates::Single(filter) => { + Self::ensure_filter_expr_application_is_valid( + &filter.expr, + &table.schema, + )?; + } + Predicates::Disjunctions(disjunctions) => { + for filter in + disjunctions.iter().filter_map(|dis| dis.as_ref()) + { + Self::ensure_filter_expr_application_is_valid( + &filter.expr, + &table.schema, + )?; + } + } + } + } + Ok(Filter { input, predicates }) + } + + /// Check that the provided filter is valid for a schema. + pub fn ensure_filter_expr_application_is_valid( + expr: &filter::FilterExpr, + schema: &TableSchema, + ) -> anyhow::Result<()> { + match expr { + filter::FilterExpr::Simple(simple) => { + let implicit_fields = + implicit_field_names_for_table_schema(schema); + // Check that the identifier in the filter is in the table. + if let Some(type_) = schema.fields.get(simple.ident.as_str()) { + // If it names a field, make sure we can apply the filter to + // the field as well. + anyhow::ensure!( + simple.value.is_compatible_with_field(*type_), + "Filter expression for field '{}' has type '{}' \ + which is not compatible with the field of \ + type '{}'", + simple.ident, + simple.value.type_name(), + type_, + ); + } else if implicit_fields.contains(simple.ident.as_str()) { + // And check that the filter expression is compatible with the + // provided identifier type. + Self::ensure_special_ident_is_compatible( + simple.ident.as_str(), + &simple.value, + schema, + )?; + } else { + anyhow::bail!( + "The filter expression refers to identifiers \ + that are not valid for its input tables. \ + Invalid identifiers: [{:?}], valid fields: {:?}", + simple.ident.as_str(), + schema + .fields + .keys() + .map(String::as_str) + .collect::>() + .union(&implicit_fields) + .collect::>() + ); + } + } + filter::FilterExpr::Compound(compound) => { + Self::ensure_filter_expr_application_is_valid( + &compound.left.expr, + &schema, + )?; + Self::ensure_filter_expr_application_is_valid( + &compound.right.expr, + &schema, + )?; + } + } + Ok(()) + } + + // Check that the provided lteral is valid for a special identifier. 
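+    // Only `timestamp`, `start_time`, and `datum` are handled here; other
+    // special identifiers (e.g. distribution summaries such as `p50`) are
+    // rejected as not yet supported.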
+ fn ensure_special_ident_is_compatible( + ident: &str, + value: &Literal, + schema: &TableSchema, + ) -> anyhow::Result<()> { + if ident == special_idents::TIMESTAMP + || ident == special_idents::START_TIME + { + anyhow::ensure!( + matches!(value, Literal::Timestamp(_)), + "Filter expression must evaluate to a timestamp, \ + but found type '{}'", + value.type_name(), + ); + } else if ident == special_idents::DATUM { + // TODO-completeness: Support writing comparisons on the datum's value + // by _naming_ the metric, e.g., for `hardware_component:temperature`, + // writing `filter temperature > 30.0`. That's a bit tricky, since we + // need to alias the datum column in the SQL queries, or rewrite the + // filter to refer to the `datum` automatically. + anyhow::ensure!( + schema.data_types.len() == 1, + "The `datum` special identifier may only be used \ + for 1-dimensional tables, but table '{}' has {} dimensions", + schema.name, + schema.data_types.len(), + ); + let data_type = schema.data_types[0]; + anyhow::ensure!( + value.is_compatible_with_datum(data_type), + "Filter expression with type '{}' is not comparable \ + with datum of type '{}'", + value.type_name(), + data_type, + ); + } else { + // TODO-completeness: Support comparison for special distribution + // identifiers: min, max, mean, p50, p90, p99, and possibly bins and + // counts. + anyhow::bail!( + "Filtering on the special identifier '{}' \ + is not yet supported", + ident, + ); + } + Ok(()) + } + + // Return true if we can reorder a limit around the provided filter. + pub fn can_reorder_around(&self, limit: &limit::Limit) -> bool { + match &self.predicates { + Predicates::Single(f) => f.can_reorder_around(limit), + Predicates::Disjunctions(disjunctions) => { + disjunctions.iter().all(|maybe_disjunction| { + maybe_disjunction + .as_ref() + .map(|filter| filter.can_reorder_around(limit)) + .unwrap_or(true) + }) + } + } + } +} diff --git a/oximeter/db/src/oxql/plan/get.rs b/oximeter/db/src/oxql/plan/get.rs new file mode 100644 index 0000000000..3e56f65fdf --- /dev/null +++ b/oximeter/db/src/oxql/plan/get.rs @@ -0,0 +1,85 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! The `get` plan node, for fetching data from the ClickHouse database. + +// Copyright 2024 Oxide Computer Company + +use crate::oxql::ast::table_ops::filter; +use crate::oxql::ast::table_ops::limit; +use crate::oxql::schema::TableSchema; + +/// A node for fetching data from the named timeseries in the database. +#[derive(Clone, Debug, PartialEq)] +pub struct Get { + /// The table schema we're fetching from. + /// + /// This is really a _timeseries_ schema, selecting the named timeseries + /// from the database. These two are synonymous at this point in a plan + /// tree, but we use the table schema for consistency with other plan nodes. + pub table_schema: TableSchema, + + /// The filters applied to the database table for this schema. + /// + /// There is one entry here for every disjunction in the filters that we + /// ultimately push down into the get operation. E.g., for a filter like + /// `filter (x == 0 || x == 1)`, there will be two entries here, `filter (x + /// == 0)` and `filter x == 1`. These are used to construct "consistent key + /// groups", sets of timeseries keys that can all be fetched in one + /// combination of (field SQL query, measurements SQL query). 
+ pub filters: Vec, + + /// An optional limit to the number of samples selected from each + /// timeseries. + /// + /// This is both optional, and only of one kind ("first" or "last"). While a + /// query can express more than one, we currently only push one into the + /// database, which ever appears closer to the front of the query. + pub limit: Option, +} + +impl Get { + /// Convert this node into an entry in a plan tree. + pub fn plan_tree_entry(&self) -> termtree::Tree { + // Push each consistent key group as a child tree. + let mut subtrees = Vec::with_capacity(self.filters.len()); + let mut any_is_full_scan = false; + for (i, filter) in self.filters.iter().enumerate() { + let mut subtree = termtree::Tree::new(format!("key group {i}")) + .with_multiline(true); + let mut is_full_scan = true; + subtree.push(format!( + "field filters={}", + filter + .rewrite_for_field_tables(&self.table_schema) + .unwrap() + .inspect(|_| is_full_scan = false) + .unwrap_or_else(|| String::from("[]")) + )); + subtree.push(format!( + "measurement filters={}", + filter + .rewrite_for_measurement_table(&self.table_schema) + .unwrap() + .inspect(|_| is_full_scan = false) + .unwrap_or_else(|| String::from("[]")) + )); + subtree.push(format!( + "full scan: {}", + if is_full_scan { "YES" } else { "no" } + )); + any_is_full_scan |= is_full_scan; + subtrees.push(subtree); + } + if subtrees.is_empty() || any_is_full_scan { + subtrees.push(termtree::Tree::new(String::from("full scan: YES"))); + } + if let Some(limit) = &self.limit { + subtrees.push(termtree::Tree::new(format!("limit: {limit}"))); + } + termtree::Tree::new(format!("get: \"{}\"", self.table_schema.name)) + .with_multiline(true) + .with_leaves(subtrees) + } +} diff --git a/oximeter/db/src/oxql/plan/group_by.rs b/oximeter/db/src/oxql/plan/group_by.rs new file mode 100644 index 0000000000..a6b1c3560e --- /dev/null +++ b/oximeter/db/src/oxql/plan/group_by.rs @@ -0,0 +1,134 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! OxQL query plan node for grouping data by fields. + +// Copyright 2024 Oxide Computer Company + +use crate::oxql::ast::table_ops::group_by; +use crate::oxql::plan::plan::TableOpData; +use crate::oxql::plan::plan::TableOpInput; +use crate::oxql::schema::TableSchema; +use oxql_types::point::DataType; +use oxql_types::point::MetricType; +use std::collections::BTreeMap; +use std::fmt::Write as _; + +/// A node that groups input tables with the same values for the listed field. +#[derive(Clone, Debug, PartialEq)] +pub struct GroupBy { + pub output: TableOpData, + pub group_by: group_by::GroupBy, +} + +impl GroupBy { + /// Plan the application of a group_by operation to the input tables. 
+ pub fn new( + group_by: &group_by::GroupBy, + input: TableOpInput, + ) -> anyhow::Result { + anyhow::ensure!( + input.tables.len() == 1, + "`group_by` table operations require exactly one input \ + table, but found {}", + input.tables.len(), + ); + let table = input + .tables + .into_iter() + .next() + .expect("table operations must have at least one schema"); + anyhow::ensure!( + table.alignment.is_some(), + "Input table to a `group_by` table operation must be aligned, \ + but table '{}' is not aligned", + table.schema.name, + ); + anyhow::ensure!( + table.schema.metric_types.len() == 1, + "`group_by` table operations require 1-dimensional tables, but \ + table '{}' has {} dimensions", + table.schema.name, + table.schema.metric_types.len(), + ); + anyhow::ensure!( + table.schema.metric_types[0] != MetricType::Cumulative, + "`group_by` table operations require delta or gauge metric \ + types, but table '{}' is cumulative", + table.schema.name, + ); + anyhow::ensure!( + table.schema.data_types[0].is_numeric(), + "`group_by` table operations require numeric data types, \ + but table '{}' has data type '{}'", + table.schema.name, + table.schema.data_types[0], + ); + let output_data_type = + match (&table.schema.data_types[0], group_by.reducer) { + (DataType::Integer, group_by::Reducer::Sum) => { + DataType::Integer + } + ( + DataType::Double, + group_by::Reducer::Mean | group_by::Reducer::Sum, + ) + | (DataType::Integer, group_by::Reducer::Mean) => { + DataType::Double + } + (ty, _) => anyhow::bail!( + "`group_by` table operations require numeric data types, \ + but table '{}' has data type '{}'", + table.schema.name, + ty, + ), + }; + let mut output_fields = BTreeMap::new(); + for ident in group_by.identifiers.iter() { + let Some(type_) = table.schema.fields.get(ident.as_str()) else { + anyhow::bail!( + "Cannot group by field '{}', which does not appear in the \ + input tables. Valid fields are: {:?}", + ident.as_str(), + table + .schema + .fields + .keys() + .map(String::as_str) + .collect::>(), + ); + }; + output_fields.insert(ident.to_string(), *type_); + } + let output_schema = TableSchema { + data_types: vec![output_data_type], + fields: output_fields, + ..table.schema + }; + let output = + TableOpData { schema: output_schema, alignment: table.alignment }; + Ok(GroupBy { output, group_by: group_by.clone() }) + } + + fn output_data_type(&self) -> DataType { + self.output.schema.data_types[0] + } + + /// Print this node as a plan tree entry. + pub fn plan_tree_entry(&self) -> termtree::Tree { + let mut out = String::from("group_by: fields=["); + let n_fields = self.group_by.identifiers.len(); + for (i, field) in self.group_by.identifiers.iter().enumerate() { + out.push_str(field.as_str()); + if i < n_fields - 1 { + out.push(','); + } + } + out.push_str("], reducer="); + write!(out, "{}, ", self.group_by.reducer).unwrap(); + out.push_str("output type="); + write!(out, "{}", self.output_data_type()).unwrap(); + termtree::Tree::new(out) + } +} diff --git a/oximeter/db/src/oxql/plan/join.rs b/oximeter/db/src/oxql/plan/join.rs new file mode 100644 index 0000000000..527b00baa0 --- /dev/null +++ b/oximeter/db/src/oxql/plan/join.rs @@ -0,0 +1,101 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! OxQL query plan node for joining tables. 
+ +// Copyright 2024 Oxide Computer Company + +use crate::oxql::plan::plan::TableOpData; +use crate::oxql::plan::plan::TableOpInput; +use crate::oxql::schema::TableSchema; + +/// A node that joins input tables with the same field values. +#[derive(Clone, Debug, PartialEq)] +pub struct Join { + pub output: TableOpData, +} + +impl Join { + /// Plan the application of a join node. + pub fn new(input: TableOpInput) -> anyhow::Result { + anyhow::ensure!( + input.tables.len() >= 2, + "`join` table operations require at least 2 tables", + ); + let first = input + .tables + .first() + .expect("table operations must have at least one schema"); + let Some(alignment) = first.alignment else { + anyhow::bail!( + "All input tables to a `join` operation must \ + be aligned, but table '{}' is not aligned", + first.schema.name, + ); + }; + let fields = &first.schema.fields; + let mut names = Vec::with_capacity(input.tables.len()); + let mut metric_types = Vec::with_capacity(input.tables.len()); + let mut data_types = Vec::with_capacity(input.tables.len()); + for table in input.tables.iter() { + let Some(this_alignment) = table.alignment else { + anyhow::bail!( + "All input tables to a `join` operation must \ + be aligned, but table '{}' is not aligned", + table.schema.name, + ); + }; + anyhow::ensure!( + this_alignment == alignment, + "All input tables to a `join` operation must have the \ + same alignment, table '{}' was expected to be aligned \ + with {}, but found {}", + table.schema.name, + alignment, + this_alignment, + ); + anyhow::ensure!( + table.schema.metric_types.len() == 1, + "All input tables to `join` operation must be \ + 1-dimensional, but table '{}' has {} dimensions", + table.schema.name, + table.schema.metric_types.len(), + ); + anyhow::ensure!( + &table.schema.fields == fields, + "All input tables to `join` operation must have \ + the same field names and types, but table '{}' \ + has fields [{}] and table '{}' has fields [{}]", + table.schema.name, + table + .schema + .fields + .iter() + .map(|(name, typ)| format!("\"{name}\" ({typ})")) + .collect::>() + .join(", "), + first.schema.name, + fields + .iter() + .map(|(name, typ)| format!("\"{name}\" ({typ})")) + .collect::>() + .join(", "), + ); + names.push(table.schema.name.as_str()); + metric_types.push(table.schema.metric_types[0]); + data_types.push(table.schema.data_types[0]); + } + let name = names.join(","); + let output = TableOpData { + schema: TableSchema { + name, + fields: fields.clone(), + metric_types, + data_types, + }, + alignment: Some(alignment), + }; + Ok(Self { output }) + } +} diff --git a/oximeter/db/src/oxql/plan/limit.rs b/oximeter/db/src/oxql/plan/limit.rs new file mode 100644 index 0000000000..730e3dc82f --- /dev/null +++ b/oximeter/db/src/oxql/plan/limit.rs @@ -0,0 +1,17 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! OxQL query plan node for limiting data. + +// Copyright 2024 Oxide Computer Company + +use crate::oxql::ast::table_ops::limit; +use crate::oxql::plan::plan::TableOpOutput; + +/// A plan node for limiting the number of samples per-timeseries. 
+#[derive(Clone, Debug, PartialEq)] +pub struct Limit { + pub output: TableOpOutput, + pub limit: limit::Limit, +} diff --git a/oximeter/db/src/oxql/plan/mod.rs b/oximeter/db/src/oxql/plan/mod.rs new file mode 100644 index 0000000000..f5dfdda8ae --- /dev/null +++ b/oximeter/db/src/oxql/plan/mod.rs @@ -0,0 +1,20 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Query plans for the Oximeter Query Language. + +// Copyright 2024 Oxide Computer Company + +mod align; +mod delta; +mod filter; +mod get; +mod group_by; +mod join; +mod limit; +mod node; +mod plan; +mod predicates; + +pub use plan::Plan; diff --git a/oximeter/db/src/oxql/plan/node.rs b/oximeter/db/src/oxql/plan/node.rs new file mode 100644 index 0000000000..d762df6da8 --- /dev/null +++ b/oximeter/db/src/oxql/plan/node.rs @@ -0,0 +1,113 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Nodes in an OxQL query plan. + +// Copyright 2024 Oxide Computer Company + +use crate::oxql::plan::align::Align; +use crate::oxql::plan::delta::Delta; +use crate::oxql::plan::filter::Filter; +use crate::oxql::plan::get::Get; +use crate::oxql::plan::group_by::GroupBy; +use crate::oxql::plan::join::Join; +use crate::oxql::plan::limit::Limit; +use crate::oxql::plan::plan::TableOpData; +use crate::oxql::plan::plan::TableOpOutput; +use crate::oxql::plan::Plan; + +/// A node in the query plan. +/// +/// This is roughly synonymous with the table operation, but includes more +/// metadata about inputs and outputs for the operation. In addition, some query +/// plan nodes do not have direct analogs in a query itself. For example, a +/// `Delta` node is inserted automatically after fetching data from a cumulative +/// table. +#[derive(Clone, Debug, PartialEq)] +pub enum Node { + /// A node representing the plan of a subquery. + Subquery(Vec), + /// A node for fetching data from the database. + Get(Get), + /// A node for computing deltas between adjacent samples for a cumulative + /// table. + /// + /// This node is always inserted implicitly after a `Get`, if the table + /// being selected is cumulative. + Delta(Delta), + /// A node for filtering data from its inputs. + Filter(Filter), + /// A node for aligning its input data to regular time intervals. + Align(Align), + /// A node for grouping timeseries within a table with the same field + /// values. + GroupBy(GroupBy), + /// A node for joining timeseries with the same field values from two or + /// more tables. + Join(Join), + /// A node that limits the number of points the timeseries of its input + /// tables. + Limit(Limit), +} + +impl Node { + /// Return the output of a query plan node. + pub fn output(&self) -> TableOpOutput { + match self { + Node::Subquery(subplans) => { + // The output of a subquery is the last output from every + // subplan in the subquery. + let tables = subplans + .iter() + .flat_map(|plan| plan.output().tables) + .collect(); + TableOpOutput { tables } + } + Node::Get(Get { table_schema, .. 
}) => TableOpOutput { + tables: vec![TableOpData { + schema: table_schema.clone(), + alignment: None, + }], + }, + Node::Delta(Delta { output }) => TableOpOutput { + tables: vec![TableOpData { + schema: output.clone(), + alignment: None, + }], + }, + Node::Filter(Filter { input, .. }) => { + TableOpOutput { tables: input.tables.clone() } + } + Node::Align(Align { output, .. }) => output.clone(), + Node::GroupBy(GroupBy { output, .. }) => { + TableOpOutput { tables: vec![output.clone()] } + } + Node::Join(Join { output, .. }) => { + TableOpOutput { tables: vec![output.clone()] } + } + Node::Limit(Limit { output, .. }) => output.clone(), + } + } + + /// Return a string summarizing a node as a plan tree entry. + /// + /// # Panics + /// + /// This panics for subquery plans, that should be printed separately using + /// the recurisve `to_plan_tree_impl()` method. + pub fn plan_tree_entry(&self) -> termtree::Tree { + match self { + Node::Subquery(_) => unreachable!(), + Node::Get(get) => get.plan_tree_entry(), + Node::Delta(delta) => delta.plan_tree_entry(), + Node::Filter(filter) => filter.plan_tree_entry(), + Node::Align(align) => align.plan_tree_entry(), + Node::GroupBy(group_by) => group_by.plan_tree_entry(), + Node::Join(_) => termtree::Tree::new(String::from("join")), + Node::Limit(limit) => { + termtree::Tree::new(format!("{}", limit.limit)) + } + } + } +} diff --git a/oximeter/db/src/oxql/plan/plan.rs b/oximeter/db/src/oxql/plan/plan.rs new file mode 100644 index 0000000000..643196f014 --- /dev/null +++ b/oximeter/db/src/oxql/plan/plan.rs @@ -0,0 +1,1366 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! The top-level query plan itself. + +// Copyright 2024 Oxide Computer Company + +use crate::oxql::ast::table_ops::align; +use crate::oxql::ast::table_ops::BasicTableOp; +use crate::oxql::ast::table_ops::GroupedTableOp; +use crate::oxql::ast::table_ops::TableOp; +use crate::oxql::ast::Query; +use crate::oxql::plan::align::Align; +use crate::oxql::plan::delta::Delta; +use crate::oxql::plan::filter::Filter; +use crate::oxql::plan::get::Get; +use crate::oxql::plan::group_by::GroupBy; +use crate::oxql::plan::join::Join; +use crate::oxql::plan::limit::Limit; +use crate::oxql::plan::node::Node; +use crate::oxql::plan::predicates::Predicates; +use crate::oxql::plan::predicates::SplitPredicates; +use crate::oxql::schema::TableSchema; +use anyhow::Context as _; +use oximeter::TimeseriesName; +use oximeter::TimeseriesSchema; +use oxql_types::point::MetricType; +use std::collections::BTreeMap; +use std::collections::VecDeque; +use std::fmt; +use std::time::Duration; +use std::time::Instant; + +/// The data for a table operation, either input or output. +#[derive(Clone, Debug, PartialEq)] +pub struct TableOpData { + /// The table schema. + pub schema: TableSchema, + /// The alignment method of the table, if any. 
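+ ///
+ /// `None` means the table has not yet been aligned to regular time
+ /// intervals.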
+ pub alignment: Option, +} + +impl fmt::Display for TableOpData { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "Table: {}", self.schema.name)?; + writeln!( + f, + " Metric types: [{}]", + self.schema + .metric_types + .iter() + .map(|ty| ty.to_string()) + .collect::>() + .join(",") + )?; + writeln!( + f, + " Data types: [{}]", + self.schema + .data_types + .iter() + .map(|ty| ty.to_string()) + .collect::>() + .join(",") + )?; + writeln!(f, " Fields:")?; + for (name, ty) in self.schema.fields.iter() { + writeln!(f, " {name}: {ty}")?; + } + if let Some(alignment) = &self.alignment { + write!(f, " Alignment: {:?}", alignment.period) + } else { + write!(f, " Alignment: none") + } + } +} + +/// The input tables to a table operation. +#[derive(Clone, Debug, PartialEq)] +pub struct TableOpInput { + pub tables: Vec, +} + +impl From for TableOpOutput { + fn from(input: TableOpInput) -> Self { + Self { tables: input.tables } + } +} + +/// The output tables from a table operation. +#[derive(Clone, Debug, PartialEq)] +pub struct TableOpOutput { + pub tables: Vec, +} + +impl fmt::Display for TableOpOutput { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for table in self.tables.iter() { + writeln!(f, "{table}")?; + } + Ok(()) + } +} + +impl TableOpOutput { + /// Convert this output into an input. + pub fn into_input(self) -> TableOpInput { + TableOpInput::from(self) + } + + /// Return the number of tables a table operation emits. + pub fn n_tables(&self) -> usize { + self.tables.len() + } +} + +impl From for TableOpInput { + fn from(output: TableOpOutput) -> Self { + Self { tables: output.tables } + } +} + +// Helper to distinguish when a plan has changed during optimized. +#[derive(Clone, Debug, PartialEq)] +enum OptimizedPlan { + Unchanged(Vec), + Optimized(Vec), +} + +impl OptimizedPlan { + // Print this optimized plan as a tree of plan nodes + fn to_plan_tree(&self) -> termtree::Tree { + let nodes = match self { + OptimizedPlan::Unchanged(nodes) => nodes, + OptimizedPlan::Optimized(nodes) => nodes, + }; + let mut root = + termtree::Tree::new(String::from("•")).with_multiline(true); + Plan::to_plan_tree_impl(&mut root, nodes, /* optimized = */ true); + root + } + + // Return the nodes in `self`. + fn nodes(&self) -> &[Node] { + match self { + OptimizedPlan::Unchanged(nodes) => nodes, + OptimizedPlan::Optimized(nodes) => nodes, + } + } + + // Check if the plan has any full table scans. + fn requires_full_table_scan(&self) -> bool { + let nodes = match self { + OptimizedPlan::Unchanged(nodes) => nodes, + OptimizedPlan::Optimized(nodes) => nodes, + }; + Self::requires_full_table_scan_impl(nodes) + } + + // Check if the plan has any full table scans. + fn requires_full_table_scan_impl(nodes: &[Node]) -> bool { + nodes.iter().any(|node| match node { + Node::Subquery(subplans) => { + subplans.iter().any(Plan::requires_full_table_scan) + } + Node::Get(get) => get.filters.is_empty(), + Node::Delta(_) + | Node::Filter(_) + | Node::Align(_) + | Node::GroupBy(_) + | Node::Join(_) + | Node::Limit(_) => false, + }) + } +} + +/// An OxQL query plan. +#[derive(Clone, Debug)] +pub struct Plan { + /// The original parsed query. + pub query: Query, + // The original query plan. + nodes: Vec, + // The possibly optimized, rewritten query plan. + optimized: OptimizedPlan, + /// Time spent generating the query plan. 
+ pub duration: Duration, +} + +impl PartialEq for Plan { + fn eq(&self, other: &Self) -> bool { + self.query.eq(&other.query) + && self.nodes.eq(&other.nodes) + && self.optimized.eq(&other.optimized) + } +} + +impl Plan { + /// Generate a plan for the provided query. + /// + /// The provided `schema` must include the timeseries schema for any + /// timeseries referred to in the orginal query. I.e., it lists the schema + /// for the raw data to be selected from the database. + pub fn new( + query: Query, + schema: &BTreeMap, + ) -> anyhow::Result { + let start = Instant::now(); + let mut nodes = Vec::with_capacity(query.table_ops().len()); + Self::plan_query(&query, &schema, &mut nodes)?; + let optimized = Self::optimize_plan(&nodes)?; + let duration = start.elapsed(); + if let OptimizedPlan::Optimized(optimized_plan) = &optimized { + let original_output = + nodes.last().expect("plan cannot be empty").output(); + let optimized_output = + optimized_plan.last().expect("plan cannot be empty").output(); + assert_eq!( + original_output, optimized_output, + "Query optimization resulted in different outputs!\n\ + original = {:?}\n\ + optimized = {:?}", + original_output, optimized_output, + ); + } + Ok(Self { query, nodes, optimized, duration }) + } + + /// Pretty-print the plan as a tree of plan nodes. + pub fn to_plan_tree(&self) -> (String, String) { + let mut root = + termtree::Tree::new(String::from("•")).with_multiline(true); + Self::to_plan_tree_impl( + &mut root, + &self.nodes, + /* optimized = */ false, + ); + let original_tree = root.to_string(); + let optimized_tree = self.optimized.to_plan_tree(); + (original_tree, optimized_tree.to_string()) + } + + /// Return the table schema for the output of a plan. + pub fn output(&self) -> TableOpOutput { + self.nodes.last().expect("plan cannot be empty").output() + } + + // Return the new, possibly optimized plan nodes. + #[cfg(test)] + pub fn optimized_nodes(&self) -> &[Node] { + self.optimized.nodes() + } + + // Return the original, unoptimized plan nodes. + #[cfg(test)] + pub fn nodes(&self) -> &[Node] { + &self.nodes + } + + // Push nodes of the plan onto the tree, recursively. + fn to_plan_tree_impl( + root: &mut termtree::Tree, + mut nodes: &[Node], + optimized: bool, + ) { + if let Node::Subquery(plans) = &nodes[0] { + let subplans = plans.iter().map(|plan| { + let mut root = + termtree::Tree::new(format!("• {}", plan.query)) + .with_multiline(true); + let nodes = if optimized { + plan.optimized.nodes() + } else { + &plan.nodes + }; + Self::to_plan_tree_impl(&mut root, nodes, optimized); + root + }); + root.extend(subplans); + nodes = &nodes[1..]; + } + for node in nodes.iter() { + root.push(node.plan_tree_entry()); + } + } + + // Plan the provided query, inserting the plan nodes into the list. + fn plan_query( + query: &Query, + schema: &BTreeMap, + nodes: &mut Vec, + ) -> anyhow::Result<()> { + for table_op in query.table_ops() { + match table_op { + TableOp::Basic(basic) => { + Self::plan_basic_table_op(&schema, basic, nodes)? + } + TableOp::Grouped(grouped) => { + Self::plan_subquery(&schema, grouped, nodes)? + } + } + } + Ok(()) + } + + // Generate plan nodes for a basic table op. 
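+ //
+ // Basic table ops are `get`, `filter`, `group_by`, `join`, `align`, and
+ // the limiting ops (`first` / `last`). Each arm below emits the matching
+ // plan node; a `get` of a cumulative timeseries also inserts an implicit
+ // `Delta` node.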
+ fn plan_basic_table_op( + schema: &BTreeMap, + op: &BasicTableOp, + nodes: &mut Vec, + ) -> anyhow::Result<()> { + match op { + BasicTableOp::Get(name) => { + let db_schema = schema.get(name).with_context(|| { + format!("Schema for timeseries '{}' not found", name) + })?; + let table_schema = TableSchema::new(db_schema); + let node = Node::Get(Get { + table_schema: table_schema.clone(), + filters: vec![], + limit: None, + }); + nodes.push(node); + + // Insert the plan node for computing deltas for a cumulative + // timeseries, if needed. + if table_schema.metric_types[0] == MetricType::Cumulative { + let node = Node::Delta(Delta::new(&table_schema)?); + nodes.push(node); + } + } + BasicTableOp::Filter(filter) => { + // A filter doesn't change the schema, but we do have to check + // that it applies to every input table. + let input = nodes + .last() + .expect("Must have a previous node") + .output() + .into_input(); + let node = Node::Filter(Filter::new(filter, input)?); + nodes.push(node); + } + BasicTableOp::GroupBy(group_by) => { + // A group_by just drops the fields that are not named, and + // aggregates the others in a specific way. + let input = nodes + .last() + .expect("Must have a previous node") + .output() + .into_input(); + let node = Node::GroupBy(GroupBy::new(group_by, input)?); + nodes.push(node); + } + BasicTableOp::Join(_) => { + // A join concatenates all the data from the input tables that + // have the same values for their fields. All tables have to + // have the same field names / types. + let inputs = nodes + .last() + .expect("Must have a previous node") + .output() + .into_input(); + let node = Node::Join(Join::new(inputs)?); + nodes.push(node); + } + BasicTableOp::Align(align) => { + let input = nodes + .last() + .expect("Must have a previous node") + .output() + .into_input(); + let node = Node::Align(Align::new(*align, input)?); + nodes.push(node); + } + BasicTableOp::Limit(limit) => { + // Limit operations do not modify anything about the data at + // all, so just return the input schema unchanged. + let output = + nodes.last().expect("Must have a previous node").output(); + nodes.push(Node::Limit(Limit { limit: *limit, output })); + } + } + Ok(()) + } + + // Generate a plan node for a grouped table op, i.e., a subquery. + // + // Note that subqueries are not optimized themselves. The entire query is + // optimized as a whole. That means `optimized` is empty. + fn plan_subquery( + schema: &BTreeMap, + grouped: &GroupedTableOp, + nodes: &mut Vec, + ) -> anyhow::Result<()> { + let mut subplans = Vec::with_capacity(grouped.ops.len()); + for query in grouped.ops.iter().cloned() { + let start = Instant::now(); + let mut new_nodes = Vec::new(); + Self::plan_query(&query, schema, &mut new_nodes)?; + subplans.push(Plan { + query, + nodes: new_nodes.clone(), + optimized: OptimizedPlan::Unchanged(new_nodes), + duration: start.elapsed(), + }); + } + nodes.push(Node::Subquery(subplans)); + Ok(()) + } + + // Optimize a plan, if possible. + // + // This attempts to apply a number of plan-rewriting steps, to make the + // query as efficient as possible. Today these steps include: + // + // - Predicate pushdown: moving filtering predicates as close to the data as + // possible + // - Limit pushdown: Moving `first` or `last` table operations as close to + // the data as possible. + // + // There is a lot of room for new steps here. 
The most obvious next + // candidates are: + // + // - Deltas: push the computation of deltas from cumulative timeseries into + // the database + // - Alignment: push alignment operations into the database + // - Constant evaluation: As we support richer expressions, such as writing + // filters like `filter x > 1 / 2` or `filter x > log(5)`, we can evaluate + // those expressions at query-plan time. + fn optimize_plan(nodes: &[Node]) -> anyhow::Result { + let optimized = Self::pushdown_predicates(nodes)?; + Self::pushdown_limit(optimized.nodes()) + } + + // Push down limit operations in the list of plan nodes. + fn pushdown_limit(nodes: &[Node]) -> anyhow::Result { + anyhow::ensure!( + !nodes.is_empty(), + "Planning error: plan nodes cannot be empty" + ); + let mut modified = false; + + // Collect nodes in the plan. + let mut remaining_nodes = + nodes.iter().cloned().collect::>(); + let mut processed_nodes = VecDeque::with_capacity(nodes.len()); + + while let Some(current_node) = remaining_nodes.pop_back() { + // What we do with the limit node depends on what's in front of it. + let Some(next_node) = remaining_nodes.pop_back() else { + // If there _isn't_ one, then the current node must be the start + // of the plan, and so push it and break out. + processed_nodes.push_front(current_node); + break; + }; + + // If this isn't a limit node, just push it and continue + let Node::Limit(limit) = current_node else { + processed_nodes.push_front(current_node); + processed_nodes.push_front(next_node); + continue; + }; + + match next_node { + Node::Subquery(subplans) => { + // Push the limit onto the subquery plans and recurse. + let new_subplans = subplans + .into_iter() + .map(|mut plan| { + let start = Instant::now(); + let nodes = [ + plan.optimized.nodes(), + &[Node::Limit(limit.clone())], + ] + .concat(); + plan.optimized = Self::pushdown_limit(&nodes)?; + plan.duration += start.elapsed(); + Ok(plan) + }) + .collect::>>()?; + processed_nodes.push_front(Node::Subquery(new_subplans)); + modified = true; + } + Node::Get(mut get) => { + // If we've gotten here, we _may_ be able to push this into + // the databse, but only if there isn't one already in the + // get node. + match get.limit.as_mut() { + Some(existing) => { + // There's already a limiting operation here. We may + // be able to coalesce them, but only if they're the + // same kind. If not, it's not correct to reorder + // them, so we have to push the current node and + // then the next (get) node onto the processed list. + if existing.kind == limit.limit.kind { + // Take the smaller of the two! + existing.count = + existing.count.min(limit.limit.count); + modified = true; + } else { + // These are different kinds, push them in + // order, starting with the limit. + processed_nodes.push_front(Node::Limit(limit)); + } + } + None => { + let old = get.limit.replace(limit.limit); + assert!(old.is_none()); + modified = true; + } + } + + // We always push the get node last. + processed_nodes.push_front(Node::Get(get)); + } + Node::Delta(_) + | Node::Align(_) + | Node::GroupBy(_) + | Node::Join(_) => { + remaining_nodes.push_back(Node::Limit(limit)); + processed_nodes.push_front(next_node); + modified = true; + } + Node::Filter(filter) => { + // We might be able to reorder the limit through the filter, + // but it depends on whether and how the filter references + // timestamps. They need to point in the same "direction" -- + // see `can_reorder_around()` for details. 
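+ // If it can be reordered, the limit keeps sinking toward the
+ // `get` node; otherwise both nodes are emitted in their
+ // original order.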
+ if filter.can_reorder_around(&limit.limit) { + processed_nodes.push_front(Node::Filter(filter)); + remaining_nodes.push_back(Node::Limit(limit)); + modified = true; + } else { + processed_nodes.push_front(Node::Limit(limit)); + processed_nodes.push_front(Node::Filter(filter)); + } + } + Node::Limit(mut other_limit) => { + // We might be able to coalesce these, if they're of the + // same kind. If they are not, push the current one, and + // then start carrying through the next one. + if limit.limit.kind == other_limit.limit.kind { + other_limit.limit.count = + other_limit.limit.count.min(limit.limit.count); + remaining_nodes.push_back(Node::Limit(other_limit)); + modified = true; + } else { + processed_nodes.push_front(Node::Limit(limit)); + remaining_nodes.push_back(Node::Limit(other_limit)); + } + } + } + } + + let out = processed_nodes.make_contiguous().to_vec(); + if modified { + Ok(OptimizedPlan::Optimized(out)) + } else { + Ok(OptimizedPlan::Unchanged(out)) + } + } + + // Push down predicates in the list of plan nodes. + fn pushdown_predicates(nodes: &[Node]) -> anyhow::Result { + Self::pushdown_predicates_impl(nodes, None) + } + + // Recursive implementation of predicate pushdown. + fn pushdown_predicates_impl( + nodes: &[Node], + outer_predicates: Option, + ) -> anyhow::Result { + anyhow::ensure!( + !nodes.is_empty(), + "Planning error: plan nodes cannot be empty" + ); + + // Used to return correct variant of `OptimizedPlan`. + let mut modified = false; + + // Collect the nodes in the plan. + // + // We'll process the query plan from back to front, pushing nodes onto + // the output plan as we consider them. This is so we can "push" the + // filters from the back of the plan towards the front, as we process + // nodes. + let mut remaining_nodes = + nodes.iter().cloned().collect::>(); + let mut processed_nodes = VecDeque::with_capacity(nodes.len()); + + // If we were provided with outer predicates, let's just add on a filter + // plan node to the provided plan. It will be processed like any other + // node. + if let Some(predicates) = outer_predicates { + let filt = Filter::from_predicates( + predicates, + nodes + .last() + .expect("length verified above") + .output() + .into_input(), + )?; + remaining_nodes.push_back(Node::Filter(filt)); + }; + + // Process nodes in the query plan, in reverse. + while let Some(current_node) = remaining_nodes.pop_back() { + // What we do with the node depends on what's in front of it, so + // take the next node in the plan. + let Some(next_node) = remaining_nodes.pop_back() else { + // If there _isn't_ one, then the current node must be the start + // of the plan, and so push it and break out. + processed_nodes.push_front(current_node); + break; + }; + + // If the current node isn't a filter, then this pass doesn't modify + // it at all. Instead, just push it and continue. But! We have to + // push back the next node we just popped above. + let Node::Filter(current_filter) = current_node else { + processed_nodes.push_front(current_node); + remaining_nodes.push_back(next_node); + continue; + }; + + // At this point we are looking at a filter node, and the next one + // exists. We might be able to modify them in a few ways. + match next_node { + Node::Subquery(subplans) => { + // We can push the filter into each one of the subquery + // plans, as its outer predicates. 
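+ // Concretely, each subquery's nodes are re-run through
+ // predicate pushdown with these predicates appended as an
+ // outer filter node, so the filters sink inside every
+ // subquery as well.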
+ let new_subplans = subplans + .into_iter() + .map(|mut plan| { + let start = Instant::now(); + plan.optimized = Self::pushdown_predicates_impl( + &plan.nodes, + Some(current_filter.predicates.clone()), + )?; + plan.duration += start.elapsed(); + Ok(plan) + }) + .collect::>>()?; + processed_nodes.push_front(Node::Subquery(new_subplans)); + modified = true; + } + Node::Get(mut get) => { + // Push the filters into the get plan node. + // + // At this point, we're at the front of the query plan, so + // the filters won't be pushed any farther. It's required + // that every filter we push through be _non-empty_. See + // `Filter` for details. + get.filters = current_filter.predicates.to_required()?; + processed_nodes.push_front(Node::Get(get)); + modified = true; + } + Node::Delta(ref delta) => { + // We can _sometimes_ push a filter around a delta, but not + // the entire filter expression in general. + // + // Delta nodes change the datum values for a cumulative + // timeseries, by computing adjacent differences. In a + // logical, unoptimized query plan, we always fetch the + // timeseries and immediately (and implicitly) construct the + // deltas. So to match the semantics of the literal query, + // any filter on the datum must necessarily refer to those + // differences, not the original, cumulative values. + // + // That means we can push through filter expressions, except + // those portions which refer to the datum. Field filters + // and any filters on the timestamps are valid as well. Note + // that the latter are valid because computing differences + // doesn't change the timestamps themselves, only the start + // times. + let SplitPredicates { pushed, not_pushed } = + current_filter.predicates.split_around_delta(delta)?; + if let Some(after) = not_pushed { + let new_input = next_node.output().into_input(); + let filter = Filter::from_predicates(after, new_input)?; + processed_nodes.push_front(Node::Filter(filter)); + } + processed_nodes.push_front(next_node); + + // The filters we did push through need to go back on the + // remaining node queue, with the next node's output as its + // input. + if let Some(before) = pushed { + let new_input = remaining_nodes + .back() + .as_ref() + .expect("Align cannot start a query") + .output() + .into_input(); + let filter = + Filter::from_predicates(before, new_input.clone())?; + remaining_nodes.push_back(Node::Filter(filter)); + } + modified = true; + } + Node::Filter(mut next_filter) => { + // If the next node is also a filter, we can just merge the + // current one into it, and push it back onto the remaining + // nodes. We'll process it again on the next pass through + // the loop. + let new_predicates = next_filter + .predicates + .and(¤t_filter.predicates)?; + next_filter.predicates = new_predicates; + remaining_nodes.push_back(Node::Filter(next_filter)); + modified = true; + } + Node::Align(ref align) => { + // We can also push a filter around an alignment operation, + // but we may need to split it up. + // + // Filters that apply to the _fields_ can always be pushed, + // but the filters that apply to the measurements cannot. + // Separate these into a new set of filter nodes that come + // before and after the alignment, respectively. 
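+ // For example, `filter link_name == 'foo' && datum > 100`
+ // splits into `filter link_name == 'foo'`, which is pushed
+ // ahead of the alignment, and `filter datum > 100`, which
+ // stays after it.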
+ let SplitPredicates { pushed, not_pushed } = + current_filter.predicates.split_around_align(align)?; + if let Some(after) = not_pushed { + let new_input = next_node.output().into_input(); + let filter = Filter::from_predicates(after, new_input)?; + processed_nodes.push_front(Node::Filter(filter)); + } + processed_nodes.push_front(next_node); + + // The filters we did push through need to go back on the + // remaining node queue, with the next node's output as its + // input. + if let Some(before) = pushed { + let new_input = remaining_nodes + .back() + .as_ref() + .expect("Align cannot start a query") + .output() + .into_input(); + let filter = + Filter::from_predicates(before, new_input.clone())?; + remaining_nodes.push_back(Node::Filter(filter)); + } + modified = true; + } + Node::GroupBy(_) => { + // We can always push a filter around a group by, but it's a + // bit subtle. + // + // At this point in the plan creation, we have checked that + // the filter node we're operating on only refers to fields + // that are valid at this point in the query. That can only + // be fields that are named in the group-by table operation! + // Any of those can be pushed through without changing the + // contents of the group. We do need to change the input + // schema the filter considers, however, since the groupby + // operation probably has a different one. + processed_nodes.push_front(next_node); + let input = remaining_nodes + .back() + .expect("group_by cannot start a query") + .output() + .into_input(); + let new_filter = Node::Filter( + Filter::from_predicates( + current_filter.predicates.clone(), + input, + ) + .context("planning error")?, + ); + remaining_nodes.push_back(new_filter); + modified = true; + } + Node::Join(_) => { + // We can also always push a filter around a join operation. + // + // TODO-completeness: It would be very nice to figure out + // how to refer to `datum`s in joins. Right now, we would + // apply the filter to both, which is probably nonsensical. + // We should consider letting folks name the metric portion + // of the timeseries name / table name, in addition to the + // special identifier `datum`. + // + // See https://github.com/oxidecomputer/omicron/issues/6761. + processed_nodes.push_front(next_node); + let input = remaining_nodes + .back() + .expect("join cannot start a query") + .output() + .into_input(); + let new_filter = Node::Filter( + Filter::from_predicates( + current_filter.predicates.clone(), + input, + ) + .context("planning error")?, + ); + remaining_nodes.push_back(new_filter); + modified = true; + } + Node::Limit(limit) => { + // We _might_ be able to reorder the filter around the + // limit, in a few cases. See `can_reorder_around()` for + // details. + if current_filter + .predicates + .can_reorder_around(&limit.limit) + { + // Push the limit node onto the output plan, and then + // push the filter back on the remaining nodes. + processed_nodes.push_front(Node::Limit(limit)); + remaining_nodes.push_back(Node::Filter(current_filter)); + modified = true; + } else { + // We can't reorder these -- push the filter, and then + // put the limit back so we can process it on the next + // pass. + processed_nodes.push_back(Node::Filter(current_filter)); + remaining_nodes.push_front(Node::Limit(limit)); + } + } + } + } + + // If this pass modified anything, return the new plan, else return + // nothing. 
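+ // Both variants carry the full set of processed nodes; `Optimized`
+ // simply records that at least one rewrite happened.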
+ let output_nodes = processed_nodes.make_contiguous().to_vec(); + if modified { + Ok(OptimizedPlan::Optimized(output_nodes)) + } else { + Ok(OptimizedPlan::Unchanged(output_nodes)) + } + } + + /// Return true if this plan requires at least 1 full table scan. + pub fn requires_full_table_scan(&self) -> bool { + self.optimized.requires_full_table_scan() + } +} + +#[cfg(test)] +pub(super) mod test_utils { + use chrono::NaiveDateTime; + use oximeter::FieldSchema; + use oximeter::TimeseriesName; + use oximeter::TimeseriesSchema; + use std::collections::BTreeMap; + use tokio::sync::OnceCell; + + const FILE: &str = + concat!(env!("CARGO_MANIFEST_DIR"), "/tests/timeseries-schema.json"); + + /// Type representing the direct JSON output of the timeseries_schema table. + #[derive(Clone, Debug, serde::Deserialize)] + struct DbTimeseriesSchema { + timeseries_name: TimeseriesName, + #[serde(rename = "fields.name")] + field_names: Vec, + #[serde(rename = "fields.type")] + field_types: Vec, + #[serde(rename = "fields.source")] + field_sources: Vec, + datum_type: String, + created: String, + } + + impl From for TimeseriesSchema { + fn from(value: DbTimeseriesSchema) -> Self { + assert_eq!(value.field_names.len(), value.field_types.len()); + assert_eq!(value.field_names.len(), value.field_sources.len()); + let field_schema = value + .field_names + .into_iter() + .zip(value.field_types) + .zip(value.field_sources) + .map(|((name, field_type), source)| FieldSchema { + name, + field_type: field_type.parse().unwrap(), + source: source.parse().unwrap(), + description: String::new(), + }) + .collect(); + Self { + timeseries_name: value.timeseries_name, + description: Default::default(), + field_schema, + datum_type: value.datum_type.parse().unwrap(), + version: std::num::NonZeroU8::new(1).unwrap(), + authz_scope: oximeter::AuthzScope::Fleet, + units: oximeter::Units::None, + created: NaiveDateTime::parse_from_str( + &value.created, + "%Y-%m-%d %H:%M:%S%.9f", + ) + .unwrap() + .and_utc(), + } + } + } + + async fn load_schema() -> BTreeMap { + let contents = tokio::fs::read_to_string(&FILE).await.unwrap(); + contents + .lines() + .map(|line| { + let schema: DbTimeseriesSchema = + serde_json::from_str(&line).unwrap(); + ( + TimeseriesName::try_from(schema.timeseries_name.as_str()) + .unwrap(), + schema.into(), + ) + }) + .collect() + } + + static ALL_SCHEMA: OnceCell> = + OnceCell::const_new(); + + pub async fn all_schema( + ) -> &'static BTreeMap { + ALL_SCHEMA.get_or_init(load_schema).await + } +} + +#[cfg(test)] +mod tests { + use crate::oxql::ast::grammar::query_parser; + use crate::oxql::plan::node::Node; + use crate::oxql::plan::plan::test_utils::all_schema; + use crate::oxql::plan::Plan; + use oxql_types::point::DataType; + use oxql_types::point::MetricType; + + #[tokio::test] + async fn get_gauge_plan_emits_one_node() { + const TIMESERIES_NAME: &str = "collection_target:cpus_provisioned"; + let query = + query_parser::query(&format!("get {TIMESERIES_NAME}")).unwrap(); + let all_schema = all_schema().await; + let plan = Plan::new(query, &all_schema).unwrap(); + + assert_eq!( + plan.nodes.len(), + 1, + "getting a single table by name with no filters and \ + a gauge metric type should result in one table operation", + ); + let Node::Get(get) = &plan.nodes[0] else { + panic!("expected just a get plan node, found {:?}", plan.nodes[0]); + }; + let original_schema = + all_schema.get(&TIMESERIES_NAME.parse().unwrap()).unwrap(); + assert_eq!(get.table_schema.name, TIMESERIES_NAME); + 
assert_eq!(get.table_schema.metric_types, &[MetricType::Gauge]); + assert_eq!(get.table_schema.data_types, &[DataType::Integer]); + let mut n_seen = 0; + for field_schema in original_schema.field_schema.iter() { + let ty = get + .table_schema + .fields + .get(&field_schema.name) + .expect("Field should be found"); + assert_eq!(ty, &field_schema.field_type); + n_seen += 1; + } + assert_eq!(n_seen, get.table_schema.fields.len()); + } + + #[tokio::test] + async fn get_cumulative_plan_emits_two_nodes() { + const TIMESERIES_NAME: &str = "physical_data_link:bytes_sent"; + let query = + query_parser::query(&format!("get {TIMESERIES_NAME}")).unwrap(); + let all_schema = all_schema().await; + let plan = Plan::new(query, &all_schema).unwrap(); + + assert_eq!( + plan.nodes.len(), + 2, + "getting a single table by name with no filters and \ + a cumulative metric type should result in two table operations", + ); + let Node::Get(get) = &plan.nodes[0] else { + panic!("expected a get plan node, found {:?}", plan.nodes[0]); + }; + let original_schema = + all_schema.get(&TIMESERIES_NAME.parse().unwrap()).unwrap(); + assert_eq!(get.table_schema.name, TIMESERIES_NAME); + assert_eq!(get.table_schema.metric_types, &[MetricType::Cumulative]); + assert_eq!(get.table_schema.data_types, &[DataType::Integer]); + let mut n_seen = 0; + for field_schema in original_schema.field_schema.iter() { + let ty = get + .table_schema + .fields + .get(&field_schema.name) + .expect("Field should be found"); + assert_eq!(ty, &field_schema.field_type); + n_seen += 1; + } + assert_eq!(n_seen, get.table_schema.fields.len()); + + let Node::Delta(delta) = &plan.nodes[1] else { + panic!("expected a delta plan node, found {:?}", plan.nodes[0]); + }; + assert_eq!(delta.output.name, get.table_schema.name); + assert_eq!(delta.output.fields, get.table_schema.fields); + assert_eq!(delta.output.data_types, get.table_schema.data_types); + assert_eq!(delta.output.metric_types[0], MetricType::Delta); + } + + #[tokio::test] + async fn subquery_plan_returns_multiple_tables() { + let query = query_parser::query( + "{ \ + get physical_data_link:bytes_received; \ + get physical_data_link:bytes_received \ + }", + ) + .unwrap(); + let plan = Plan::new(query, all_schema().await).unwrap(); + assert_eq!( + plan.nodes.len(), + 1, + "Outer query plan should have one node" + ); + let Node::Subquery(plans) = &plan.nodes[0] else { + panic!( + "Plan should have generated a subquery node, found {:?}", + plan.nodes[0], + ); + }; + assert_eq!(plans.len(), 2, "Should have generated two subquery plans",); + assert_eq!(plans[0], plans[1], "Both subquery plans should be equal"); + let nodes = &plans[0].nodes; + assert_eq!(nodes.len(), 2, "Each subquery should have 2 nodes"); + assert!(matches!(nodes[0], Node::Get(_))); + assert!(matches!(nodes[1], Node::Delta(_))); + + let output = plan.output(); + assert_eq!( + output.tables.len(), + 2, + "Query plan with 2 suqueries should emit 2 tables" + ); + assert_eq!(output.tables[0], output.tables[1]); + } + + #[tokio::test] + async fn group_by_plan_leaves_only_grouped_fields() { + let query = query_parser::query( + "get physical_data_link:bytes_received \ + | align mean_within(20s) \ + | group_by [sled_id, serial]", + ) + .unwrap(); + let all_schema = all_schema().await; + let plan = Plan::new(query, all_schema).unwrap(); + assert_eq!( + plan.nodes.len(), + 4, + "Should have four nodes (+1 is for implicit delta)" + ); + let Node::GroupBy(group_by) = plan.nodes.last().unwrap() else { + panic!( + "expected a group_by as the last 
node, found {:?}", + plan.nodes.last().unwrap(), + ); + }; + let output = &group_by.output; + let plan_output = plan.output(); + assert_eq!(plan_output.tables.len(), 1); + assert_eq!( + &plan_output.tables[0], output, + "plan output should be the output of the last node" + ); + assert_eq!( + output.schema.fields.len(), + 2, + "group_by should have only resulted in two fields, \ + those listed in the table operation" + ); + let timeseries_name = + "physical_data_link:bytes_received".parse().unwrap(); + for field in ["sled_id", "serial"] { + let output_type = output.schema.fields[field]; + let original_type = all_schema + .get(×eries_name) + .unwrap() + .field_schema + .iter() + .find_map(|s| { + if s.name == field { + Some(s.field_type) + } else { + None + } + }) + .unwrap(); + assert_eq!( + output_type, original_type, + "group_by operation should not change field types" + ); + } + } + + #[tokio::test] + async fn filter_plan_node_does_not_change_table_schema() { + let query = query_parser::query( + "get physical_data_link:bytes_sent | filter serial == 'foo'", + ) + .unwrap(); + let plan = Plan::new(query, all_schema().await).unwrap(); + assert_eq!(plan.nodes.len(), 3); + let input = plan.nodes[1].output(); + let output = plan.output(); + assert_eq!(input, output, "Filter should not change the table schema"); + } + + #[tokio::test] + async fn limit_plan_node_does_not_change_table_schema() { + let query = + query_parser::query("get physical_data_link:bytes_sent | last 1") + .unwrap(); + let plan = Plan::new(query, all_schema().await).unwrap(); + assert_eq!(plan.nodes.len(), 3); + let input = plan.nodes[1].output(); + let output = plan.output(); + assert_eq!(input, output, "Last should not change the table schema"); + } + + #[tokio::test] + async fn cannot_group_multiple_tables() { + let query = query_parser::query( + "{ \ + get physical_data_link:bytes_sent; \ + get physical_data_link:bytes_received \ + } \ + | align mean_within(10s) \ + | group_by [sled_id]", + ) + .unwrap(); + let err = Plan::new(query, all_schema().await).expect_err( + "Should fail to plan query that groups multiple tables", + ); + assert!( + err.to_string().contains("require exactly one input table"), + "Error message should complain about multiple tables, \ + but the error message is: {:#?}", + err + ); + } + + #[tokio::test] + async fn cannot_join_one_table() { + let query = query_parser::query( + "get physical_data_link:bytes_sent \ + | align mean_within(10s) \ + | join", + ) + .unwrap(); + let err = Plan::new(query, all_schema().await) + .expect_err("Should fail to plan query that joins one tables"); + assert!( + err.to_string().contains("require at least 2 tables"), + "Error message should complain about only having one \ + table, but the error message is: {:#?}", + err + ); + } + + #[tokio::test] + async fn cannot_join_unaligned_tables() { + let query = query_parser::query( + "{ \ + get physical_data_link:bytes_sent; \ + get physical_data_link:bytes_received \ + } | join", + ) + .unwrap(); + let err = Plan::new(query, all_schema().await).expect_err( + "Should fail to plan query that joins unaligned tables", + ); + assert!( + err.to_string().contains("is not aligned"), + "Error message should complain that the input tables are \ + not aligned, but the error message is: {:#?}", + err + ); + } + + #[tokio::test] + async fn cannot_group_unaligned_tables() { + let query = query_parser::query( + "get physical_data_link:bytes_sent | group_by [sled_id]", + ) + .unwrap(); + let err = Plan::new(query, 
all_schema().await).expect_err( + "Should fail to plan query that groups unaligned tables", + ); + assert!( + err.to_string().contains("is not aligned"), + "Error message should complain that the input tables are \ + not aligned, but the error message is: {:#?}", + err + ); + } + + #[tokio::test] + async fn cannot_align_non_numeric_tables() { + let query = query_parser::query( + "get http_service:request_latency_histogram | align mean_within(10s)" + ).unwrap(); + let err = Plan::new(query, all_schema().await) + .expect_err("Should fail to plan query that aligns histograms"); + assert!( + err.to_string().contains("cannot be aligned"), + "Error message should complain that histogram tables cannot \ + be aligned, but the error message is: {:#?}", + err, + ); + } + + #[tokio::test] + async fn cannot_filter_with_incomparable_types() { + let query = query_parser::query( + "get http_service:request_latency_histogram | filter name == 0", + ) + .unwrap(); + let err = Plan::new(query, all_schema().await).expect_err( + "Should fail to plan query with an incomparable filter", + ); + assert!( + err.to_string().contains("is not compatible with the field"), + "Error message should complain that a filter cannot compare \ + a field against an incompatible type, but the error message is: {:#?}", + err, + ); + } + + #[tokio::test] + async fn predicate_pushdown_merges_neighboring_filter_nodes() { + let query = query_parser::query( + "get physical_data_link:bytes_sent \ + | filter serial == 'foo' \ + | filter link_name == 'bar'", + ) + .unwrap(); + let plan = Plan::new(query, all_schema().await).unwrap(); + let optimized_nodes = plan.optimized_nodes(); + assert_eq!(optimized_nodes.len(), 2); + let Node::Get(get) = &optimized_nodes[0] else { + panic!("Expected a get node, found {:?}", &optimized_nodes[0]); + }; + assert_eq!(get.filters.len(), 1); + assert!(matches!(&optimized_nodes[1], Node::Delta(_))); + assert_eq!( + plan.nodes.last().unwrap().output(), + optimized_nodes.last().unwrap().output() + ); + } + + #[tokio::test] + async fn predicate_pushdown_pushes_filter_nodes_through_group_by() { + let query = query_parser::query( + "get physical_data_link:bytes_sent \ + | align mean_within(1m) \ + | group_by [serial] \ + | filter serial == 'foo'", + ) + .unwrap(); + let plan = Plan::new(query, all_schema().await).unwrap(); + let optimized_nodes = plan.optimized_nodes(); + assert_eq!(optimized_nodes.len(), 4); + let Node::Get(get) = &optimized_nodes[0] else { + panic!("Expected a get node, found {:?}", &optimized_nodes[0]); + }; + assert_eq!(get.filters.len(), 1); + assert_eq!( + plan.nodes.last().unwrap().output(), + optimized_nodes.last().unwrap().output() + ); + } + + #[tokio::test] + async fn predicate_pushdown_pushes_filter_nodes_through_join() { + let query = query_parser::query( + "{ \ + get physical_data_link:bytes_sent; \ + get physical_data_link:bytes_received \ + } \ + | align mean_within(1m) \ + | join + | filter serial == 'foo'", + ) + .unwrap(); + let plan = Plan::new(query, all_schema().await).unwrap(); + let optimized_nodes = plan.optimized_nodes(); + assert_eq!(optimized_nodes.len(), 3); + let Node::Subquery(subqueries) = &optimized_nodes[0] else { + panic!("Expected a subquery node, found {:?}", &optimized_nodes[0]); + }; + for subq in subqueries.iter() { + let nodes = subq.optimized_nodes(); + assert_eq!(nodes.len(), 2); + let Node::Get(get) = &nodes[0] else { + panic!("Expected a get node, found {:?}", &nodes[0]); + }; + assert_eq!(get.filters.len(), 1); + assert!(matches!(&nodes[1], 
Node::Delta(_))); + } + assert!(matches!(&optimized_nodes[1], Node::Align(_))); + assert!(matches!(&optimized_nodes[2], Node::Join(_))); + assert_eq!( + plan.nodes.last().unwrap().output(), + optimized_nodes.last().unwrap().output() + ); + } + + #[tokio::test] + async fn predicate_pushdown_pushes_filter_nodes_into_subqueries() { + let query = query_parser::query( + "{ \ + get physical_data_link:bytes_sent; \ + get physical_data_link:bytes_received \ + } | filter serial == 'foo'", + ) + .unwrap(); + let plan = Plan::new(query, all_schema().await).unwrap(); + let optimized_nodes = plan.optimized_nodes(); + assert_eq!(optimized_nodes.len(), 1); + let Node::Subquery(subqueries) = &optimized_nodes[0] else { + panic!("Expected a subquery node, found {:?}", &optimized_nodes[0]); + }; + for subq in subqueries.iter() { + let nodes = subq.optimized_nodes(); + assert_eq!(nodes.len(), 2); + let Node::Get(get) = &nodes[0] else { + panic!("Expected a get node, found {:?}", &nodes[0]); + }; + assert_eq!(get.filters.len(), 1); + assert!(matches!(&nodes[1], Node::Delta(_))); + } + assert_eq!( + plan.nodes.last().unwrap().output(), + optimized_nodes.last().unwrap().output() + ); + } + + #[tokio::test] + async fn requires_full_table_scan() { + let query = + query_parser::query("get physical_data_link:bytes_sent").unwrap(); + let plan = Plan::new(query, all_schema().await).unwrap(); + assert!(plan.requires_full_table_scan()); + + let query = query_parser::query( + "get physical_data_link:bytes_sent | filter link_name == 'foo'", + ) + .unwrap(); + let plan = Plan::new(query, all_schema().await).unwrap(); + assert!(!plan.requires_full_table_scan()); + } +} diff --git a/oximeter/db/src/oxql/plan/predicates.rs b/oximeter/db/src/oxql/plan/predicates.rs new file mode 100644 index 0000000000..bbf08a66b1 --- /dev/null +++ b/oximeter/db/src/oxql/plan/predicates.rs @@ -0,0 +1,649 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Operate on predicates in a query plan, including spliting, merging and +//! reordering. + +// Copyright 2024 Oxide Computer Company + +use crate::oxql::ast::table_ops::filter; +use crate::oxql::ast::table_ops::limit::Limit; +use crate::oxql::plan::align::Align; +use crate::oxql::plan::delta::Delta; +use crate::oxql::plan::filter::Filter; + +/// Predicates in an OxQL plan node that filters data. +/// +/// # Overview +/// +/// During query planning, we try to manipulate the predicates to push as much +/// as possible close to the data. This is called "predicate pushdown". For a +/// variety of reasons, it's not always straightforward to do this. For example, +/// some filters only apply to data that we process in Rust, not the database. +/// This includes queries like: +/// +/// align mean_within(1m) | filter field == 0 && datum > 100 +/// +/// In this case, the field predicate can be pushed into the database, but it's +/// not correct to push the filter on the datum. That only applies to the +/// _aligned_ data, which is only computed in Rust, after fetching the data from +/// the database. +/// +/// In this particular case, it's fairly straightforward to split these two. 
+/// Because they are joined by a logical AND, this is equivalent to: +/// +/// filter field == 0 | align mean_within(1m) | filter datum > 100 +/// +/// And that's exactly what we do: push the field filters (and timestamps, +/// though it's less obvious that that's correct), and keep the datum filters on +/// the "outside" of the alignment. +/// +/// # Disjunctions +/// +/// This all becomes vastly more complicated in the presence of disjunctions, +/// statements joined by logical ORs. Consider this: +/// +/// align mean_within(1m) | filter field == 0 || datum > 100 +/// +/// In this case, pushing the field filter through is not correct! We actually +/// want _all the data__ to make its way through the alignment operation. +/// However, for scalability, we disallow that kind of "full table scan". But we +/// can still show the complexities of disjunctions with a filter like: +/// +/// filter (field == 0) || (field == 1 && datum > 100) +/// +/// In this case, we want to push these two disjunctions _separately_ through +/// the alignment. However, we now need to match up the predicates we can push +/// through with those we can't, so we don't accidentally end up apply the +/// `datum > 100` predicate to the `field == 0` data! So we now need to maintain +/// a _list_ of predicates, with those pieces we've pushed through and those we +/// haven't, so we can match them up. +/// +/// This enum is used to keep track of the predicates in a query plan, as we +/// perform these kinds of operations. Continuing with the above example, the +/// query plan would look something like this: +/// +/// 1. First, we construct predicates from the single query AST node +/// representing the filter. That is, we take `filter (field == 0) || (field +/// == 1 && datum > 100)`, and we construct: +/// +/// ```rust,ignore +/// Predicates::Single("(field == 0) || (field == 1 && datum > 100)") +/// ``` +/// +/// 2. When we push this through an alignment operation, we will split them into +/// two other variants. Those moving through the aligment are: +/// +/// ```rust,ignore +/// Predicates::Disjunction(vec![Some("field == 0"), Some("field == 1")]) +/// ``` +/// +/// and those left outside are: +/// +/// ```rust,ignore +/// Predicates::Disjunctions(vec![None, Some("datum > 100")]) +/// ``` +/// +/// It's important to note that we _do_ allow pushing through None in this +/// particular piece of code. That's because we cannot tell locally whether +/// we'll end up with a full table scan. The overall plan constructor checks +/// that as one of its invariants at the end of optimization, but we don't know +/// that just when we push predicates around individual nodes. +/// +/// # Merging filters +/// +/// Another query optimization is to merge neighboring filters. That is, if you +/// write `filter field == 0 | filter other_field == 1`, we merge that into the +/// logically-equivalent `filter field == 0 || other_field == 1`. (Note that we +/// do not handle the case where you happen to write an unsatisfiable filter, +/// that's an extremely hard problem!) +/// +/// Query optimization always works "back-to-front", starting from the end of +/// the query and processing / optimizing nodes. We make only one pass rewriting +/// predicates, but we still don't know that all the predicates are "next to" +/// each other in the original query. 
For example: +/// +/// filter field == 0 +/// | align mean_within(1m) +/// | filter other_field == 0 || datum > 100 +/// +/// We should be able to push through the field filter, leave behind the +/// predicate on `datum`, and then merge the two field filters. +#[derive(Clone, Debug, PartialEq)] +pub enum Predicates { + /// A single filter, derived from an AST node, or known to contain 1 + /// disjunction. + /// + /// NOTE: There is no "single + optional" variant in this enum. If we push + /// the entire predicate through, we drop the outer plan node entirely, so + /// there is no "piece" of it that could not be pushed through. + Single(filter::Filter), + + /// A list of at least 1 possibly-empty disjunctions. + /// + /// As we push disjunctions through plan nodes, we _may_ leave behind the + /// parts that can't be pushed. Those are represented here. If an entire + /// disjunct can be pushed through, then the corresponding entry in this + /// variant is `None`. That is a placeholder, so that we can correctly match + /// up predicates we push through with those we cannot push through. + Disjunctions(OptionalDisjunctions), +} + +impl From for Predicates { + fn from(v: filter::Filter) -> Self { + Self::Single(v) + } +} + +impl From for Predicates { + fn from(v: OptionalDisjunctions) -> Self { + Self::Disjunctions(v) + } +} + +/// The return type from `Predicates` functions that split it around table +/// operations. +#[derive(Debug)] +pub struct SplitPredicates { + /// The predicates, if any, we pushed through an operation. + pub pushed: Option, + /// The predicates, if any, we did not push through an operation. + pub not_pushed: Option, +} + +impl Predicates { + /// Split self around an alignment plan node. + /// + /// If pushing through any of the predicates in self results in an "empty" + /// filter being pushed through, that returns None. This is to signal that + /// _none_ of the filter could be pushed through. This may indicate that the + /// query will result in a full table scan, which isn't allowed. But we + /// cannot prevent that here entirely, since there may actually be another + /// filtering node in the query prior to the alignment. I.e., we can't fail + /// the whole query until we get to the end of our optimization passes. + /// + /// For example, the filter `datum > 100` will return an error, as will + /// `field == 0 || datum > 100`. The filter `(field == 0 || field == 1 && + /// datum > 100)` is fine, and will result in a split predicate. + /// + /// An error is returned if `self` isn't `Self::Single`. + pub fn split_around_align( + &self, + align: &Align, + ) -> anyhow::Result { + let Self::Single(filter) = self else { + anyhow::bail!( + "Cannot split a filter with disjunctions around an \ + alignment table operation" + ); + }; + // Sanity check that the filter, before being split into disjunctions, + // applies to the schema. + let schema = &align.output.tables.first().unwrap().schema; + Filter::ensure_filter_expr_application_is_valid(&filter.expr, schema)?; + let disjunctions = filter.simplify_to_dnf()?.flatten_disjunctions(); + + // If we have a single disjunction, we are going to return an optional + // single variant. + if disjunctions.len() == 1 { + let disjunct = disjunctions.into_iter().next().unwrap(); + let pushed = disjunct + .remove_datum(schema) + .map(|maybe_removed| { + maybe_removed + .map(|f| f.shift_timestamp_by(align.alignment.period)) + })? 
+ .map(Predicates::Single); + let not_pushed = + disjunct.only_datum(schema)?.map(Predicates::Single); + return Ok(SplitPredicates { pushed, not_pushed }); + } + + // We have multiple predicates, so let's separate out them all. + let mut pushed = Vec::with_capacity(disjunctions.len()); + let mut not_pushed = Vec::with_capacity(disjunctions.len()); + for disjunct in disjunctions { + pushed.push(disjunct.remove_datum(schema)?); + not_pushed.push(disjunct.only_datum(schema)?); + } + + // We need to "compress" either side if it's an array of all Nones. That + // means none of disjuncts could be reordered, and so we'll need to + // elide the plan node entirely. + let pushed = if pushed.iter().all(Option::is_none) { + None + } else { + Some(Predicates::from(OptionalDisjunctions(pushed))) + }; + let not_pushed = if not_pushed.iter().all(Option::is_none) { + None + } else { + Some(Predicates::from(OptionalDisjunctions(not_pushed))) + }; + Ok(SplitPredicates { pushed, not_pushed }) + } + + /// Split the predicates in self around a delta node. + pub fn split_around_delta( + &self, + delta: &Delta, + ) -> anyhow::Result { + match self { + Predicates::Single(single) => { + Self::split_single_predicates_around_delta(single, delta) + } + Predicates::Disjunctions(disjunctions) => { + // Even though we're pushing disjunctions, we only get this by + // splitting around another op. That means each element cannot + // _contain_ disjunctions -- they are joined by OR, though. So + // that means each element itself is single filter expr. So + // calling `flatten_disjunctions()` should return self. That + // means we know we'll always get a single predicate (or none). + // Either way, we push it onto our list, and then compress if + // needed. + let mut pushed = Vec::with_capacity(disjunctions.len()); + let mut not_pushed = Vec::with_capacity(disjunctions.len()); + for maybe_filter in disjunctions.iter() { + let Some(filter) = maybe_filter else { + continue; + }; + let this = Self::split_single_predicates_around_delta( + filter, delta, + )?; + match this.pushed { + Some(Predicates::Single(filter)) => { + pushed.push(Some(filter)) + } + Some(_) => unreachable!(), + None => pushed.push(None), + } + match this.not_pushed { + Some(Predicates::Single(filter)) => { + not_pushed.push(Some(filter)) + } + Some(_) => unreachable!(), + None => not_pushed.push(None), + } + } + + // Compress again. + let pushed = if pushed.iter().all(Option::is_none) { + None + } else { + Some(Predicates::from(OptionalDisjunctions(pushed))) + }; + let not_pushed = if not_pushed.iter().all(Option::is_none) { + None + } else { + Some(Predicates::from(OptionalDisjunctions(not_pushed))) + }; + Ok(SplitPredicates { pushed, not_pushed }) + } + } + } + + fn split_single_predicates_around_delta( + filter: &filter::Filter, + delta: &Delta, + ) -> anyhow::Result { + let schema = &delta.output; + let disjunctions = filter.simplify_to_dnf()?.flatten_disjunctions(); + + // If we have a single disjunction, we are going to return an optional + // single variant. + if disjunctions.len() == 1 { + let disjunct = disjunctions.into_iter().next().unwrap(); + let pushed = disjunct.remove_datum(schema)?.map(Predicates::Single); + let not_pushed = + disjunct.only_datum(schema)?.map(Predicates::Single); + return Ok(SplitPredicates { pushed, not_pushed }); + } + + // We have multiple predicates, so let's separate out them all. 
+ let mut pushed = Vec::with_capacity(disjunctions.len()); + let mut not_pushed = Vec::with_capacity(disjunctions.len()); + for disjunct in disjunctions { + pushed.push(disjunct.remove_datum(schema)?); + not_pushed.push(disjunct.only_datum(schema)?); + } + + // We need to "compress" either side if it's an array of all Nones. That + // means none of disjuncts could be reordered, and so we'll need to + // elide the plan node entirely. + let pushed = if pushed.iter().all(Option::is_none) { + None + } else { + Some(Predicates::from(OptionalDisjunctions(pushed))) + }; + let not_pushed = if not_pushed.iter().all(Option::is_none) { + None + } else { + Some(Predicates::from(OptionalDisjunctions(not_pushed))) + }; + Ok(SplitPredicates { pushed, not_pushed }) + } + + pub(crate) fn to_required(&self) -> anyhow::Result> { + match self { + Predicates::Single(single) => { + single.simplify_to_dnf().map(|f| f.flatten_disjunctions()) + } + Predicates::Disjunctions(disjuncts) => { + let mut out = Vec::with_capacity(disjuncts.len()); + for disjunct in disjuncts.iter() { + out.extend( + disjunct + .as_ref() + .expect("Must be Some(_) here") + .simplify_to_dnf()? + .flatten_disjunctions(), + ); + } + Ok(out) + } + } + } + + /// And this predicate with another. + /// + /// This will fail if there are different numbers of predicates in each + /// argument, since we won't know how to match up the predicates in that + /// case. + pub(crate) fn and(&self, other: &Self) -> anyhow::Result { + match (self, other) { + (Predicates::Single(left), Predicates::Single(right)) => { + Ok(Self::Single(left.and(right))) + } + ( + Predicates::Single(single), + Predicates::Disjunctions(disjunctions), + ) + | ( + Predicates::Disjunctions(disjunctions), + Predicates::Single(single), + ) => { + anyhow::ensure!( + disjunctions.len() == 1, + "Can only merge together a list of predicates with a \ + single predicate if the list has length 1" + ); + if let Some(right) = &disjunctions[0] { + Ok(Self::Single(single.and(right))) + } else { + Ok(Self::Single(single.clone())) + } + } + ( + Predicates::Disjunctions(left), + Predicates::Disjunctions(right), + ) => { + anyhow::ensure!( + left.len() == 1 && right.len() == 1, + "Can only merge together lists of predicates with length 1" + ); + match (&left[0], &right[0]) { + (None, None) => { + anyhow::bail!("Both predicates cannot be empty") + } + (None, Some(single)) | (Some(single), None) => { + Ok(Self::Single(single.clone())) + } + (Some(left), Some(right)) => { + Ok(Self::Single(left.and(right))) + } + } + } + } + } + + pub fn plan_tree_entries(&self) -> Vec { + let mut out = Vec::with_capacity(1); + match self { + Predicates::Single(single) => { + out.push(format!("key group 0: {}", single)); + } + Predicates::Disjunctions(disjuncts) => { + out.reserve(disjuncts.len().saturating_sub(1)); + for (i, disjunct) in disjuncts.iter().enumerate() { + let s = match disjunct { + Some(d) => d.to_string(), + None => String::from("-"), + }; + out.push(format!("key group {i}: {s}")); + } + } + } + out + } + + /// Return `true` if we can reorder predicates around the limit. + pub(crate) fn can_reorder_around(&self, limit: &Limit) -> bool { + match self { + Predicates::Single(single) => single.can_reorder_around(limit), + Predicates::Disjunctions(disjunctions) => { + disjunctions.iter().all(|maybe_filter| { + maybe_filter + .as_ref() + .map(|filter| filter.can_reorder_around(limit)) + .unwrap_or(true) + }) + } + } + } +} + +/// A list of optional, disjunctive elements from a filter predicate. 
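+///
+/// Each entry corresponds positionally to one disjunct of the original
+/// filter; a `None` entry is a placeholder for a disjunct that has no
+/// component on this side of a split.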
+/// +/// See [`Predicates`] for details. +#[derive(Clone, Debug, PartialEq)] +pub struct OptionalDisjunctions(Vec<Option<filter::Filter>>); + +impl std::ops::Deref for OptionalDisjunctions { + type Target = [Option<filter::Filter>]; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +#[cfg(test)] +mod tests { + use crate::oxql::ast::grammar::query_parser; + use crate::oxql::plan::node::Node; + use crate::oxql::plan::plan::test_utils::all_schema; + use crate::oxql::plan::predicates::Predicates; + use crate::oxql::plan::predicates::SplitPredicates; + use crate::oxql::plan::Plan; + + #[tokio::test] + async fn push_single_predicate_through_alignment() { + let query = query_parser::query( + "get physical_data_link:bytes_sent | align mean_within(1s)", + ) + .unwrap(); + let plan = Plan::new(query, all_schema().await).unwrap(); + let Node::Align(align) = plan.nodes().last().unwrap() else { + panic!( + "Plan should end with an align node, found {:?}", + plan.nodes().last().unwrap() + ); + }; + let filter = query_parser::filter("filter link_name == 'foo'").unwrap(); + let predicates = Predicates::Single(filter.clone()); + let SplitPredicates { pushed, not_pushed } = + predicates.split_around_align(&align).unwrap(); + let Predicates::Single(single) = pushed.as_ref().unwrap() else { + panic!("Expected Predicates::Single, found {:?}", pushed); + }; + assert_eq!( + &filter, single, + "Should have pushed entire predicate through" + ); + assert!(not_pushed.is_none(), "Should have pushed entire predicate"); + } + + #[tokio::test] + async fn push_single_predicate_partway_through_alignment() { + let query = query_parser::query( + "get physical_data_link:bytes_sent | align mean_within(1s)", + ) + .unwrap(); + let plan = Plan::new(query, all_schema().await).unwrap(); + let Node::Align(align) = plan.nodes().last().unwrap() else { + panic!( + "Plan should end with an align node, found {:?}", + plan.nodes().last().unwrap() + ); + }; + let filter = + query_parser::filter("filter link_name == 'foo' && datum > 100.0") + .unwrap(); + let predicates = Predicates::Single(filter.clone()); + let SplitPredicates { pushed, not_pushed } = + predicates.split_around_align(&align).unwrap(); + let Predicates::Single(single) = pushed.as_ref().unwrap() else { + panic!("Expected Predicates::Single, found {:?}", pushed); + }; + let expected = + query_parser::filter("filter link_name == 'foo'").unwrap(); + assert_eq!( + single, &expected, + "Incorrect filter pushed through alignment" + ); + let Predicates::Single(left) = not_pushed.as_ref().unwrap() else { + panic!("Expected Predicates::Single, found {:?}", not_pushed,); + }; + let expected = query_parser::filter("filter datum > 100.0").unwrap(); + assert_eq!( + left, &expected, + "The remaining predicate on datum should have been left behind" + ); + } + + #[tokio::test] + async fn push_none_of_single_predicate_through_alignment() { + let query = query_parser::query( + "get physical_data_link:bytes_sent | align mean_within(1s)", + ) + .unwrap(); + let plan = Plan::new(query, all_schema().await).unwrap(); + let Node::Align(align) = plan.nodes().last().unwrap() else { + panic!( + "Plan should end with an align node, found {:?}", + plan.nodes().last().unwrap() + ); + }; + let filter = query_parser::filter("filter datum > 100.0").unwrap(); + let predicates = Predicates::Single(filter.clone()); + let SplitPredicates { pushed, not_pushed } = + predicates.split_around_align(&align).unwrap(); + assert!( + pushed.is_none(), + "Should have pushed nothing through alignment" + ); + let Predicates::Single(left) =
not_pushed.as_ref().unwrap() else { + panic!("Expected Predicates::Single, found {:?}", not_pushed,); + }; + let expected = query_parser::filter("filter datum > 100.0").unwrap(); + assert_eq!( + left, &expected, + "The entire predicate on datum should have been left behind" + ); + } + + #[tokio::test] + async fn push_multiple_predicates_partway_through_alignment() { + let query = query_parser::query( + "get physical_data_link:bytes_sent | align mean_within(1s)", + ) + .unwrap(); + let plan = Plan::new(query, all_schema().await).unwrap(); + let Node::Align(align) = plan.nodes().last().unwrap() else { + panic!( + "Plan should end with an align node, found {:?}", + plan.nodes().last().unwrap() + ); + }; + let filter = + query_parser::filter("filter (link_name == 'foo' || link_name == 'foo') && datum > 100.0") + .unwrap(); + let predicates = Predicates::Single(filter.clone()); + let SplitPredicates { pushed, not_pushed } = + predicates.split_around_align(&align).unwrap(); + let Predicates::Disjunctions(list) = pushed.as_ref().unwrap() else { + panic!("Expected Predicates::Disjunctions, found {:?}", pushed); + }; + let expected = + query_parser::filter("filter link_name == 'foo'").unwrap(); + assert_eq!(list.len(), 2); + for each in list.iter() { + let each = each.as_ref().expect("Should have pushed through"); + assert_eq!( + each, &expected, + "Incorrect filter pushed through alignment" + ); + } + + let Predicates::Disjunctions(list) = not_pushed.as_ref().unwrap() + else { + panic!("Expected Predicates::Disjunctions, found {:?}", not_pushed,); + }; + let expected = query_parser::filter("filter datum > 100.0").unwrap(); + assert_eq!(list.len(), 2); + for each in list.iter() { + let each = each.as_ref().expect("Should have pushed through"); + assert_eq!( + each, &expected, + "Incorrect filter pushed through alignment" + ); + } + } + + #[tokio::test] + async fn push_predicates_partway_through_alignment_differently() { + let query = query_parser::query( + "get physical_data_link:bytes_sent | align mean_within(1s)", + ) + .unwrap(); + let plan = Plan::new(query, all_schema().await).unwrap(); + let Node::Align(align) = plan.nodes().last().unwrap() else { + panic!( + "Plan should end with an align node, found {:?}", + plan.nodes().last().unwrap() + ); + }; + let filter = + query_parser::filter("filter link_name == 'foo' || (link_name == 'foo' && datum > 100.0)") + .unwrap(); + let predicates = Predicates::Single(filter.clone()); + let SplitPredicates { pushed, not_pushed } = + predicates.split_around_align(&align).unwrap(); + let Predicates::Disjunctions(list) = pushed.as_ref().unwrap() else { + panic!("Expected Predicates::Disjunctions, found {:?}", pushed); + }; + let expected = + query_parser::filter("filter link_name == 'foo'").unwrap(); + assert_eq!(list.len(), 2); + for each in list.iter() { + let each = each.as_ref().expect("Should have pushed through"); + assert_eq!( + each, &expected, + "Incorrect filter pushed through alignment" + ); + } + + let Predicates::Disjunctions(list) = not_pushed.as_ref().unwrap() + else { + panic!("Expected Predicates::Disjunctions, found {:?}", not_pushed,); + }; + assert_eq!(list.len(), 2); + assert!(list[0].is_none(), "There is no disjunct to push through"); + let expected = query_parser::filter("filter datum > 100.0").unwrap(); + let each = list[1].as_ref().expect("Should have pushed through"); + assert_eq!( + each, &expected, + "Incorrect filter pushed through alignment" + ); + } +} diff --git a/oximeter/db/src/oxql/query/mod.rs 
b/oximeter/db/src/oxql/query/mod.rs index 46c9bbc92c..ec5b578ffd 100644 --- a/oximeter/db/src/oxql/query/mod.rs +++ b/oximeter/db/src/oxql/query/mod.rs @@ -6,6 +6,8 @@ // Copyright 2024 Oxide Computer Company +use std::collections::BTreeSet; + use super::ast::ident::Ident; use super::ast::logical_op::LogicalOp; use super::ast::table_ops::filter::CompoundFilter; @@ -34,15 +36,15 @@ pub struct Query { impl Query { /// Construct a query written in OxQL. pub fn new(query: impl AsRef) -> Result { - let raw = query.as_ref().trim(); + let query = query.as_ref().trim(); const MAX_LEN: usize = 4096; anyhow::ensure!( - raw.len() <= MAX_LEN, + query.len() <= MAX_LEN, "Queries must be <= {} characters", MAX_LEN, ); - let parsed = grammar::query_parser::query(raw) - .map_err(|e| fmt_parse_error(raw, e))?; + let parsed = grammar::query_parser::query(query) + .map_err(|e| fmt_parse_error(query, e))?; // Fetch the latest query end time referred to in the parsed query, or // use now if there isn't one. @@ -350,6 +352,16 @@ impl Query { pub(crate) fn split(&self) -> SplitQuery { self.parsed.split(self.end_time) } + + /// Return the set of all timeseries names referred to by the query. + pub(crate) fn all_timeseries_names(&self) -> BTreeSet<&TimeseriesName> { + self.parsed.all_timeseries_names() + } + + /// Return the parsed query AST node. + pub(crate) fn parsed_query(&self) -> &QueryNode { + &self.parsed + } } // Return a new filter containing only parts that refer to either: diff --git a/oximeter/db/src/oxql/schema.rs b/oximeter/db/src/oxql/schema.rs new file mode 100644 index 0000000000..269e311bfb --- /dev/null +++ b/oximeter/db/src/oxql/schema.rs @@ -0,0 +1,121 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Schema for tables in OxQL queries. + +// Copyright 2024 Oxide Computer Company + +use std::collections::BTreeMap; + +use oximeter::DatumType; +use oximeter::FieldType; +use oximeter::TimeseriesSchema; +use oxql_types::point::DataType; +use oxql_types::point::MetricType; +use serde::Deserialize; +use serde::Serialize; + +// TODO(ben) Move MetricType and DataType enums here, or probably this type +// there. +// TODO(ben) Consider biting the bullet and supporting every data type now. Why +// not? Lots more code, since we need to deal with type promotion rules and +// such. + +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct TableSchema { + /// The name of the table. + pub name: String, + /// The mapping from field names to their types. + pub fields: BTreeMap<String, FieldType>, + /// The metric types of the contained data, one for each dimension. + /// + /// There can be more than one in the case of joins. + pub metric_types: Vec<MetricType>, + /// The types of the raw data, one for each dimension. + /// + /// There can be more than one in the case of joins. + pub data_types: Vec<DataType>, +} + +impl TableSchema { + /// Construct a new table schema from a schema in the database. + /// + /// This builds an initial table schema for an OxQL query. Queries always + /// start by referring to one or more schemas in the database itself. The + /// resulting table schemas can then change as they are processed by the query.
+ pub fn new(timeseries_schema: &TimeseriesSchema) -> Self { + let (metric_type, data_type) = match timeseries_schema.datum_type { + DatumType::Bool => (MetricType::Gauge, DataType::Boolean), + DatumType::I8 => (MetricType::Gauge, DataType::Integer), + DatumType::U8 => (MetricType::Gauge, DataType::Integer), + DatumType::I16 => (MetricType::Gauge, DataType::Integer), + DatumType::U16 => (MetricType::Gauge, DataType::Integer), + DatumType::I32 => (MetricType::Gauge, DataType::Integer), + DatumType::U32 => (MetricType::Gauge, DataType::Integer), + DatumType::I64 => (MetricType::Gauge, DataType::Integer), + DatumType::U64 => (MetricType::Gauge, DataType::Integer), + DatumType::F32 => (MetricType::Gauge, DataType::Double), + DatumType::F64 => (MetricType::Gauge, DataType::Double), + DatumType::String => (MetricType::Gauge, DataType::String), + DatumType::Bytes => (MetricType::Gauge, DataType::String), + DatumType::CumulativeI64 => { + (MetricType::Cumulative, DataType::Integer) + } + DatumType::CumulativeU64 => { + (MetricType::Cumulative, DataType::Integer) + } + DatumType::CumulativeF32 => { + (MetricType::Cumulative, DataType::Double) + } + DatumType::CumulativeF64 => { + (MetricType::Cumulative, DataType::Double) + } + DatumType::HistogramI8 => { + (MetricType::Cumulative, DataType::IntegerDistribution) + } + DatumType::HistogramU8 => { + (MetricType::Cumulative, DataType::IntegerDistribution) + } + DatumType::HistogramI16 => { + (MetricType::Cumulative, DataType::IntegerDistribution) + } + DatumType::HistogramU16 => { + (MetricType::Cumulative, DataType::IntegerDistribution) + } + DatumType::HistogramI32 => { + (MetricType::Cumulative, DataType::IntegerDistribution) + } + DatumType::HistogramU32 => { + (MetricType::Cumulative, DataType::IntegerDistribution) + } + DatumType::HistogramI64 => { + (MetricType::Cumulative, DataType::IntegerDistribution) + } + DatumType::HistogramU64 => { + (MetricType::Cumulative, DataType::IntegerDistribution) + } + DatumType::HistogramF32 => { + (MetricType::Cumulative, DataType::DoubleDistribution) + } + DatumType::HistogramF64 => { + (MetricType::Cumulative, DataType::DoubleDistribution) + } + }; + Self { + name: timeseries_schema.timeseries_name.to_string(), + fields: timeseries_schema + .field_schema + .iter() + .map(|field| (field.name.clone(), field.field_type)) + .collect(), + metric_types: vec![metric_type], + data_types: vec![data_type], + } + } + + /// Return the type of the named field, if it is part of the schema.
+ pub(crate) fn field_type(&self, name: &str) -> Option<&FieldType> { + self.fields.get(name) + } +} diff --git a/oximeter/db/src/shells/oxql.rs b/oximeter/db/src/shells/oxql.rs index f46d08c0cf..62d7f1bedc 100644 --- a/oximeter/db/src/shells/oxql.rs +++ b/oximeter/db/src/shells/oxql.rs @@ -83,6 +83,47 @@ pub async fn shell( } else { print_oxql_operation_help(stmt); } + } else if let Some(stmt) = cmd.strip_prefix("plan") { + match client + .plan_oxql_query( + stmt.trim().trim_end_matches(';'), + ) + .await + { + Ok(plan) => { + println!( + "{}\n{}\n", + "OxQL query plan".underlined(), + plan.query, + ); + let (original, optimized_plan) = + plan.to_plan_tree(); + println!("{}", "Original".underlined()); + println!("{}", original); + println!("{}", "Optimized".underlined()); + println!("{}", optimized_plan); + let output = plan.output(); + println!( + "{} ({} table{})", + "Output".underlined(), + output.n_tables(), + if output.n_tables() == 1 { + "" + } else { + "s" + }, + ); + println!("{}", output); + println!( + "Planning time: {:?}", + plan.duration + ); + } + Err(e) => { + eprintln!("{}", "Error".underlined().red()); + eprintln!("{e}"); + } + } } else { match client .oxql_query(cmd.trim().trim_end_matches(';')) @@ -151,7 +192,7 @@ Get instances of a timeseries by name"#; println!("{HELP}"); } "filter" => { - const HELP: &str = r#"filter "); + const HELP: &str = r#"filter "; Filter timeseries based on their attributes. can be a logical combination of filtering @@ -190,6 +231,18 @@ account the timestamps, and does not align the outputs directly."#; println!("{HELP}"); } + "align" => { + const HELP: &str = r#"align + +Align the timepoints in each timeseries, so that they +occur on exactly regular intervals. Nearby timepoints +may be combined with the specified alignment method. +Alignment is required to combine multiple timeseries +together, either by joining them across tables, or grouping +them within tables. +"#; + println!("{HELP}"); + } _ => eprintln!("unrecognized OxQL operation: '{op}'"), } } @@ -202,6 +255,7 @@ fn print_basic_commands() { println!(" \\l - List timeseries"); println!(" \\d - Describe a timeseries"); println!(" \\ql [] - Get OxQL help about an operation"); + println!(" plan - Emit the query plan for an OxQL query"); println!(); println!("Or try entering an OxQL `get` query"); } @@ -219,12 +273,13 @@ operator, "|". All queries start with a `get` operation, which selects a timeseries from the database, by name. For example: -`get physical_data_link:bytes_received` +`get sled_data_link:bytes_received` The supported timeseries operations are: - get: Select a timeseries by name - filter: Filter timeseries by field or sample values +- align: Temporally align timeseries, combining nearby points. - group_by: Group timeseries by fields, applying a reducer. 
- join: Join two or more timeseries together diff --git a/oximeter/db/tests/integration_test.rs b/oximeter/db/tests/integration_test.rs index eca0adf7d5..eedc7b0862 100644 --- a/oximeter/db/tests/integration_test.rs +++ b/oximeter/db/tests/integration_test.rs @@ -211,7 +211,9 @@ async fn test_cluster() -> anyhow::Result<()> { // Get all the samples from the replica where the data was inserted let start = tokio::time::Instant::now(); let oxql_res1 = client1 - .oxql_query("get virtual_machine:cpu_busy") + .oxql_query( + "get virtual_machine:cpu_busy | filter timestamp > @2000-01-01", + ) .await .expect("failed to get all samples"); info!(log, "query samples from client1 time = {:?}", start.elapsed()); @@ -425,7 +427,7 @@ async fn wait_for_num_points( poll::wait_for_condition( || async { let oxql_res = client - .oxql_query("get virtual_machine:cpu_busy") + .oxql_query("get virtual_machine:cpu_busy | filter timestamp > @2000-01-01") .await .map_err(|_| { poll::CondCheckError::::NotYet diff --git a/oximeter/db/tests/timeseries-schema.json b/oximeter/db/tests/timeseries-schema.json new file mode 100644 index 0000000000..43be9dd80f --- /dev/null +++ b/oximeter/db/tests/timeseries-schema.json @@ -0,0 +1,83 @@ +{"timeseries_name":"collection_target:cpus_provisioned","fields.name":["id"],"fields.type":["Uuid"],"fields.source":["Target"],"datum_type":"I64","created":"2023-10-25 23:05:04.909050887"} +{"timeseries_name":"collection_target:ram_provisioned","fields.name":["id"],"fields.type":["Uuid"],"fields.source":["Target"],"datum_type":"I64","created":"2023-10-25 23:05:04.915652918"} +{"timeseries_name":"collection_target:virtual_disk_space_provisioned","fields.name":["id"],"fields.type":["Uuid"],"fields.source":["Target"],"datum_type":"I64","created":"2023-10-25 23:13:04.927950665"} +{"timeseries_name":"crucible_upstairs:activated","fields.name":["upstairs_uuid"],"fields.type":["Uuid"],"fields.source":["Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:04:34.899663499"} +{"timeseries_name":"crucible_upstairs:extent_no_op","fields.name":["upstairs_uuid"],"fields.type":["Uuid"],"fields.source":["Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:04:34.930943640"} +{"timeseries_name":"crucible_upstairs:extent_reopen","fields.name":["upstairs_uuid"],"fields.type":["Uuid"],"fields.source":["Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:04:34.934827868"} +{"timeseries_name":"crucible_upstairs:extent_repair","fields.name":["upstairs_uuid"],"fields.type":["Uuid"],"fields.source":["Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:04:34.927207024"} +{"timeseries_name":"crucible_upstairs:flush","fields.name":["upstairs_uuid"],"fields.type":["Uuid"],"fields.source":["Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:04:34.903985541"} +{"timeseries_name":"crucible_upstairs:flush_close","fields.name":["upstairs_uuid"],"fields.type":["Uuid"],"fields.source":["Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:04:34.923454748"} +{"timeseries_name":"crucible_upstairs:read","fields.name":["upstairs_uuid"],"fields.type":["Uuid"],"fields.source":["Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:04:34.915831652"} +{"timeseries_name":"crucible_upstairs:read_bytes","fields.name":["upstairs_uuid"],"fields.type":["Uuid"],"fields.source":["Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:04:34.919609014"} 
+{"timeseries_name":"crucible_upstairs:write","fields.name":["upstairs_uuid"],"fields.type":["Uuid"],"fields.source":["Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:04:34.908085310"} +{"timeseries_name":"crucible_upstairs:write_bytes","fields.name":["upstairs_uuid"],"fields.type":["Uuid"],"fields.source":["Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:04:34.911931515"} +{"timeseries_name":"data_link:abort","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.798993111"} +{"timeseries_name":"data_link:b_e_r_check_done","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.783030992"} +{"timeseries_name":"data_link:b_e_r_check_start","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.780161540"} +{"timeseries_name":"data_link:bad_sync_headers","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.740936507"} +{"timeseries_name":"data_link:disabled","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.801743186"} +{"timeseries_name":"data_link:enabled","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"Bool","created":"2023-10-25 23:03:45.612915119"} +{"timeseries_name":"data_link:end","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.785599421"} +{"timeseries_name":"data_link:errored_blocks","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.743619233"} +{"timeseries_name":"data_link:fec_align","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"Bool","created":"2023-10-25 23:03:45.714770919"} +{"timeseries_name":"data_link:fec_corr_cnt","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.717128696"} 
+{"timeseries_name":"data_link:fec_hi_ser","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"Bool","created":"2023-10-25 23:03:45.712431057"} +{"timeseries_name":"data_link:fec_ser_lane0","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.721850240"} +{"timeseries_name":"data_link:fec_ser_lane1","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.724264062"} +{"timeseries_name":"data_link:fec_ser_lane2","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.726673517"} +{"timeseries_name":"data_link:fec_ser_lane3","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.729043757"} +{"timeseries_name":"data_link:fec_ser_lane4","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.731511643"} +{"timeseries_name":"data_link:fec_ser_lane5","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.733885410"} +{"timeseries_name":"data_link:fec_ser_lane6","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.736336104"} +{"timeseries_name":"data_link:fec_ser_lane7","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.738594962"} +{"timeseries_name":"data_link:fec_uncorr_cnt","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.719451726"} +{"timeseries_name":"data_link:idle","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.761751371"} 
+{"timeseries_name":"data_link:link_down","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.774724311"} +{"timeseries_name":"data_link:link_up","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"Bool","created":"2023-10-25 23:03:45.660176566"} +{"timeseries_name":"data_link:monitor_p_r_b_s_errors","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.793275938"} +{"timeseries_name":"data_link:pci_hi_ber","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.751291996"} +{"timeseries_name":"data_link:pcs_block_lock_loss","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.748686727"} +{"timeseries_name":"data_link:pcs_invalid_errors","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.759320906"} +{"timeseries_name":"data_link:pcs_sync_loss","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.746113450"} +{"timeseries_name":"data_link:pcs_unknown_errors","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.756839686"} +{"timeseries_name":"data_link:pcs_valid_errors","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.754036510"} +{"timeseries_name":"data_link:remote_fault","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.772173897"} +{"timeseries_name":"data_link:rx_buf_full","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.700321323"} 
+{"timeseries_name":"data_link:rx_bytes","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.695469190"} +{"timeseries_name":"data_link:rx_crc_errs","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.697937216"} +{"timeseries_name":"data_link:rx_errs","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.702700701"} +{"timeseries_name":"data_link:rx_pkts","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.689905229"} +{"timeseries_name":"data_link:tofino3_states","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.796116455"} +{"timeseries_name":"data_link:tx_bytes","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.707581108"} +{"timeseries_name":"data_link:tx_errs","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.709933314"} +{"timeseries_name":"data_link:tx_pkts","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.705180840"} +{"timeseries_name":"data_link:wait_auto_neg_done","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.788057428"} +{"timeseries_name":"data_link:wait_auto_neg_link_training_done","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.790645334"} +{"timeseries_name":"data_link:wait_d_f_e_done","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.769677587"} 
+{"timeseries_name":"data_link:wait_p_l_l_ready","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.764520692"} +{"timeseries_name":"data_link:wait_signal_o_k","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.767257141"} +{"timeseries_name":"data_link:wait_test_done","fields.name":["link_id","port_id","rack_id","sidecar_id","sled_id"],"fields.type":["I64","String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.777584285"} +{"timeseries_name":"database_transaction:retry_data","fields.name":["attempt","name"],"fields.type":["U32","String"],"fields.source":["Metric","Target"],"datum_type":"F64","created":"2023-12-09 05:05:12.777325080"} +{"timeseries_name":"http_service:request_latency_histogram","fields.name":["id","method","name","route","status_code"],"fields.type":["Uuid","String","String","String","I64"],"fields.source":["Target","Metric","Target","Metric","Metric"],"datum_type":"HistogramF64","created":"2023-10-25 23:04:04.891225842"} +{"timeseries_name":"instance_uuid:pv_panic_guest_handled","fields.name":["uuid"],"fields.type":["Uuid"],"fields.source":["Target"],"datum_type":"CumulativeI64","created":"2024-02-01 06:42:49.623791045"} +{"timeseries_name":"instance_uuid:pv_panic_host_handled","fields.name":["uuid"],"fields.type":["Uuid"],"fields.source":["Target"],"datum_type":"CumulativeI64","created":"2024-02-01 06:42:49.632352775"} +{"timeseries_name":"instance_uuid:reset","fields.name":["uuid"],"fields.type":["Uuid"],"fields.source":["Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:04:34.894527394"} +{"timeseries_name":"kstat_sampler:samples_dropped","fields.name":["hostname","target_id","target_name"],"fields.type":["String","U64","String"],"fields.source":["Target","Metric","Metric"],"datum_type":"CumulativeU64","created":"2023-11-17 02:53:34.642824724"} +{"timeseries_name":"oximeter_collector:collections","fields.name":["base_route","collector_id","collector_ip","collector_port","producer_id","producer_ip","producer_port"],"fields.type":["String","Uuid","IpAddr","U16","Uuid","IpAddr","U16"],"fields.source":["Metric","Target","Target","Target","Metric","Metric","Metric"],"datum_type":"CumulativeU64","created":"2023-11-17 02:53:57.255983666"} +{"timeseries_name":"oximeter_collector:failed_collections","fields.name":["base_route","collector_id","collector_ip","collector_port","producer_id","producer_ip","producer_port","reason"],"fields.type":["String","Uuid","IpAddr","U16","Uuid","IpAddr","U16","String"],"fields.source":["Metric","Target","Target","Target","Metric","Metric","Metric","Metric"],"datum_type":"CumulativeU64","created":"2023-11-17 02:56:39.525743256"} +{"timeseries_name":"physical_data_link:bytes_received","fields.name":["hostname","link_name","rack_id","serial","sled_id"],"fields.type":["String","String","Uuid","String","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeU64","created":"2023-11-17 02:53:34.484386388"} 
+{"timeseries_name":"physical_data_link:bytes_sent","fields.name":["hostname","link_name","rack_id","serial","sled_id"],"fields.type":["String","String","Uuid","String","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeU64","created":"2023-11-17 02:53:34.138101022"} +{"timeseries_name":"physical_data_link:errors_received","fields.name":["hostname","link_name","rack_id","serial","sled_id"],"fields.type":["String","String","Uuid","String","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeU64","created":"2023-11-17 02:53:34.449876345"} +{"timeseries_name":"physical_data_link:errors_sent","fields.name":["hostname","link_name","rack_id","serial","sled_id"],"fields.type":["String","String","Uuid","String","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeU64","created":"2023-11-17 02:53:34.477064098"} +{"timeseries_name":"physical_data_link:packets_received","fields.name":["hostname","link_name","rack_id","serial","sled_id"],"fields.type":["String","String","Uuid","String","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeU64","created":"2023-11-17 02:53:34.488575473"} +{"timeseries_name":"physical_data_link:packets_sent","fields.name":["hostname","link_name","rack_id","serial","sled_id"],"fields.type":["String","String","Uuid","String","Uuid"],"fields.source":["Target","Target","Target","Target","Target"],"datum_type":"CumulativeU64","created":"2023-11-17 02:53:34.439549378"} +{"timeseries_name":"sidecar:sample_time","fields.name":["board_rev","rack_id","sidecar_id","sled_id"],"fields.type":["String","Uuid","Uuid","Uuid"],"fields.source":["Target","Target","Target","Target"],"datum_type":"I64","created":"2023-10-25 23:03:45.947714957"} +{"timeseries_name":"switch_table:capacity","fields.name":["rack_id","sidecar_id","sled_id","table"],"fields.type":["Uuid","Uuid","Uuid","String"],"fields.source":["Target","Target","Target","Target"],"datum_type":"I64","created":"2023-10-25 23:03:45.916472267"} +{"timeseries_name":"switch_table:collisions","fields.name":["rack_id","sidecar_id","sled_id","table"],"fields.type":["Uuid","Uuid","Uuid","String"],"fields.source":["Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.930654934"} +{"timeseries_name":"switch_table:delete_misses","fields.name":["rack_id","sidecar_id","sled_id","table"],"fields.type":["Uuid","Uuid","Uuid","String"],"fields.source":["Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.935986861"} +{"timeseries_name":"switch_table:deletes","fields.name":["rack_id","sidecar_id","sled_id","table"],"fields.type":["Uuid","Uuid","Uuid","String"],"fields.source":["Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.925327996"} +{"timeseries_name":"switch_table:exhaustion","fields.name":["rack_id","sidecar_id","sled_id","table"],"fields.type":["Uuid","Uuid","Uuid","String"],"fields.source":["Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.938635382"} +{"timeseries_name":"switch_table:inserts","fields.name":["rack_id","sidecar_id","sled_id","table"],"fields.type":["Uuid","Uuid","Uuid","String"],"fields.source":["Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.922203276"} 
+{"timeseries_name":"switch_table:occupancy","fields.name":["rack_id","sidecar_id","sled_id","table"],"fields.type":["Uuid","Uuid","Uuid","String"],"fields.source":["Target","Target","Target","Target"],"datum_type":"I64","created":"2023-10-25 23:03:45.919412144"} +{"timeseries_name":"switch_table:update_misses","fields.name":["rack_id","sidecar_id","sled_id","table"],"fields.type":["Uuid","Uuid","Uuid","String"],"fields.source":["Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.933217752"} +{"timeseries_name":"switch_table:updates","fields.name":["rack_id","sidecar_id","sled_id","table"],"fields.type":["Uuid","Uuid","Uuid","String"],"fields.source":["Target","Target","Target","Target"],"datum_type":"CumulativeI64","created":"2023-10-25 23:03:45.927951288"} diff --git a/oximeter/oxql-types/src/point.rs b/oximeter/oxql-types/src/point.rs index 6e3c7143dc..98338cfd76 100644 --- a/oximeter/oxql-types/src/point.rs +++ b/oximeter/oxql-types/src/point.rs @@ -43,9 +43,17 @@ pub enum DataType { impl DataType { /// True if this is a numeric scalar type. - pub fn is_numeric(&self) -> bool { + pub const fn is_numeric(&self) -> bool { matches!(self, DataType::Integer | DataType::Double) } + + /// Return true if this is a distribution data type. + pub const fn is_distribution(&self) -> bool { + matches!( + self, + DataType::IntegerDistribution | DataType::DoubleDistribution + ) + } } impl TryFrom for DataType { @@ -107,6 +115,12 @@ pub enum MetricType { /// The value represents an accumulation between two points in time. Cumulative, } +impl MetricType { + /// Return true if this is cumulative. + pub const fn is_cumulative(&self) -> bool { + matches!(self, MetricType::Cumulative) + } +} impl fmt::Display for MetricType { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {