observablehq · mbostock · Aug 11, 2021 · Aug 11, 2021 · Aug 11, 2021 · Aug 11, 2021
diff --git a/README.md b/README.md
@@ -987,7 +987,7 @@ Filters the data given the specified *test*. The test can be given as an accesso
 
 [Source](./src/transforms/bin.js) · [Examples](https://observablehq.com/@observablehq/plot-bin) · Aggregates continuous data — quantitative or temporal values such as temperatures or times — into discrete bins and then computes summary statistics for each bin such as a count or sum. The bin transform is like a continuous [group transform](#group) and is often used to make histograms. There are separate transforms depending on which dimensions need binning: [Plot.binX](#plotbinxoutputs-options) for *x*; [Plot.binY](#plotbinyoutputs-options) for *y*; and [Plot.bin](#plotbinoutputs-options) for both *x* and *y*.
 
-Given input *data* = [*d₀*, *d₁*, *d₂*, …], by default the resulting binned data is an array of arrays where each inner array is a subset of the input data [[*d₀₀*, *d₀₁*, …], [*d₁₀*, *d₁₁*, …], [*d₂₀*, *d₂₁*, …], …]. Each inner array is in input order. The outer array is in ascending order according to the associated dimension (*x* then *y*). Empty bins are skipped. By specifying a different aggregation method for the *data* output, as described below, you can change how the binned data is computed.
+Given input *data* = [*d₀*, *d₁*, *d₂*, …], by default the resulting binned data is an array of arrays where each inner array is a subset of the input data [[*d₀₀*, *d₀₁*, …], [*d₁₀*, *d₁₁*, …], [*d₂₀*, *d₂₁*, …], …]. Each inner array is in input order. The outer array is in ascending order according to the associated dimension (*x* then *y*). Empty bins are skipped by default. By specifying a different aggregation method for the *data* output, as described below, you can change how the binned data is computed. The outputs may also include *filter* and *sort* options specified as aggregation methods, and a *reverse* option to reverse the order of generated bins. By default, empty bins are omitted (set *filter* to null to include them), and non-empty bins are generated in ascending threshold order.
 
 While it is possible to compute channel values on the binned data by defining channel values as a function, more commonly channel values are computed directly by the bin transform, either implicitly or explicitly. In addition to data, the following channels are automatically aggregated:
 
@@ -1119,7 +1119,7 @@ Bins on *y*. Groups on on *x* and first channel of *z*, *fill*, or *stroke*, if
 
 [Source](./src/transforms/group.js) · [Examples](https://observablehq.com/@observablehq/plot-group) · Aggregates ordinal or categorical data — such as names — into groups and then computes summary statistics for each group such as a count or sum. The group transform is like a discrete [bin transform](#bin). There are separate transforms depending on which dimensions need grouping: [Plot.groupZ](#plotgroupzoutputs-options) for *z*; [Plot.groupX](#plotgroupxoutputs-options) for *x* and *z*; [Plot.groupY](#plotgroupyoutputs-options) for *y* and *z*; and [Plot.group](#plotgroupoutputs-options) for *x*, *y*, and *z*.
 
-Given input *data* = [*d₀*, *d₁*, *d₂*, …], by default the resulting grouped data is an array of arrays where each inner array is a subset of the input data [[*d₀₀*, *d₀₁*, …], [*d₁₀*, *d₁₁*, …], [*d₂₀*, *d₂₁*, …], …]. Each inner array is in input order. The outer array is in natural ascending order according to the associated dimension (*x* then *y*). Empty groups are skipped. By specifying a different aggregation method for the *data* output, as described below, you can change how the grouped data is computed.
+Given input *data* = [*d₀*, *d₁*, *d₂*, …], by default the resulting grouped data is an array of arrays where each inner array is a subset of the input data [[*d₀₀*, *d₀₁*, …], [*d₁₀*, *d₁₁*, …], [*d₂₀*, *d₂₁*, …], …]. Each inner array is in input order. The outer array is in natural ascending order according to the associated dimension (*x* then *y*). Empty groups are skipped. By specifying a different aggregation method for the *data* output, as described below, you can change how the grouped data is computed. The outputs may also include *filter* and *sort* options specified as aggregation methods, and a *reverse* option to reverse the order of generated groups. By default, all (non-empty) groups are generated in ascending natural order.
 
 While it is possible to compute channel values on the grouped data by defining channel values as a function, more commonly channel values are computed directly by the group transform, either implicitly or explicitly. In addition to data, the following channels are automatically aggregated:
 

diff --git a/src/transforms/bin.js b/src/transforms/bin.js
@@ -2,7 +2,7 @@ import {bin as binner, extent, thresholdFreedmanDiaconis, thresholdScott, thresh
 import {valueof, range, identity, maybeLazyChannel, maybeTuple, maybeColor, maybeValue, mid, labelof, isTemporal} from "../mark.js";
 import {offset} from "../style.js";
 import {basic} from "./basic.js";
-import {maybeGroup, maybeOutputs, maybeReduce, maybeSubgroup, reduceIdentity} from "./group.js";
+import {maybeEvaluator, maybeGroup, maybeOutput, maybeOutputs, maybeReduce, maybeSort, maybeSubgroup, reduceCount, reduceIdentity} from "./group.js";
 
 // Group on {z, fill, stroke}, then optionally on y, then bin x.
 export function binX(outputs = {y: "count"}, {inset, insetLeft, insetRight, ...options} = {}) {
@@ -33,16 +33,25 @@ function binn(
   by, // optionally bin on y (exclusive with gy)
   gx, // optionally group on x (exclusive with bx and gy)
   gy, // optionally group on y (exclusive with by and gx)
-  {data: reduceData = reduceIdentity, ...outputs} = {}, // output channel definitions
+  {
+    data: reduceData = reduceIdentity,
+    filter = reduceCount, // return only non-empty bins by default
+    sort,
+    reverse,
+    ...outputs // output channel definitions
+  } = {},
   inputs = {} // input channels and options
 ) {
   bx = maybeBin(bx);
   by = maybeBin(by);
-  reduceData = maybeReduce(reduceData, identity);
 
-  // Compute the outputs. Don’t group on a channel if one of the output channels
-  // requires it as an input!
+  // Compute the outputs.
   outputs = maybeOutputs(outputs, inputs);
+  reduceData = maybeReduce(reduceData, identity);
+  sort = sort == null ? undefined : maybeOutput("sort", sort, inputs);
+  filter = filter == null ? undefined : maybeEvaluator("filter", filter, inputs);
+
+  // Don’t group on a channel if an output requires it as an input!
   if (gx != null && hasOutput(outputs, "x", "x1", "x2")) gx = null;
   if (gy != null && hasOutput(outputs, "y", "y1", "y2")) gy = null;
 
@@ -90,17 +99,20 @@ function binn(
       const BY2 = by && setBY2([]);
       let i = 0;
       for (const o of outputs) o.initialize(data);
+      if (sort) sort.initialize(data);
+      if (filter) filter.initialize(data);
       for (const facet of facets) {
         const groupFacet = [];
         for (const o of outputs) o.scope("facet", facet);
+        if (sort) sort.scope("facet", facet);
+        if (filter) filter.scope("facet", facet);
         for (const [, I] of maybeGroup(facet, G)) {
           for (const [k, g] of maybeGroup(I, K)) {
             for (const [x1, x2, fx] of BX) {
               const bb = fx(g);
-              if (bb.length === 0) continue;
               for (const [y1, y2, fy] of BY) {
                 const b = fy(bb);
-                if (b.length === 0) continue;
+                if (filter && !filter.reduce(b)) continue;
                 groupFacet.push(i++);
                 groupData.push(reduceData.reduce(b, data));
                 if (K) GK.push(k);
@@ -110,12 +122,14 @@ function binn(
                 if (BX1) BX1.push(x1), BX2.push(x2);
                 if (BY1) BY1.push(y1), BY2.push(y2);
                 for (const o of outputs) o.reduce(b);
+                if (sort) sort.reduce(b);
               }
             }
           }
         }
         groupFacets.push(groupFacet);
       }
+      maybeSort(groupFacets, sort, reverse);
       return {data: groupData, facets: groupFacets};
     }),
     ...BX1 ? {x1: BX1, x2: BX2, x: mid(BX1, BX2)} : {x},
@@ -166,7 +180,7 @@ function maybeBin(options) {
     }
     let bins = bin(range(data)).map(binset);
     if (cumulative) bins = (cumulative < 0 ? bins.reverse() : bins).map(bincumset);
-    return bins.filter(nonempty2).map(binfilter);
+    return bins.map(binfilter);
   };
   bin.label = labelof(value);
   return bin;
@@ -231,11 +245,11 @@ function bincumset([bin], j, bins) {
 }
 
 function binfilter([{x0, x1}, set]) {
-  return [x0, x1, I => I.filter(set.has, set)]; // TODO optimize
+  return [x0, x1, set.size ? I => I.filter(set.has, set) : binempty]; // an empty set uses binempty instead of scanning I
 }
 
-function nonempty2([, {size}]) {
-  return size > 0;
+function binempty() { // stand-in bin filter for empty bins; ignores its argument
+  return new Uint32Array(0); // a fresh zero-length index array on every call
 }
 
 function maybeInset(inset, inset1, inset2) {

diff --git a/src/transforms/group.js b/src/transforms/group.js
@@ -1,5 +1,5 @@
 import {group as grouper, sort, sum, deviation, min, max, mean, median, mode, variance, InternSet} from "d3";
-import {firstof} from "../defined.js";
+import {ascendingDefined, firstof} from "../defined.js";
 import {valueof, maybeColor, maybeInput, maybeTuple, maybeLazyChannel, lazyChannel, first, identity, take, labelof, range} from "../mark.js";
 import {basic} from "./basic.js";
 
@@ -34,11 +34,21 @@ export function group(outputs, options = {}) {
 function groupn(
   x, // optionally group on x
   y, // optionally group on y
-  {data: reduceData = reduceIdentity, ...outputs} = {}, // output channel definitions
+  {
+    data: reduceData = reduceIdentity,
+    filter,
+    sort,
+    reverse,
+    ...outputs // output channel definitions
+  } = {},
   inputs = {} // input channels and options
 ) {
-  reduceData = maybeReduce(reduceData, identity);
+
+  // Compute the outputs.
   outputs = maybeOutputs(outputs, inputs);
+  reduceData = maybeReduce(reduceData, identity);
+  sort = sort == null ? undefined : maybeOutput("sort", sort, inputs);
+  filter = filter == null ? undefined : maybeEvaluator("filter", filter, inputs);
 
   // Produce x and y output channels as appropriate.
   const [GX, setGX] = maybeLazyChannel(x);
@@ -74,12 +84,17 @@ function groupn(
       const GS = S && setGS([]);
       let i = 0;
       for (const o of outputs) o.initialize(data);
+      if (sort) sort.initialize(data);
+      if (filter) filter.initialize(data);
       for (const facet of facets) {
         const groupFacet = [];
         for (const o of outputs) o.scope("facet", facet);
+        if (sort) sort.scope("facet", facet);
+        if (filter) filter.scope("facet", facet);
         for (const [, I] of maybeGroup(facet, G)) {
           for (const [y, gg] of maybeGroup(I, Y)) {
             for (const [x, g] of maybeGroup(gg, X)) {
+              if (filter && !filter.reduce(g)) continue;
               groupFacet.push(i++);
               groupData.push(reduceData.reduce(g, data));
               if (X) GX.push(x);
@@ -88,11 +103,13 @@ function groupn(
               if (F) GF.push(F[g[0]]);
               if (S) GS.push(S[g[0]]);
               for (const o of outputs) o.reduce(g);
+              if (sort) sort.reduce(g);
             }
           }
         }
         groupFacets.push(groupFacet);
       }
+      maybeSort(groupFacets, sort, reverse);
       return {data: groupData, facets: groupFacets};
     }),
     ...GX && {x: GX},
@@ -102,31 +119,50 @@ function groupn(
 }
 
 export function maybeOutputs(outputs, inputs) {
-  return Object.entries(outputs).map(([name, reduce]) => {
-    const value = maybeInput(name, inputs);
-    const reducer = maybeReduce(reduce, value);
-    const [output, setOutput] = lazyChannel(labelof(value, reducer.label));
-    let V, O, context;
-    return {
-      name,
-      output,
-      initialize(data) {
-        V = value === undefined ? data : valueof(data, value);
-        O = setOutput([]);
-        if (reducer.scope === "data") {
-          context = reducer.reduce(range(data), V);
-        }
-      },
-      scope(scope, I) {
-        if (reducer.scope === scope) {
-          context = reducer.reduce(I, V);
-        }
-      },
-      reduce(I) {
-        O.push(reducer.reduce(I, V, context));
+  return Object.entries(outputs).map(([name, reduce]) => maybeOutput(name, reduce, inputs)); // one output per [name, reducer] entry
+}
+
+export function maybeOutput(name, reduce, inputs) { // Wraps an evaluator so that each reduced value is collected into an output channel.
+  const evaluator = maybeEvaluator(name, reduce, inputs);
+  const [output, setOutput] = lazyChannel(evaluator.label);
+  let O; // the array currently collecting reduced values; replaced on each initialize
+  return {
+    name,
+    output,
+    initialize(data) {
+      evaluator.initialize(data);
+      O = setOutput([]); // assumes setOutput returns the array it is given; confirm in lazyChannel
+    },
+    scope(scope, I) {
+      evaluator.scope(scope, I);
+    },
+    reduce(I) {
+      O.push(evaluator.reduce(I)); // appends one value per call, in call order
+    }
+  };
+}
+
+export function maybeEvaluator(name, reduce, inputs) { // Builds a reducer bound to the named input; reduce(I) returns the reduced value.
+  const input = maybeInput(name, inputs);
+  const reducer = maybeReduce(reduce, input);
+  let V, context; // V: values the reducer reads; context: result of the reducer's scoped pre-pass, if any
+  return {
+    label: labelof(input, reducer.label),
+    initialize(data) {
+      V = input === undefined ? data : valueof(data, input); // with no input channel, reduce over the data itself
+      if (reducer.scope === "data") {
+        context = reducer.reduce(range(data), V); // pre-pass over range(data), presumably every index; confirm
       }
-    };
-  });
+    },
+    scope(scope, I) {
+      if (reducer.scope === scope) { // recompute context only when the reducer declares this scope
+        context = reducer.reduce(I, V);
+      }
+    },
+    reduce(I) {
+      return reducer.reduce(I, V, context);
+    }
+  };
 }
 
 export function maybeGroup(I, X) {
@@ -163,6 +199,17 @@ export function maybeSubgroup(outputs, Z, F, S) {
   );
 }
 
+export function maybeSort(facets, sort, reverse) { // Sorts and/or reverses each facet's index array in place; returns nothing.
+  if (sort) {
+    const S = sort.output.transform(); // presumably the values collected by sort.reduce; confirm in lazyChannel
+    const compare = (i, j) => ascendingDefined(S[i], S[j]); // i and j are facet entries, used as indexes into S
+    facets.forEach(f => f.sort(compare));
+  }
+  if (reverse) { // runs after any sort, so sort plus reverse gives the reversed sorted order
+    facets.forEach(f => f.reverse());
+  }
+}
+
 function reduceFunction(f) {
   return {
     reduce(I, X) {
@@ -197,7 +244,7 @@ const reduceLast = {
   }
 };
 
-const reduceCount = {
+export const reduceCount = {
   label: "Frequency",
   reduce(I) {
     return I.length;