Skip to content

Commit

Permalink
Add unfiltered self aggregations #5825
Browse files Browse the repository at this point in the history
  • Loading branch information
StepanBrychta committed Dec 2, 2024
1 parent d70bce9 commit 42428f3
Show file tree
Hide file tree
Showing 4 changed files with 194 additions and 117 deletions.
60 changes: 51 additions & 9 deletions search/src/main/scala/weco/api/search/models/Aggregation.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,48 @@ import io.circe.optics.JsonPath._

import scala.util.{Try}

// Each field we aggregate on will return aggregation buckets in the following nested format:
//{
// "filtered": {
// "nestedSelf": {
// "terms": {
// "buckets": [...]
// }
// },
// "nested": {
// "terms": {
// "buckets": [...]
// }
// }
// },
// "nestedSelf": {
// "terms": {
// "buckets": [...]
// }
// }
//}

// Each bucket then has the following format:
//{
// "key": "i",
// "doc_count": 1,
// "labels": {
// "buckets": [
// {
// "key": "Audio",
// "doc_count": 1
// }
// ]
// }
//}
object AggregationMapping {
import weco.json.JsonUtil._
// Optics for pulling buckets out of an aggregation response. Each one follows
// a fixed path down to a `buckets` array and decodes every element of that
// array as a RawAggregationBucket.
//
// Buckets of the `nested` aggregation inside `filtered`.
private val globalAggBuckets =
  root.filtered.nested.terms.buckets.each.as[RawAggregationBucket]
// Buckets of the `nestedSelf` aggregation inside `filtered`.
private val selfAggBuckets =
  root.filtered.nestedSelf.terms.buckets.each.as[RawAggregationBucket]
// Buckets of the top-level `nestedSelf` aggregation, which sits outside `filtered`.
private val unfilteredSelfAggBuckets =
  root.nestedSelf.terms.buckets.each.as[RawAggregationBucket]

// When we use the self aggregation pattern, buckets are returned
// in aggregations at multiple depths. This will return
Expand Down Expand Up @@ -75,14 +107,24 @@ object AggregationMapping {
.toList
.sortBy(b => (-b.count, b.data.label))

// Builds an id -> label lookup from the buckets of the unfiltered self
// aggregation found in `json`. If the same id appears in more than one
// bucket, the label of the last such bucket is the one kept.
private def getUnfilteredIdLabelMapping(json: Json): Map[String, String] =
  parseNestedAggregationBuckets(unfilteredSelfAggBuckets.getAll(json))
    .map(parsed => (parsed.data.id, parsed.data.label))
    .toMap

// Parses the JSON for a single aggregation into an Aggregation.
//
// The buckets are collected with `bucketsFromAnywhere`, and then each bucket's
// label is replaced by the label recorded for the same id in the unfiltered
// self aggregation, when there is one. Buckets whose id is not present there
// keep the label they were parsed with.
//
// The outcome of `parse` is converted with `toTry`, so a string that cannot be
// parsed is reported through the returned Try rather than thrown.
def aggregationParser(
  jsonString: String
): Try[Aggregation] = {
  parse(jsonString)
    .map { json =>
      val nestedBuckets =
        parseNestedAggregationBuckets(bucketsFromAnywhere(json))
      val unfilteredIdLabelMap = getUnfilteredIdLabelMapping(json)

      // Prefer the label from the unfiltered self aggregation for a given id.
      val nestedBucketsWithUpdatedLabels = nestedBuckets.map { bucket =>
        val id = bucket.data.id
        bucket.copy(
          data = AggregationBucketData(
            id = id,
            unfilteredIdLabelMap.getOrElse(id, bucket.data.label)
          )
        )
      }
      Aggregation(nestedBucketsWithUpdatedLabels)
    }
}.toTry
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,7 @@
package weco.api.search.services

import com.sksamuel.elastic4s.ElasticApi.{boolQuery, termsAgg}
import com.sksamuel.elastic4s.requests.searches.aggs.{
Aggregation,
FilterAggregation,
NestedAggregation,
TermsAggregation,
TermsOrder
}
import com.sksamuel.elastic4s.ElasticApi.{boolQuery, matchAllQuery, termsAgg}
import com.sksamuel.elastic4s.requests.searches.aggs.{Aggregation, FilterAggregation, NestedAggregation, TermsAggregation, TermsOrder}
import com.sksamuel.elastic4s.requests.searches.queries.Query
import com.sksamuel.elastic4s.requests.searches.term.TermsQuery
import weco.api.search.models.Pairable
Expand Down Expand Up @@ -81,21 +75,29 @@ trait AggregationsBuilder[AggregationRequest, Filter] {
case _ => None
}

FilterAggregation(
name = params.name,
val filterAggregation = FilterAggregation(
name = "filtered",
boolQuery.filter(query),
subaggs = Seq(
Some(toAggregation(params, "nested", List())),
selfAggregation).flatten
)

FilterAggregation(
name = params.name,
query = matchAllQuery(),
subaggs = Seq(Some(filterAggregation), selfAggregation).flatten
)
}

private def toTermsAggregation(
params: AggregationParams,
name: String,
fieldPath: String,
size: Int,
include: List[String]
): TermsAggregation = {
val aggregation = termsAgg(params.name, params.fieldPath)
.size(params.size)
val aggregation = termsAgg(name, fieldPath)
.size(size)
.order(
Seq(
TermsOrder("_count", asc = false),
Expand All @@ -119,9 +121,7 @@ trait AggregationsBuilder[AggregationRequest, Filter] {
include: List[String]
): NestedAggregation = {
val idAggregation =
toTermsAggregation(
params.copy(fieldPath = s"${params.fieldPath}.id"),
include)
toTermsAggregation("terms", s"${params.fieldPath}.id", params.size, include)
val labelAggregation =
termsAgg("labels", s"${params.fieldPath}.label").size(1)

Expand All @@ -143,9 +143,7 @@ trait AggregationsBuilder[AggregationRequest, Filter] {
include: List[String]
): NestedAggregation = {
val labelAggregation =
toTermsAggregation(
params.copy(fieldPath = s"${params.fieldPath}.label"),
include)
toTermsAggregation("terms", s"${params.fieldPath}.label", params.size, include)

NestedAggregation(
name = nestedAggregationName,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,35 +25,37 @@ class AggregationResultsTest extends AnyFunSpec with Matchers {
),
_aggregationsAsMap = Map(
"format" -> Map(
"nested" -> Map(
"format" -> Map(
"doc_count_error_upper_bound" -> 0,
"sum_other_doc_count" -> 0,
"buckets" -> List(
Map(
"key" -> "apple",
"doc_count" -> 393145
),
Map(
"key" -> "banana",
"doc_count" -> 5696
),
Map(
"key" -> "coconut",
"doc_count" -> 9
"filtered" -> Map(
"nested" -> Map(
"terms" -> Map(
"doc_count_error_upper_bound" -> 0,
"sum_other_doc_count" -> 0,
"buckets" -> List(
Map(
"key" -> "apple",
"doc_count" -> 393145
),
Map(
"key" -> "banana",
"doc_count" -> 5696
),
Map(
"key" -> "coconut",
"doc_count" -> 9
)
)
)
)
),
"nestedSelf" -> Map(
"format" -> Map(
"doc_count_error_upper_bound" -> 0,
"sum_other_doc_count" -> 0,
"buckets" -> List(
Map(
"key" -> "rare fruit",
"doc_count" -> 1
),
),
"nestedSelf" -> Map(
"terms" -> Map(
"doc_count_error_upper_bound" -> 0,
"sum_other_doc_count" -> 0,
"buckets" -> List(
Map(
"key" -> "rare fruit",
"doc_count" -> 1
)
)
)
)
)
Expand Down Expand Up @@ -103,14 +105,16 @@ class AggregationResultsTest extends AnyFunSpec with Matchers {
),
_aggregationsAsMap = Map(
"format" -> Map(
"nested" -> Map(
"format" -> Map(
"doc_count_error_upper_bound" -> 0,
"sum_other_doc_count" -> 0,
"buckets" -> List(
Map(
"key" -> "artichoke",
"doc_count" -> 393145
"filtered" -> Map(
"nested" -> Map(
"terms" -> Map(
"doc_count_error_upper_bound" -> 0,
"sum_other_doc_count" -> 0,
"buckets" -> List(
Map(
"key" -> "artichoke",
"doc_count" -> 393145
)
)
)
)
Expand Down Expand Up @@ -149,31 +153,33 @@ class AggregationResultsTest extends AnyFunSpec with Matchers {
_aggregationsAsMap = Map(
"format" -> Map(
"doc_count" -> 12345,
"nested" -> Map(
"format" -> Map(
"doc_count_error_upper_bound" -> 0,
"sum_other_doc_count" -> 0,
"buckets" -> List(
Map(
"key" -> "123",
"doc_count" -> 393145,
"labels" -> Map(
"buckets" -> List(
Map(
"key" -> "absinthe",
"doc_count" -> 393145
"filtered" -> Map(
"nested" -> Map(
"terms" -> Map(
"doc_count_error_upper_bound" -> 0,
"sum_other_doc_count" -> 0,
"buckets" -> List(
Map(
"key" -> "123",
"doc_count" -> 393145,
"labels" -> Map(
"buckets" -> List(
Map(
"key" -> "absinthe",
"doc_count" -> 393145
)
)
)
)
),
Map(
"key" -> "456",
"doc_count" -> 34,
"labels" -> Map(
"buckets" -> List(
Map(
"key" -> "apple",
"doc_count" -> 34
),
Map(
"key" -> "456",
"doc_count" -> 34,
"labels" -> Map(
"buckets" -> List(
Map(
"key" -> "apple",
"doc_count" -> 34
)
)
)
)
Expand Down
Loading

0 comments on commit 42428f3

Please sign in to comment.