diff --git a/search/src/main/scala/weco/api/search/models/Aggregation.scala b/search/src/main/scala/weco/api/search/models/Aggregation.scala index 02e3d10fe..aa4680ebc 100644 --- a/search/src/main/scala/weco/api/search/models/Aggregation.scala +++ b/search/src/main/scala/weco/api/search/models/Aggregation.scala @@ -7,32 +7,15 @@ import io.circe.optics.JsonPath._ import scala.util.{Try} -// This object maps an aggregation result from Elasticsearch into our -// Aggregation data type. The results from ES are of the form: -// -// { -// "buckets": [ -// { -// "key": "chi", -// "doc_count": 4, -// "filtered": { -// "doc_count": 3 -// } -// }, -// ... -// ], -// ... -// } -// object AggregationMapping { import weco.json.JsonUtil._ // We can't predict the key name of the resultant sub-aggregation. // This optic says, "for each key of the root object that has a key `buckets`, decode // the value of that field as an array of Buckets" - private val globalAggBuckets = root.each.buckets.each.as[RawAggregationBucket] + private val globalAggBuckets = root.nested.each.buckets.each.as[RawAggregationBucket] // This optic does the same for buckets within the self aggregation private val selfAggBuckets = - root.self.each.buckets.each.as[RawAggregationBucket] + root.nestedSelf.each.buckets.each.as[RawAggregationBucket] // When we use the self aggregation pattern, buckets are returned // in aggregations at multiple depths. This will return @@ -58,30 +41,22 @@ object AggregationMapping { @JsonKey("doc_count") count: Int ) - private def parseAggregationBuckets(aggregationJson: Json) = - bucketsFromAnywhere(aggregationJson).map { bucket => - val key = bucket.key.as[String].toOption.get - - AggregationBucket( - data = AggregationBucketData(id = key, label = key), - count = bucket.count - ) - }.toList - - private def parseNestedAggregationBuckets(aggregationJson: Json) = - bucketsFromAnywhere(aggregationJson).map { bucket => + private def parseNestedAggregationBuckets( + buckets: Seq[RawAggregationBucket] + ) = { + buckets.map { bucket => // Each ID-based aggregation bucket contains a list of label-based sub-aggregation buckets, // storing a list of labels associated with a given ID. - val labelBuckets = bucket.labelSubAggregation + val labelBucketsOption = bucket.labelSubAggregation .map( _.hcursor.downField("buckets").as[Seq[LabelBucket]] ) - .get // Retrieve the label from the first bucket. There might be multiple labels associated with a given ID, // but we only want to expose the most commonly used one to the frontend. val firstLabelBucket: Option[LabelBucket] = for { - labelBuckets <- labelBuckets.toOption + decoderResult <- labelBucketsOption + labelBuckets <- decoderResult.toOption firstBucket <- labelBuckets.headOption } yield firstBucket @@ -94,26 +69,20 @@ object AggregationMapping { data = AggregationBucketData(id = key, label), count = bucket.count ) - }.toList + }.toList.sortBy(b => (-b.count, b.data.label)) + } def aggregationParser( jsonString: String ): Try[Aggregation] = { println(jsonString) parse(jsonString) - .map( - json => - root.nested - .as[Json] - .getOption(json) - .orElse(root.nestedSelf.as[Json].getOption(json)) match { - case Some(nestedJson) => - Aggregation(parseNestedAggregationBuckets(nestedJson)) - case _ => Aggregation(parseAggregationBuckets(json)) - }) + .map { json => + val nestedBuckets = parseNestedAggregationBuckets(bucketsFromAnywhere(json)) + Aggregation(nestedBuckets) + } + } .toTry - } - } case class Aggregation(buckets: List[AggregationBucket]) diff --git a/search/src/main/scala/weco/api/search/models/request/ImageAggregationRequest.scala b/search/src/main/scala/weco/api/search/models/request/ImageAggregationRequest.scala index 2a830d48d..0bf32840f 100644 --- a/search/src/main/scala/weco/api/search/models/request/ImageAggregationRequest.scala +++ b/search/src/main/scala/weco/api/search/models/request/ImageAggregationRequest.scala @@ -4,7 +4,10 @@ sealed trait ImageAggregationRequest object ImageAggregationRequest { case object License extends ImageAggregationRequest - case object SourceContributorAgents extends ImageAggregationRequest - case object SourceGenres extends ImageAggregationRequest - case object SourceSubjects extends ImageAggregationRequest + case object SourceContributorAgentsLabel extends ImageAggregationRequest + case object SourceContributorAgentsId extends ImageAggregationRequest + case object SourceGenresLabel extends ImageAggregationRequest + case object SourceGenresId extends ImageAggregationRequest + case object SourceSubjectsLabel extends ImageAggregationRequest + case object SourceSubjectsId extends ImageAggregationRequest } diff --git a/search/src/main/scala/weco/api/search/models/request/WorkAggregationRequest.scala b/search/src/main/scala/weco/api/search/models/request/WorkAggregationRequest.scala index e7b1c77db..5517ddf80 100644 --- a/search/src/main/scala/weco/api/search/models/request/WorkAggregationRequest.scala +++ b/search/src/main/scala/weco/api/search/models/request/WorkAggregationRequest.scala @@ -7,11 +7,18 @@ object WorkAggregationRequest { case object ProductionDate extends WorkAggregationRequest - case object Genre extends WorkAggregationRequest + case object GenreLabel extends WorkAggregationRequest - case object Subject extends WorkAggregationRequest + case object GenreId extends WorkAggregationRequest - case object Contributor extends WorkAggregationRequest + + case object SubjectLabel extends WorkAggregationRequest + + case object SubjectId extends WorkAggregationRequest + + case object ContributorLabel extends WorkAggregationRequest + + case object ContributorId extends WorkAggregationRequest case object Languages extends WorkAggregationRequest diff --git a/search/src/main/scala/weco/api/search/rest/ImagesParams.scala b/search/src/main/scala/weco/api/search/rest/ImagesParams.scala index 063a08103..49c20c04e 100644 --- a/search/src/main/scala/weco/api/search/rest/ImagesParams.scala +++ b/search/src/main/scala/weco/api/search/rest/ImagesParams.scala @@ -23,9 +23,8 @@ object SingleImageParams extends QueryParamsUtils { def parse = parameter( "include".as[SingleImageIncludes].? - ).tmap { - case Tuple1(include) => - SingleImageParams(include) + ).tmap { case Tuple1(include) => + SingleImageParams(include) } implicit val includesDecoder: Decoder[SingleImageIncludes] = @@ -125,7 +124,8 @@ object MultipleImagesParams extends QueryParamsUtils { Left( s"'$colorString' is not a valid value. Please supply a single hex string." ) - }) + } + ) implicit val includesDecoder: Decoder[MultipleImagesIncludes] = decodeOneOfCommaSeparated( @@ -138,9 +138,12 @@ object MultipleImagesParams extends QueryParamsUtils { implicit val aggregationsDecoder: Decoder[List[ImageAggregationRequest]] = decodeOneOfCommaSeparated( "locations.license" -> ImageAggregationRequest.License, - "source.contributors.agent.label" -> ImageAggregationRequest.SourceContributorAgents, - "source.genres.label" -> ImageAggregationRequest.SourceGenres, - "source.subjects.label" -> ImageAggregationRequest.SourceSubjects + "source.contributors.agent.label" -> ImageAggregationRequest.SourceContributorAgentsLabel, + "source.contributors.agent.id" -> ImageAggregationRequest.SourceContributorAgentsId, + "source.genres.label" -> ImageAggregationRequest.SourceGenresLabel, + "source.genres.id" -> ImageAggregationRequest.SourceGenresId, + "source.subjects.label" -> ImageAggregationRequest.SourceSubjectsLabel, + "source.subjects.id" -> ImageAggregationRequest.SourceSubjectsId ) implicit val sortDecoder: Decoder[List[SortRequest]] = diff --git a/search/src/main/scala/weco/api/search/rest/WorksParams.scala b/search/src/main/scala/weco/api/search/rest/WorksParams.scala index b550d9f5a..281083330 100644 --- a/search/src/main/scala/weco/api/search/rest/WorksParams.scala +++ b/search/src/main/scala/weco/api/search/rest/WorksParams.scala @@ -302,11 +302,14 @@ object MultipleWorksParams extends QueryParamsUtils { implicit val aggregationsDecoder: Decoder[List[WorkAggregationRequest]] = decodeOneOfCommaSeparated( "workType" -> WorkAggregationRequest.Format, - "genres.label" -> WorkAggregationRequest.Genre, + "genres.label" -> WorkAggregationRequest.GenreLabel, + "genres.id" -> WorkAggregationRequest.GenreId, "production.dates" -> WorkAggregationRequest.ProductionDate, - "subjects.label" -> WorkAggregationRequest.Subject, + "subjects.label" -> WorkAggregationRequest.SubjectLabel, + "subjects.id" -> WorkAggregationRequest.SubjectId, "languages" -> WorkAggregationRequest.Languages, - "contributors.agent.label" -> WorkAggregationRequest.Contributor, + "contributors.agent.label" -> WorkAggregationRequest.ContributorLabel, + "contributors.agent.id" -> WorkAggregationRequest.ContributorId, "items.locations.license" -> WorkAggregationRequest.License, "availabilities" -> WorkAggregationRequest.Availabilities ) diff --git a/search/src/main/scala/weco/api/search/services/AggregationsBuilder.scala b/search/src/main/scala/weco/api/search/services/AggregationsBuilder.scala index 9fc373cc8..1f6c906f7 100644 --- a/search/src/main/scala/weco/api/search/services/AggregationsBuilder.scala +++ b/search/src/main/scala/weco/api/search/services/AggregationsBuilder.scala @@ -28,7 +28,8 @@ case class AggregationParams( trait AggregationsBuilder[AggregationRequest, Filter] { def pairedAggregationRequests( - filter: Filter with Pairable): List[AggregationRequest] + filter: Filter with Pairable + ): List[AggregationRequest] def getAggregationParams(request: AggregationRequest): AggregationParams def buildFilterQuery: PartialFunction[Filter, Query] @@ -43,67 +44,56 @@ trait AggregationsBuilder[AggregationRequest, Filter] { def getAggregations( filters: List[Filter with Pairable], - aggregationRequests: List[AggregationRequest]): Seq[FilterAggregation] = + aggregationRequests: List[AggregationRequest] + ): Seq[FilterAggregation] = aggregationRequests.map { aggregationRequest => val aggregationParams = getAggregationParams(aggregationRequest) - pairedFilter(filters, aggregationRequest) match { - case Some(paired) => - val nonPairedQueries = - filters.filterNot(_ == paired).map(buildFilterQuery) - val pairedQuery = buildFilterQuery(paired) - toFilterAggregation( - aggregationParams, - nonPairedQueries, - Some(pairedQuery) - ) - case _ => - toFilterAggregation( - aggregationParams, - filters.map(buildFilterQuery), - None - ) - } + val (queries, pairedQuery) = + pairedFilter(filters, aggregationRequest) match { + case Some(paired) => + val nonPairedQueries = + filters.filterNot(_ == paired).map(buildFilterQuery) + (nonPairedQueries, Some(buildFilterQuery(paired))) + case _ => + (filters.map(buildFilterQuery), None) + } + + toFilterAggregation(aggregationParams, queries, pairedQuery) } + private def toFilterAggregation( params: AggregationParams, query: List[Query], pairedQuery: Option[Query] ): FilterAggregation = { - val toAggregation: (AggregationParams, List[String]) => Aggregation = + val toAggregation: AggregationParams => Aggregation = params.aggregationType match { case AggregationType.LabeledIdAggregation => toLabeledIdAggregation - case AggregationType.LabelOnlyAggregation => toTermsAggregation + case AggregationType.LabelOnlyAggregation => toLabelOnlyAggregation + } + + val toSelfAggregation: (AggregationParams, List[String]) => Aggregation = + params.aggregationType match { + case AggregationType.LabeledIdAggregation => toSelfLabeledIdAggregation + case AggregationType.LabelOnlyAggregation => toSelfLabelOnlyAggregation } val selfAggregation: Option[Aggregation] = pairedQuery match { case Some(TermsQuery(_, values, _, _, _, _, _)) => val pairedValues = values.map(value => value.toString).toList - Some( - toAggregation( - AggregationParams( - "self", - params.fieldPath, - params.size, - params.aggregationType), - pairedValues)) + Some(toSelfAggregation(params, pairedValues)) case _ => None } FilterAggregation( name = params.name, boolQuery.filter(query), - subaggs = Seq( - toAggregation(params, List()), - selfAggregation.orNull - ).filter(_ != null) + subaggs = Seq(Some(toAggregation(params)), selfAggregation).flatten ) } - private def toTermsAggregation( - params: AggregationParams, - include: List[String] - ): TermsAggregation = + private def toTermsAggregation(params: AggregationParams): TermsAggregation = termsAgg(params.name, params.fieldPath) .size(params.size) .order( @@ -112,7 +102,13 @@ trait AggregationsBuilder[AggregationRequest, Filter] { TermsOrder("_key", asc = true) ) ) -// .minDocCount(1) + + private def toSelfTermsAggregation( + params: AggregationParams, + include: List[String] + ): TermsAggregation = + toTermsAggregation(params) + .minDocCount(0) .includeExactValues(include) /** Each aggregatable field is indexed as a nested field with an `id` value and a `label` value. All aggregations @@ -121,31 +117,66 @@ trait AggregationsBuilder[AggregationRequest, Filter] { * For example, different Works can use different labels for a given LoC Subject Heading.) */ private def toLabeledIdAggregation( + params: AggregationParams + ): NestedAggregation = { + val idAggregation = + toTermsAggregation(params.copy(fieldPath = s"${params.fieldPath}.id")) + val labelAggregation = + termsAgg("labels", s"${params.fieldPath}.label").size(1) + + NestedAggregation( + name = "nested", + path = params.fieldPath, + subaggs = List(idAggregation.subAggregations(labelAggregation)) + ) + } + + private def toLabelOnlyAggregation( + params: AggregationParams + ): NestedAggregation = { + val labelAggregation = + toTermsAggregation(params.copy(fieldPath = s"${params.fieldPath}.label")) + + NestedAggregation( + name = "nested", + path = params.fieldPath, + subaggs = List(labelAggregation) + ) + } + + private def toSelfLabeledIdAggregation( params: AggregationParams, include: List[String] ): NestedAggregation = { val idAggregation = - toTermsAggregation( - AggregationParams( - params.name, - s"${params.fieldPath}.id", - params.size, - params.aggregationType), - include) + toSelfTermsAggregation( + params.copy(fieldPath = s"${params.fieldPath}.id"), + include + ) val labelAggregation = termsAgg("labels", s"${params.fieldPath}.label").size(1) - val nestedAggregation = idAggregation.subAggregations(labelAggregation) - val name = if (include.nonEmpty) { - "nestedSelf" - } else { - "nested" - } + NestedAggregation( + name = "nestedSelf", + path = params.fieldPath, + subaggs = List(idAggregation.subAggregations(labelAggregation)) + ) + } + + private def toSelfLabelOnlyAggregation( + params: AggregationParams, + include: List[String] + ): NestedAggregation = { + val labelAggregation = + toSelfTermsAggregation( + params.copy(fieldPath = s"${params.fieldPath}.label"), + include + ) NestedAggregation( - name = name, + name = "nestedSelf", path = params.fieldPath, - subaggs = List(nestedAggregation) + subaggs = List(labelAggregation) ) } } diff --git a/search/src/main/scala/weco/api/search/services/ImagesAggregationsBuilder.scala b/search/src/main/scala/weco/api/search/services/ImagesAggregationsBuilder.scala index 91b8ee372..7c2399fed 100644 --- a/search/src/main/scala/weco/api/search/services/ImagesAggregationsBuilder.scala +++ b/search/src/main/scala/weco/api/search/services/ImagesAggregationsBuilder.scala @@ -7,6 +7,31 @@ import weco.api.search.services.ImagesRequestBuilder.buildImageFilterQuery object ImagesAggregationsBuilder extends AggregationsBuilder[ImageAggregationRequest, ImageFilter] { + + private def getSourceGenreParams(aggregationType: AggregationType) = + AggregationParams( + "sourceGenres", + "aggregatableValues.source.genres", + 20, + aggregationType + ) + + private def getSourceSubjectParams(aggregationType: AggregationType) = + AggregationParams( + "sourceSubjects", + "aggregatableValues.source.subjects", + 20, + aggregationType + ) + + private def getSourceContributorParams(aggregationType: AggregationType) = + AggregationParams( + "sourceContributorAgents", + "aggregatableValues.source.contributors.agent", + 20, + aggregationType + ) + override def getAggregationParams( aggReq: ImageAggregationRequest ): AggregationParams = @@ -24,29 +49,23 @@ object ImagesAggregationsBuilder AggregationType.LabeledIdAggregation ) - case ImageAggregationRequest.SourceContributorAgents => - AggregationParams( - "sourceContributorAgents", - "aggregatableValues.source.contributors.agent", - 20, - AggregationType.LabeledIdAggregation - ) + case ImageAggregationRequest.SourceContributorAgentsLabel => + getSourceContributorParams(AggregationType.LabelOnlyAggregation) - case ImageAggregationRequest.SourceGenres => - AggregationParams( - "sourceGenres", - "aggregatableValues.source.genres", - 20, - AggregationType.LabeledIdAggregation - ) + case ImageAggregationRequest.SourceContributorAgentsId => + getSourceContributorParams(AggregationType.LabeledIdAggregation) - case ImageAggregationRequest.SourceSubjects => - AggregationParams( - "sourceSubjects", - "aggregatableValues.source.subjects", - 20, - AggregationType.LabeledIdAggregation - ) + case ImageAggregationRequest.SourceGenresLabel => + getSourceGenreParams(AggregationType.LabelOnlyAggregation) + + case ImageAggregationRequest.SourceGenresId => + getSourceGenreParams(AggregationType.LabeledIdAggregation) + + case ImageAggregationRequest.SourceSubjectsLabel => + getSourceSubjectParams(AggregationType.LabelOnlyAggregation) + + case ImageAggregationRequest.SourceSubjectsId => + getSourceSubjectParams(AggregationType.LabeledIdAggregation) } override def pairedAggregationRequests( @@ -55,9 +74,9 @@ object ImagesAggregationsBuilder filter match { case _: LicenseFilter => List(ImageAggregationRequest.License) case _: ContributorsFilter => - List(ImageAggregationRequest.SourceContributorAgents) - case _: GenreFilter => List(ImageAggregationRequest.SourceGenres) - case _: SubjectLabelFilter => List(ImageAggregationRequest.SourceSubjects) + List(ImageAggregationRequest.SourceContributorAgentsLabel) + case _: GenreFilter => List(ImageAggregationRequest.SourceGenresLabel) + case _: SubjectLabelFilter => List(ImageAggregationRequest.SourceSubjectsLabel) } override def buildFilterQuery: PartialFunction[ImageFilter, Query] = buildImageFilterQuery diff --git a/search/src/main/scala/weco/api/search/services/WorksAggregationsBuilder.scala b/search/src/main/scala/weco/api/search/services/WorksAggregationsBuilder.scala index 6b9549a18..01181886e 100644 --- a/search/src/main/scala/weco/api/search/services/WorksAggregationsBuilder.scala +++ b/search/src/main/scala/weco/api/search/services/WorksAggregationsBuilder.scala @@ -17,6 +17,31 @@ import weco.api.search.services.WorksRequestBuilder.buildWorkFilterQuery object WorksAggregationsBuilder extends AggregationsBuilder[WorkAggregationRequest, WorkFilter] { + + private def getGenreParams(aggregationType: AggregationType) = + AggregationParams( + "genres", + "aggregatableValues.genres", + 20, + aggregationType + ) + + private def getSubjectParams(aggregationType: AggregationType) = + AggregationParams( + "subjects", + "aggregatableValues.subjects", + 20, + aggregationType + ) + + private def getContributorParams(aggregationType: AggregationType) = + AggregationParams( + "contributors", + "aggregatableValues.contributors.agent", + 20, + aggregationType + ) + override def getAggregationParams( aggReq: WorkAggregationRequest ): AggregationParams = @@ -42,29 +67,23 @@ object WorksAggregationsBuilder AggregationType.LabeledIdAggregation ) - case WorkAggregationRequest.Genre => - AggregationParams( - "genres", - "aggregatableValues.genres", - 20, - AggregationType.LabeledIdAggregation - ) + case WorkAggregationRequest.GenreLabel => + getGenreParams(AggregationType.LabelOnlyAggregation) - case WorkAggregationRequest.Subject => - AggregationParams( - "subjects", - "aggregatableValues.subjects", - 20, - AggregationType.LabeledIdAggregation - ) + case WorkAggregationRequest.GenreId => + getGenreParams(AggregationType.LabeledIdAggregation) - case WorkAggregationRequest.Contributor => - AggregationParams( - "contributors", - "aggregatableValues.contributors.agent", - 20, - AggregationType.LabeledIdAggregation - ) + case WorkAggregationRequest.SubjectLabel => + getSubjectParams(AggregationType.LabelOnlyAggregation) + + case WorkAggregationRequest.SubjectId => + getSubjectParams(AggregationType.LabeledIdAggregation) + + case WorkAggregationRequest.ContributorLabel => + getContributorParams(AggregationType.LabelOnlyAggregation) + + case WorkAggregationRequest.ContributorId => + getContributorParams(AggregationType.LabeledIdAggregation) case WorkAggregationRequest.Languages => AggregationParams( @@ -107,10 +126,11 @@ object WorksAggregationsBuilder filter match { case _: FormatFilter => List(WorkAggregationRequest.Format) case _: LanguagesFilter => List(WorkAggregationRequest.Languages) - case _: GenreFilter => List(WorkAggregationRequest.Genre) - case _: SubjectLabelFilter => List(WorkAggregationRequest.Subject) - case _: ContributorsFilter => List(WorkAggregationRequest.Contributor) - case _: LicenseFilter => List(WorkAggregationRequest.License) + case _: GenreFilter => List(WorkAggregationRequest.GenreLabel) + case _: SubjectLabelFilter => List(WorkAggregationRequest.SubjectLabel) + case _: ContributorsFilter => + List(WorkAggregationRequest.ContributorLabel) + case _: LicenseFilter => List(WorkAggregationRequest.License) case _: AvailabilitiesFilter => List(WorkAggregationRequest.Availabilities) } diff --git a/search/src/test/scala/weco/api/search/generators/AggregationDocumentGenerators.scala b/search/src/test/scala/weco/api/search/generators/AggregationDocumentGenerators.scala index ad6f02461..e0b118493 100644 --- a/search/src/test/scala/weco/api/search/generators/AggregationDocumentGenerators.scala +++ b/search/src/test/scala/weco/api/search/generators/AggregationDocumentGenerators.scala @@ -9,13 +9,9 @@ trait AggregationDocumentGenerators { def createWorkDocument( docId: String, title: String = "", - aggregables: Map[String, Seq[String]] + aggregables: Map[String, Seq[Json]], + filterables: Map[String, Json] ): TestDocument = { - - val filterables: Map[String, Json] = aggregables.map { - case (key, values) => key -> values.asJson - } - val queryables = filterables.filterKeys { case "contributors.agent.label" => true case "genres.label" => true @@ -23,16 +19,11 @@ trait AggregationDocumentGenerators { case _ => false } + ("title" -> title.asJson) - val aggregablesJson = aggregables.map { - case (key, values) => - key -> values.map(toAggregable).asJson - } asJson - val doc = Json.obj( "display" -> Json.obj("id" -> docId.asJson, "title" -> title.asJson), "type" -> Json.fromString("Visible"), "query" -> queryables.asJson, - "aggregatableValues" -> aggregablesJson, + "aggregatableValues" -> aggregables.asJson, "filterableValues" -> filterables.asJson ) @@ -42,16 +33,23 @@ trait AggregationDocumentGenerators { ) } + def createAggregatableField(id: String, label: String): Json = { + Json.obj( + "id" -> id.asJson, + "label" -> label.asJson + ) + } + + def createAggregatableField(label: String): Json = { + createAggregatableField(label, label) + } + def createImageDocument( docId: String, title: String = "", - aggregables: Map[String, Seq[String]] + aggregables: Map[String, Seq[Json]], + filterables: Map[String, Json] ): TestDocument = { - - val filterables: Map[String, Json] = aggregables.map { - case (key, values) => key -> values.asJson - } - val queryables = filterables.filterKeys { case "source.contributors.agent.label" => true case "source.genres.concepts.label" => true @@ -59,15 +57,10 @@ trait AggregationDocumentGenerators { case _ => false } + ("source.title" -> title.asJson) - val aggregablesJson = aggregables.map { - case (key, values) => - key -> values.map(toAggregable).asJson - } asJson - val doc = Json.obj( "display" -> Json.obj("id" -> docId.asJson, "title" -> title.asJson), "query" -> queryables.asJson, - "aggregatableValues" -> aggregablesJson, + "aggregatableValues" -> aggregables.asJson, "filterableValues" -> filterables.asJson, "vectorValues" -> Json.obj( "features" -> featuresPlaceholder.asJson @@ -80,9 +73,6 @@ trait AggregationDocumentGenerators { ) } - private def toAggregable(value: String): String = - s"""{"label":${value.asJson}}""" - private val featuresPlaceholder: Seq[Float] = normalize(Seq.fill(4096)(random.nextGaussian().toFloat)) diff --git a/search/src/test/scala/weco/api/search/generators/BucketGenerators.scala b/search/src/test/scala/weco/api/search/generators/BucketGenerators.scala index 962411de8..004d38f0d 100644 --- a/search/src/test/scala/weco/api/search/generators/BucketGenerators.scala +++ b/search/src/test/scala/weco/api/search/generators/BucketGenerators.scala @@ -4,9 +4,8 @@ import io.circe.parser.parse trait BucketGenerators { protected def toKeywordBucket( - dataType: String, count: Int, - code: String, + id: String, label: String ): Json = parse( @@ -14,25 +13,12 @@ trait BucketGenerators { |{ |"count": $count, |"data": { - | "id": "$code", - | "label": "$label", - | "type": "$dataType" - |}, - |"type": "AggregationBucket" - |}""".stripMargin).right.get - - protected def toUnidentifiedBucket( - count: Int, - label: String - ): Json = - parse( - s""" - |{ - |"count": $count, - |"data": { + | "id": "$id", | "label": "$label" |}, |"type": "AggregationBucket" |}""".stripMargin).right.get + protected def toKeywordBucket(count: Int, id: String): Json = + toKeywordBucket(count, id, id) } diff --git a/search/src/test/scala/weco/api/search/images/ImagesFacetingTest.scala b/search/src/test/scala/weco/api/search/images/ImagesFacetingTest.scala index 915503bf0..b90dd2776 100644 --- a/search/src/test/scala/weco/api/search/images/ImagesFacetingTest.scala +++ b/search/src/test/scala/weco/api/search/images/ImagesFacetingTest.scala @@ -1,12 +1,11 @@ package weco.api.search.images +import io.circe.Json +import io.circe.syntax.EncoderOps import org.scalatest.GivenWhenThen import weco.api.search.FacetingFeatures import weco.api.search.fixtures.{JsonServer, LocalJsonServerFixture} -import weco.api.search.generators.{ - AggregationDocumentGenerators, - BucketGenerators -} +import weco.api.search.generators.{AggregationDocumentGenerators, BucketGenerators} import weco.fixtures.TestWith class ImagesFacetingTest @@ -37,8 +36,8 @@ class ImagesFacetingTest aggregationFields = Seq("source.genres.label"), expectedAggregationBuckets = Map( "source.genres.label" -> Seq( - toUnidentifiedBucket(2, "Daguerreotype"), - toUnidentifiedBucket(1, "Oil Painting") + toKeywordBucket(2, "Daguerreotype"), + toKeywordBucket(1, "Oil Painting") ) ) ) @@ -47,13 +46,13 @@ class ImagesFacetingTest aggregationFields = Seq("source.genres.label", "source.subjects.label"), expectedAggregationBuckets = Map( "source.genres.label" -> Seq( - toUnidentifiedBucket(2, "Daguerreotype"), - toUnidentifiedBucket(1, "Oil Painting") + toKeywordBucket(2, "Daguerreotype"), + toKeywordBucket(1, "Oil Painting") ), "source.subjects.label" -> Seq( - toUnidentifiedBucket(2, "Fruit"), - toUnidentifiedBucket(2, "Surgery"), - toUnidentifiedBucket(1, "Nursing") + toKeywordBucket(2, "Fruit"), + toKeywordBucket(2, "Surgery"), + toKeywordBucket(1, "Nursing") ) ) ) @@ -64,13 +63,13 @@ class ImagesFacetingTest Seq("source.subjects.label", "source.contributors.agent.label"), expectedAggregationBuckets = Map( "source.subjects.label" -> Seq( - toUnidentifiedBucket(2, "Fruit"), - toUnidentifiedBucket(1, "Nursing"), - toUnidentifiedBucket(1, "Surgery") + toKeywordBucket(2, "Fruit"), + toKeywordBucket(1, "Nursing"), + toKeywordBucket(1, "Surgery") ), "source.contributors.agent.label" -> Seq( - toUnidentifiedBucket(1, "BJ Hunnicut"), - toUnidentifiedBucket(1, "Margaret Houlihan") + toKeywordBucket(1, "BJ Hunnicut"), + toKeywordBucket(1, "Margaret Houlihan") ) ) ) @@ -79,7 +78,7 @@ class ImagesFacetingTest aggregationFields = Seq("source.genres.label"), filters = Seq(("source.subjects.label", "Fruit")), expectedAggregationBuckets = Map( - "source.genres.label" -> Seq(toUnidentifiedBucket(2, "Daguerreotype")) + "source.genres.label" -> Seq(toKeywordBucket(2, "Daguerreotype")) ) ) @@ -88,8 +87,8 @@ class ImagesFacetingTest filters = Seq(("source.genres.label", "Oil%20Painting")), expectedAggregationBuckets = Map( "source.genres.label" -> Seq( - toUnidentifiedBucket(2, "Daguerreotype"), - toUnidentifiedBucket(1, "Oil Painting") + toKeywordBucket(2, "Daguerreotype"), + toKeywordBucket(1, "Oil Painting") ) ) ) @@ -102,9 +101,9 @@ class ImagesFacetingTest ), expectedAggregationBuckets = Map( "source.subjects.label" -> Seq( - toUnidentifiedBucket(2, "Fruit"), - toUnidentifiedBucket(2, "Surgery"), - toUnidentifiedBucket(1, "Nursing") + toKeywordBucket(2, "Fruit"), + toKeywordBucket(2, "Surgery"), + toKeywordBucket(1, "Nursing") ) ) ) @@ -117,13 +116,13 @@ class ImagesFacetingTest ), expectedAggregationBuckets = Map( "source.subjects.label" -> Seq( - toUnidentifiedBucket(2, "Fruit"), - toUnidentifiedBucket(1, "Nursing"), - toUnidentifiedBucket(1, "Surgery") // Only one Surgery is a Daguerreotype + toKeywordBucket(2, "Fruit"), + toKeywordBucket(1, "Nursing"), + toKeywordBucket(1, "Surgery") // Only one Surgery is a Daguerreotype ), "source.genres.label" -> Seq( - toUnidentifiedBucket(1, "Daguerreotype"), // Only one Daguerreotype is Surgery - toUnidentifiedBucket(1, "Oil Painting") + toKeywordBucket(1, "Daguerreotype"), // Only one Daguerreotype is Surgery + toKeywordBucket(1, "Oil Painting") ) ) ) @@ -137,13 +136,13 @@ class ImagesFacetingTest ), expectedAggregationBuckets = Map( "source.subjects.label" -> Seq( - toUnidentifiedBucket(1, "Surgery"), - toUnidentifiedBucket(0, "Fruit") + toKeywordBucket(1, "Surgery"), + toKeywordBucket(0, "Fruit") ), "source.contributors.agent.label" -> Seq( - toUnidentifiedBucket(1, "BJ Hunnicut"), - toUnidentifiedBucket(1, "Margaret Houlihan"), - toUnidentifiedBucket(0, "Linden Cullen") + toKeywordBucket(1, "BJ Hunnicut"), + toKeywordBucket(1, "Margaret Houlihan"), + toKeywordBucket(0, "Linden Cullen") ) ) ) @@ -165,7 +164,7 @@ class ImagesFacetingTest expectedAggregationBuckets = Map( "source.genres.label" -> Seq( // Only Hunnicut is Surgery+mash - toUnidentifiedBucket(1, "Daguerreotype") + toKeywordBucket(1, "Daguerreotype") ) ) ) @@ -175,8 +174,8 @@ class ImagesFacetingTest filters = Seq(("source.contributors.agent.label", "Mark%20Sloan")), expectedAggregationBuckets = Map( "source.contributors.agent.label" -> (('a' to 't').map( - n => toUnidentifiedBucket(2, s"Beverley Crusher ($n)") - ) :+ toUnidentifiedBucket(1, "Mark Sloan")) + n => toKeywordBucket(2, s"Beverley Crusher ($n)") + ) :+ toKeywordBucket(1, "Mark Sloan")) ) ) @@ -190,13 +189,13 @@ class ImagesFacetingTest aggregationFields = Seq("source.contributors.agent.label"), expectedAggregationBuckets = Map( "source.contributors.agent.label" -> (Seq( - toUnidentifiedBucket(3, "Beverley Crusher (a)") + toKeywordBucket(3, "Beverley Crusher (a)") ) ++ ('b' to 't').map( - n => toUnidentifiedBucket(2, s"Beverley Crusher ($n)") + n => toKeywordBucket(2, s"Beverley Crusher ($n)") ) ++ Seq( - toUnidentifiedBucket(2, "Beverley Crusher (z)"), - toUnidentifiedBucket(1, "Mark Sloan"), - toUnidentifiedBucket(1, "Yuri Zhivago") + toKeywordBucket(2, "Beverley Crusher (z)"), + toKeywordBucket(1, "Mark Sloan"), + toKeywordBucket(1, "Yuri Zhivago") )) ) ) @@ -207,9 +206,9 @@ class ImagesFacetingTest aggregationFields = Seq("source.contributors.agent.label"), expectedAggregationBuckets = Map( "source.contributors.agent.label" -> Seq( - toUnidentifiedBucket(1, "Beverley Crusher (a)"), - toUnidentifiedBucket(1, "Yuri Zhivago"), - toUnidentifiedBucket(0, "Mark Sloan") + toKeywordBucket(1, "Beverley Crusher (a)"), + toKeywordBucket(1, "Yuri Zhivago"), + toKeywordBucket(0, "Mark Sloan") ) ) ) @@ -218,27 +217,42 @@ class ImagesFacetingTest s"hunn1234", "mash tv", Map( - "source.contributors.agent.label" -> Seq("BJ Hunnicut"), - "source.genres.label" -> Seq("Daguerreotype"), - "source.subjects.label" -> Seq("Fruit", "Surgery") - ) + "source.contributors.agent" -> Seq(createAggregatableField("BJ Hunnicut")), + "source.genres" -> Seq(createAggregatableField("Daguerreotype")), + "source.subjects" -> Seq(createAggregatableField("Fruit"), createAggregatableField("Surgery")) + ), + Map( + "source.contributors.agent.label" -> Seq("BJ Hunnicut").asJson, + "source.genres.label" -> Seq("Daguerreotype").asJson, + "source.subjects.label" -> Seq("Fruit", "Surgery").asJson + ), ) private val houlihanDaguerrotype = createImageDocument( s"houl1234", "mash tv film", Map( - "source.contributors.agent.label" -> Seq("Margaret Houlihan"), - "source.genres.label" -> Seq("Daguerreotype"), - "source.subjects.label" -> Seq("Fruit", "Nursing") - ) + "source.contributors.agent" -> Seq(createAggregatableField("Margaret Houlihan")), + "source.genres" -> Seq(createAggregatableField("Daguerreotype")), + "source.subjects" -> Seq(createAggregatableField("Fruit"), createAggregatableField("Nursing")) + ), + Map( + "source.contributors.agent.label" -> Seq("Margaret Houlihan").asJson, + "source.genres.label" -> Seq("Daguerreotype").asJson, + "source.subjects.label" -> Seq("Fruit", "Nursing").asJson + ), ) private val cullenOilPainting = createImageDocument( s"cull1234", "holby tv", Map( - "source.contributors.agent.label" -> Seq("Linden Cullen"), - "source.genres.label" -> Seq("Oil Painting"), - "source.subjects.label" -> Seq("Surgery") + "source.contributors.agent" -> Seq(createAggregatableField("Linden Cullen")), + "source.genres" -> Seq(createAggregatableField("Oil Painting")), + "source.subjects" -> Seq(createAggregatableField("Surgery")) + ), + Map( + "source.contributors.agent.label" -> Seq("Linden Cullen").asJson, + "source.genres.label" -> Seq("Oil Painting").asJson, + "source.subjects.label" -> Seq("Surgery").asJson ) ) @@ -246,8 +260,12 @@ class ImagesFacetingTest s"jone1234", "who tv", Map( - "source.contributors.agent.label" -> Seq("Martha Jones"), - "source.subjects.label" -> Seq("Xenobiology") + "source.contributors.agent" -> Seq(createAggregatableField("Martha Jones")), + "source.subjects" -> Seq(createAggregatableField("Xenobiology")) + ), + Map( + "source.contributors.agent.label" -> Seq("Martha Jones").asJson, + "source.subjects.label" -> Seq("Xenobiology").asJson ) ) @@ -261,34 +279,34 @@ class ImagesFacetingTest jonesNoGenre ) + private val top20AggregatableContributors = ('a' to 'z').map(n => + createAggregatableField(s"Beverley Crusher ($n)") + ) + private val top21AggregatableContributors = + top20AggregatableContributors ++ Seq(createAggregatableField("Mark Sloan")) + private val top20FilterableContributors = top20AggregatableContributors.map(_.hcursor.downField("label").as[Json].toOption.get).asJson + private val top21FilterableContributors = top21AggregatableContributors.map(_.hcursor.downField("label").as[Json].toOption.get).asJson + private val top21Contributors = Seq( createImageDocument( s"abadcafe", "top 20 only", - Map( - "source.contributors.agent.label" -> ('a' to 'z') - .map(n => s"Beverley Crusher ($n)") - ) + Map("source.contributors.agent" -> top20AggregatableContributors), + Map("source.contributors.agent.label" -> top20FilterableContributors) ), createImageDocument( "goodcafe", "top 20 and hapax", - Map( - "source.contributors.agent.label" -> (('a' to 'z') - .map(n => s"Beverley Crusher ($n)") :+ "Mark Sloan") - ) + Map("source.contributors.agent" -> top21AggregatableContributors), + Map("source.contributors.agent.label" -> top21FilterableContributors) ) ) private val multipleUncommonContributors = top21Contributors :+ createImageDocument( "baadf00d", "top 1 and hapax legomenon", - Map( - "source.contributors.agent.label" -> Seq( - "Yuri Zhivago", - "Beverley Crusher (a)" - ) - ) + Map("source.contributors.agent" -> Seq(createAggregatableField("Yuri Zhivago"),createAggregatableField("Beverley Crusher (a)"))), + Map("source.contributors.agent.label" -> Seq("Yuri Zhivago", "Beverley Crusher (a)").asJson) ) private val givens: Map[String, Seq[TestDocument]] = Map( diff --git a/search/src/test/scala/weco/api/search/matchers/AggregationRequestMatchers.scala b/search/src/test/scala/weco/api/search/matchers/AggregationRequestMatchers.scala index c24596e9e..00f217375 100644 --- a/search/src/test/scala/weco/api/search/matchers/AggregationRequestMatchers.scala +++ b/search/src/test/scala/weco/api/search/matchers/AggregationRequestMatchers.scala @@ -1,6 +1,9 @@ package weco.api.search.matchers -import com.sksamuel.elastic4s.requests.searches.aggs.{FilterAggregation, NestedAggregation, TermsAggregation} +import com.sksamuel.elastic4s.requests.searches.aggs.{ + FilterAggregation, + TermsAggregation +} import com.sksamuel.elastic4s.requests.searches.queries.Query import com.sksamuel.elastic4s.requests.searches.queries.compound.BoolQuery import org.scalatest.matchers.{HavePropertyMatchResult, HavePropertyMatcher} @@ -36,10 +39,9 @@ trait AggregationRequestMatchers { def aggregationField( expectedField: String - ): HavePropertyMatcher[FilterAggregation, String] = - (left: FilterAggregation) => { - val actualField = - left.subaggs.head.asInstanceOf[NestedAggregation].subaggs.head.asInstanceOf[TermsAggregation].field.get + ): HavePropertyMatcher[TermsAggregation, String] = + (left: TermsAggregation) => { + val actualField = left.field.get HavePropertyMatchResult( actualField == expectedField, diff --git a/search/src/test/scala/weco/api/search/models/AggregationResultsTest.scala b/search/src/test/scala/weco/api/search/models/AggregationResultsTest.scala index 8be06bb87..c822104fd 100644 --- a/search/src/test/scala/weco/api/search/models/AggregationResultsTest.scala +++ b/search/src/test/scala/weco/api/search/models/AggregationResultsTest.scala @@ -25,28 +25,42 @@ class AggregationResultsTest extends AnyFunSpec with Matchers { ), _aggregationsAsMap = Map( "format" -> Map( - "format" -> Map( - "doc_count_error_upper_bound" -> 0, - "sum_other_doc_count" -> 0, - "buckets" -> List( - Map( - "key" -> "apple", - "doc_count" -> 393145 - ), - Map( - "key" -> "banana", - "doc_count" -> 5696 - ), - Map( - "key" -> "coconut", - "doc_count" -> 9 + "nested" -> Map( + "format" -> Map( + "doc_count_error_upper_bound" -> 0, + "sum_other_doc_count" -> 0, + "buckets" -> List( + Map( + "key" -> "apple", + "doc_count" -> 393145 + ), + Map( + "key" -> "banana", + "doc_count" -> 5696 + ), + Map( + "key" -> "coconut", + "doc_count" -> 9 + ) + ) + ) + ), + "nestedSelf" -> Map( + "format" -> Map( + "doc_count_error_upper_bound" -> 0, + "sum_other_doc_count" -> 0, + "buckets" -> List( + Map( + "key" -> "rare fruit", + "doc_count" -> 1 + ), ) ) ) ) ) ) - println(searchResponse) + val singleAgg = WorkAggregations(searchResponse) singleAgg.get.format shouldBe Some( Aggregation( @@ -62,13 +76,19 @@ class AggregationResultsTest extends AnyFunSpec with Matchers { AggregationBucket( AggregationBucketData("coconut", "coconut"), count = 9 + ), + AggregationBucket( + AggregationBucketData("rare fruit", "rare fruit"), + count = 1 ) ) ) ) } - it("populates AggregationBucketData with the same label and ID if no nested 'labels' bucket provided") { + it( + "populates AggregationBucketData with the same label and ID if no nested 'labels' bucket provided" + ) { val searchResponse = SearchResponse( took = 1234, isTimedOut = false, @@ -83,13 +103,15 @@ class AggregationResultsTest extends AnyFunSpec with Matchers { ), _aggregationsAsMap = Map( "format" -> Map( - "format" -> Map( - "doc_count_error_upper_bound" -> 0, - "sum_other_doc_count" -> 0, - "buckets" -> List( - Map( - "key" -> "artichoke", - "doc_count" -> 393145 + "nested" -> Map( + "format" -> Map( + "doc_count_error_upper_bound" -> 0, + "sum_other_doc_count" -> 0, + "buckets" -> List( + Map( + "key" -> "artichoke", + "doc_count" -> 393145 + ) ) ) ) @@ -109,7 +131,9 @@ class AggregationResultsTest extends AnyFunSpec with Matchers { ) } - it("correctly populates AggregationBucketData with IDs and labels if a nested 'labels' bucket is provided for each ID bucket") { + it( + "correctly populates AggregationBucketData with IDs and labels if a nested 'labels' bucket is provided for each ID bucket" + ) { val searchResponse = SearchResponse( took = 1234, isTimedOut = false, @@ -125,35 +149,37 @@ class AggregationResultsTest extends AnyFunSpec with Matchers { _aggregationsAsMap = Map( "format" -> Map( "doc_count" -> 12345, - "format" -> Map( - "doc_count_error_upper_bound" -> 0, - "sum_other_doc_count" -> 0, - "buckets" -> List( - Map( - "key" -> "123", - "doc_count" -> 393145, - "labels" -> Map( - "buckets" -> List( - Map( - "key" -> "absinthe", - "doc_count" -> 393145 + "nested" -> Map( + "format" -> Map( + "doc_count_error_upper_bound" -> 0, + "sum_other_doc_count" -> 0, + "buckets" -> List( + Map( + "key" -> "123", + "doc_count" -> 393145, + "labels" -> Map( + "buckets" -> List( + Map( + "key" -> "absinthe", + "doc_count" -> 393145 + ) ) ) - ) - ), - Map( - "key" -> "456", - "doc_count" -> 34, - "labels" -> Map( - "buckets" -> List( - Map( - "key" -> "apple", - "doc_count" -> 34 + ), + Map( + "key" -> "456", + "doc_count" -> 34, + "labels" -> Map( + "buckets" -> List( + Map( + "key" -> "apple", + "doc_count" -> 34 + ) ) ) ) ) - ), + ) ) ) ) diff --git a/search/src/test/scala/weco/api/search/services/AggregationsBuilderTest.scala b/search/src/test/scala/weco/api/search/services/AggregationsBuilderTest.scala index fdd4f6d64..3b832e1dd 100644 --- a/search/src/test/scala/weco/api/search/services/AggregationsBuilderTest.scala +++ b/search/src/test/scala/weco/api/search/services/AggregationsBuilderTest.scala @@ -1,7 +1,7 @@ package weco.api.search.services import com.sksamuel.elastic4s.ElasticDsl.termsQuery -import com.sksamuel.elastic4s.requests.searches.aggs.{AbstractAggregation, FilterAggregation, NestedAggregation, TermsAggregation} +import com.sksamuel.elastic4s.requests.searches.aggs.{FilterAggregation, NestedAggregation, TermsAggregation} import com.sksamuel.elastic4s.requests.searches.queries.compound.BoolQuery import org.scalatest.{Inside, LoneElement} import org.scalatest.funspec.AnyFunSpec @@ -10,6 +10,7 @@ import org.scalatest.prop.TableDrivenPropertyChecks import weco.api.search.matchers.AggregationRequestMatchers import weco.api.search.models.request.WorkAggregationRequest import weco.api.search.models.{FormatFilter, GenreFilter, LanguagesFilter, WorkFilter} +import weco.api.search.services.WorksRequestBuilder.buildWorkFilterQuery class AggregationsBuilderTest extends AnyFunSpec @@ -37,33 +38,39 @@ class AggregationsBuilderTest aggregations should have length 2 + val firstNestedAggregation = aggregations.head.subaggs.head.asInstanceOf[NestedAggregation] + val firstSelfNestedAggregation = aggregations.head.subaggs(1).asInstanceOf[NestedAggregation] + // The first aggregation is the Format one, filtered with the languages filter. aggregations.head should have( filters(Seq(languagesFilterQuery)), - aggregationField(formatFieldName) ) + firstNestedAggregation.subaggs.head.asInstanceOf[TermsAggregation] should have( + aggregationField(s"${formatFieldName}.id") + ) + + inside(firstSelfNestedAggregation.subaggs.head) { + case selfFilter: TermsAggregation => + selfFilter should have( + aggregationField(s"${formatFieldName}.id") + ) + } + + val secondNestedAggregation = aggregations(1).subaggs.head.asInstanceOf[NestedAggregation] + val secondSelfNestedAggregation = aggregations(1).subaggs(1).asInstanceOf[NestedAggregation] - println(aggregations.head.subaggs(0)) - println(aggregations.head.subaggs(1).asInstanceOf[NestedAggregation].subaggs.head) -// -// inside(aggregations.head.subaggs(1).asInstanceOf[NestedAggregation].subaggs.head) { -// case selfFilter: TermsAggregation => -// selfFilter should have( -// filter(languagesFilterQuery), -// aggregationField(languagesFieldName) -// ) -// } // The second aggregation is the Languages one, filtered with the format filter. aggregations(1) should have( filters(Seq(formatFilterQuery)), - aggregationField(languagesFieldName) + ) + secondNestedAggregation.subaggs.head.asInstanceOf[TermsAggregation] should have( + aggregationField(s"${languagesFieldName}.id") ) - inside(aggregations(1).subaggs(1)) { case selfFilter: FilterAggregation => + inside(secondSelfNestedAggregation.subaggs.head) { case selfFilter: TermsAggregation => selfFilter should have( - filter(languagesFilterQuery), - aggregationField(languagesFieldName) + aggregationField(s"${languagesFieldName}.id") ) } } @@ -78,15 +85,34 @@ class AggregationsBuilderTest // The aggregation list is just the requested aggregation // filtered by the requested (unpaired) filter. - aggregations.loneElement - .asInstanceOf[FilterAggregation] - .subaggs - .loneElement // This marks the absence of the "self" filteraggregation - .asInstanceOf[TermsAggregation] - .field - .get shouldBe WorkAggregationRequest.Format.toString + val filterAggregation = aggregations.loneElement + val nestedAggregation = filterAggregation.subaggs.loneElement.asInstanceOf[NestedAggregation] // This marks the absence of the "self" aggregation + val termsAggregation = nestedAggregation.subaggs.loneElement.asInstanceOf[TermsAggregation] + + termsAggregation.field.get shouldBe s"${formatFieldName}.id" } + it("includes a label-based subaggregation") { + val languagesFilter = LanguagesFilter(Seq("en")) + + val aggregations = WorksAggregationsBuilder.getAggregations( + List(languagesFilter), + List(WorkAggregationRequest.Format) + ) + + // The aggregation list is just the requested aggregation + // filtered by the requested (unpaired) filter. + val filterAggregation = aggregations.loneElement + val nestedAggregation = filterAggregation.subaggs.loneElement.asInstanceOf[NestedAggregation] // This marks the absence of the "self" aggregation + val termsAggregation = nestedAggregation.subaggs.loneElement.asInstanceOf[TermsAggregation] + + val labelSubaggregation = termsAggregation.subaggs.loneElement.asInstanceOf[TermsAggregation] + + labelSubaggregation.field.get shouldBe s"${formatFieldName}.label" + labelSubaggregation.size shouldBe Some(1) + } + + it("applies paired filters to non-paired aggregations") { val formatFilter = FormatFilter(Seq("bananas")) @@ -99,28 +125,30 @@ class AggregationsBuilderTest //The first aggregation is Format, which has a "self" subaggregation. // The details of the content of this aggregation are examined // in the "applies to aggregations with a paired filter" test. - val firstAgg = aggregations.head - .asInstanceOf[FilterAggregation] + val firstAggregation = aggregations.head + firstAggregation.subaggs should have length 2 - firstAgg.subaggs should have length 2 - firstAgg should have( - filters(Nil), - aggregationField(WorkAggregationRequest.Format.toString) + val firstNestedAggregation = firstAggregation.subaggs.head.asInstanceOf[NestedAggregation] + + firstNestedAggregation.subaggs.head.asInstanceOf[TermsAggregation] should have( + aggregationField(s"${formatFieldName}.id") ) //The second aggregation is Language, which has no corresponding // filter in this query, so does not have a "self" subaggregation - val secondAgg = aggregations(1) - .asInstanceOf[FilterAggregation] - - secondAgg.subaggs should have length 1 + val secondAggregation = aggregations(1) + secondAggregation.subaggs should have length 1 // But it should still be filtered using the format filter - secondAgg should have( - filters(Seq(formatFilterQuery)), - aggregationField(WorkAggregationRequest.Languages.toString) + secondAggregation should have( + filters(Seq(formatFilterQuery)) ) + val secondNestedAggregation = secondAggregation.subaggs.head.asInstanceOf[NestedAggregation] + + secondNestedAggregation.subaggs.head.asInstanceOf[TermsAggregation] should have( + aggregationField(s"${languagesFieldName}.id") + ) } it("applies all other aggregation-dependent filters to the paired filter") { @@ -134,7 +162,7 @@ class AggregationsBuilderTest List( WorkAggregationRequest.Format, WorkAggregationRequest.Languages, - WorkAggregationRequest.Genre + WorkAggregationRequest.GenreLabel ) ) @@ -150,17 +178,15 @@ class AggregationsBuilderTest filters ): _* ) - ) { (agg: AbstractAggregation, thisFilter: WorkFilter) => - val filterQuery = agg - .asInstanceOf[FilterAggregation] - .query - .asInstanceOf[BoolQuery] + ) { (agg: FilterAggregation, thisFilter: WorkFilter) => + val filterQuery = agg.query.asInstanceOf[BoolQuery] + //Three filters are requested, each aggregation should // have only two. i.e. not it's own filterQuery.filters should have length 2 // And this ensures that it is the correct two. filterQuery.filters should contain theSameElementsAs filters - .filterNot(_ == thisFilter) + .filterNot(_ == thisFilter).map(buildWorkFilterQuery) } } } diff --git a/search/src/test/scala/weco/api/search/services/AggregationsTest.scala b/search/src/test/scala/weco/api/search/services/AggregationsTest.scala index df16eb573..a497afa17 100644 --- a/search/src/test/scala/weco/api/search/services/AggregationsTest.scala +++ b/search/src/test/scala/weco/api/search/services/AggregationsTest.scala @@ -92,7 +92,7 @@ class AggregationsTest val searchOptions = createWorksSearchOptionsWith( aggregations = - List(WorkAggregationRequest.Format, WorkAggregationRequest.Subject), + List(WorkAggregationRequest.Format, WorkAggregationRequest.SubjectLabel), filters = List(FormatFilter(List("a"))) ) whenReady(aggregationQuery(index, searchOptions)) { aggs => @@ -134,7 +134,7 @@ class AggregationsTest val searchOptions = createWorksSearchOptionsWith( aggregations = - List(WorkAggregationRequest.Format, WorkAggregationRequest.Subject), + List(WorkAggregationRequest.Format, WorkAggregationRequest.SubjectLabel), filters = List( FormatFilter(List("a")), SubjectLabelFilter(Seq("fIbfVPkqaf")) @@ -163,7 +163,7 @@ class AggregationsTest val searchOptions = createWorksSearchOptionsWith( aggregations = - List(WorkAggregationRequest.Format, WorkAggregationRequest.Subject), + List(WorkAggregationRequest.Format, WorkAggregationRequest.SubjectLabel), filters = List( FormatFilter(List("a")), SubjectLabelFilter(Seq("6rJpSUKd2d")) diff --git a/search/src/test/scala/weco/api/search/works/WorksAggregationsTest.scala b/search/src/test/scala/weco/api/search/works/WorksAggregationsTest.scala index e26c113a0..58f0f6b12 100644 --- a/search/src/test/scala/weco/api/search/works/WorksAggregationsTest.scala +++ b/search/src/test/scala/weco/api/search/works/WorksAggregationsTest.scala @@ -24,8 +24,7 @@ class WorksAggregationsTest extends AnyFunSpec with ApiWorksTestBase { { "data" : { "id" : "a", - "label" : "Books", - "type" : "Format" + "label" : "Books" }, "count" : ${worksFormatBooks.length}, "type" : "AggregationBucket" @@ -33,8 +32,7 @@ class WorksAggregationsTest extends AnyFunSpec with ApiWorksTestBase { { "data" : { "id" : "d", - "label" : "Journals", - "type" : "Format" + "label" : "Journals" }, "count" : ${worksFormatJournals.length}, "type" : "AggregationBucket" @@ -42,8 +40,7 @@ class WorksAggregationsTest extends AnyFunSpec with ApiWorksTestBase { { "data" : { "id" : "i", - "label" : "Audio", - "type" : "Format" + "label" : "Audio" }, "count" : ${worksFormatAudio.length}, "type" : "AggregationBucket" @@ -51,8 +48,7 @@ class WorksAggregationsTest extends AnyFunSpec with ApiWorksTestBase { { "data" : { "id" : "k", - "label" : "Pictures", - "type" : "Format" + "label" : "Pictures" }, "count" : ${worksFormatPictures.length}, "type" : "AggregationBucket" @@ -89,9 +85,8 @@ class WorksAggregationsTest extends AnyFunSpec with ApiWorksTestBase { "buckets": [ { "data" : { - "label" : "Electronic books", - "concepts": [], - "type" : "Genre" + "id" : "Electronic books", + "label" : "Electronic books" }, "count" : 1, "type" : "AggregationBucket" @@ -134,24 +129,24 @@ class WorksAggregationsTest extends AnyFunSpec with ApiWorksTestBase { "buckets": [ { "data" : { - "label": "1098", - "type": "Period" + "id": "1098", + "label": "1098" }, "count" : 1, "type" : "AggregationBucket" }, { "data" : { - "label": "1904", - "type": "Period" + "id": "1904", + "label": "1904" }, "count" : 1, "type" : "AggregationBucket" }, { "data" : { - "label": "2020", - "type": "Period" + "id": "2020", + "label": "2020" }, "count" : 1, "type" : "AggregationBucket" @@ -199,8 +194,7 @@ class WorksAggregationsTest extends AnyFunSpec with ApiWorksTestBase { { "data" : { "id" : "eng", - "label" : "English", - "type" : "Language" + "label" : "English" }, "count" : 5, "type" : "AggregationBucket" @@ -208,8 +202,7 @@ class WorksAggregationsTest extends AnyFunSpec with ApiWorksTestBase { { "data" : { "id" : "swe", - "label" : "Swedish", - "type" : "Language" + "label" : "Swedish" }, "count" : 3, "type" : "AggregationBucket" @@ -217,8 +210,7 @@ class WorksAggregationsTest extends AnyFunSpec with ApiWorksTestBase { { "data" : { "id" : "tur", - "label" : "Turkish", - "type" : "Language" + "label" : "Turkish" }, "count" : 2, "type" : "AggregationBucket" @@ -257,18 +249,16 @@ class WorksAggregationsTest extends AnyFunSpec with ApiWorksTestBase { "buckets": [ { "data" : { - "concepts" : [], - "label" : "realAnalysis", - "type" : "Subject" + "id" : "realAnalysis", + "label" : "realAnalysis" }, "count" : 3, "type" : "AggregationBucket" }, { "data" : { - "concepts" : [], - "label" : "paleoNeuroBiology", - "type" : "Subject" + "id" : "paleoNeuroBiology", + "label" : "paleoNeuroBiology" }, "count" : 2, "type" : "AggregationBucket" @@ -308,32 +298,32 @@ class WorksAggregationsTest extends AnyFunSpec with ApiWorksTestBase { { "count" : 2, "data" : { - "label" : "47", - "type" : "Agent" + "id" : "47", + "label" : "47" }, "type" : "AggregationBucket" }, { "count" : 2, "data" : { - "label" : "MI5", - "type" : "Organisation" + "id" : "MI5", + "label" : "MI5" }, "type" : "AggregationBucket" }, { "count" : 1, "data" : { - "label" : "007", - "type" : "Agent" + "id" : "007", + "label" : "007" }, "type" : "AggregationBucket" }, { "count" : 1, "data" : { - "label" : "GCHQ", - "type" : "Organisation" + "id" : "GCHQ" + "label" : "GCHQ" }, "type" : "AggregationBucket" } @@ -447,8 +437,7 @@ class WorksAggregationsTest extends AnyFunSpec with ApiWorksTestBase { "count": 2, "data": { "label": "Open shelves", - "id": "open-shelves", - "type" : "Availability" + "id": "open-shelves" }, "type": "AggregationBucket" } diff --git a/search/src/test/scala/weco/api/search/works/WorksFacetingTest.scala b/search/src/test/scala/weco/api/search/works/WorksFacetingTest.scala index 7405d6c79..7e3611319 100644 --- a/search/src/test/scala/weco/api/search/works/WorksFacetingTest.scala +++ b/search/src/test/scala/weco/api/search/works/WorksFacetingTest.scala @@ -1,12 +1,11 @@ package weco.api.search.works +import io.circe.Json +import io.circe.syntax.EncoderOps import org.scalatest.GivenWhenThen import weco.api.search.FacetingFeatures import weco.api.search.fixtures.{JsonServer, LocalJsonServerFixture} -import weco.api.search.generators.{ - AggregationDocumentGenerators, - BucketGenerators -} +import weco.api.search.generators.{AggregationDocumentGenerators, BucketGenerators} import weco.fixtures.TestWith class WorksFacetingTest @@ -25,8 +24,8 @@ class WorksFacetingTest (3, "d", "Journals"), (2, "i", "Audio"), (1, "k", "Pictures") - ) map { - case (count, code, label) => toKeywordBucket("Format", count, code, label) + ) map { case (count, code, label) => + toKeywordBucket(count, code, label) }: _* ) private val languageBuckets = Seq( @@ -35,18 +34,16 @@ class WorksFacetingTest (3, "que", "Quechua"), (2, "mar", "Marathi"), (1, "che", "Chechen") - ) map { - case (count, code, label) => - toKeywordBucket("Language", count, code, label) + ) map { case (count, code, label) => + toKeywordBucket(count, code, label) }: _* ) private val capybaraWorkTypeBuckets = Seq( Seq( (2, "a", "Books"), (1, "d", "Journals") - ) map { - case (count, code, label) => - toKeywordBucket("Format", count, code, label) + ) map { case (count, code, label) => + toKeywordBucket(count, code, label) }: _* ) @@ -54,9 +51,8 @@ class WorksFacetingTest Seq( (2, "mar", "Marathi"), (1, "bak", "Bashkir") - ) map { - case (count, code, label) => - toKeywordBucket("Language", count, code, label) + ) map { case (count, code, label) => + toKeywordBucket(count, code, label) }: _* ) @@ -64,9 +60,8 @@ class WorksFacetingTest Seq( (1, "a", "Books"), (1, "d", "Journals") - ) map { - case (count, code, label) => - toKeywordBucket("Format", count, code, label) + ) map { case (count, code, label) => + toKeywordBucket(count, code, label) }: _* ) @@ -74,44 +69,45 @@ class WorksFacetingTest Seq( (3, "bak", "Bashkir"), (1, "mar", "Marathi") - ) map { - case (count, code, label) => - toKeywordBucket("Language", count, code, label) + ) map { case (count, code, label) => + toKeywordBucket(count, code, label) }: _* ) private val aggregatedWorks = (0 to 9).map(i => s"works.examples.filtered-aggregations-tests.$i") + private val top20AggregatableContributors = ('a' to 'z').map(n => + createAggregatableField(s"Beverley Crusher ($n)") + ) + private val top21AggregatableContributors = + top20AggregatableContributors ++ Seq(createAggregatableField("Mark Sloan")) + private val top20FilterableContributors = top20AggregatableContributors.map(_.hcursor.downField("label").as[Json].toOption.get).asJson + private val top21FilterableContributors = top21AggregatableContributors.map(_.hcursor.downField("label").as[Json].toOption.get).asJson + + private val top21Contributors = Seq( createWorkDocument( - s"abadcafe", + "abadcafe", "top 20 only", - Map( - "contributors.agent.label" -> ('a' to 'z') - .map(n => s"Beverley Crusher ($n)") - ) + Map("contributors.agent" -> top20AggregatableContributors), + Map("contributors.agent.label" -> top20FilterableContributors) ), createWorkDocument( "goodcafe", "top 20 and hapax", - Map( - "contributors.agent.label" -> (('a' to 'z') - .map(n => s"Beverley Crusher ($n)") :+ "Mark Sloan") - ) + Map("contributors.agent" -> top21AggregatableContributors), + Map("contributors.agent.label" -> top21FilterableContributors) ) ) - private val multipleUncommonContributors = top21Contributors :+ createWorkDocument( - "baadf00d", - "top 1 and hapax legomenon", - Map( - "contributors.agent.label" -> Seq( - "Yuri Zhivago", - "Beverley Crusher (a)" - ) + private val multipleUncommonContributors = + top21Contributors :+ createWorkDocument( + "baadf00d", + "top 1 and hapax legomenon", + Map("contributors.agent" -> Seq(createAggregatableField("Yuri Zhivago"),createAggregatableField("Beverley Crusher (a)"))), + Map("contributors.agent.label" -> Seq("Yuri Zhivago", "Beverley Crusher (a)").asJson) ) - ) private val givens = Map[String, Seq[TestDocument]]( "a dataset with some common aggregable values and a less common one" -> top21Contributors, @@ -130,13 +126,12 @@ class WorksFacetingTest private def withFacetedAPI[R]( docs: Option[Seq[TestDocument]] )(testWith: TestWith[JsonServer, R]): R = - withWorksApi[R] { - case (worksIndex, route) => - docs match { - case Some(docs) => indexLoadedTestDocuments(worksIndex, docs) - case None => indexTestDocuments(worksIndex, aggregatedWorks: _*) - } - testWith(new WorksJsonServer(route)) + withWorksApi[R] { case (worksIndex, route) => + docs match { + case Some(docs) => indexLoadedTestDocuments(worksIndex, docs) + case None => indexTestDocuments(worksIndex, aggregatedWorks: _*) + } + testWith(new WorksJsonServer(route)) } protected val oneAggregation: ScenarioData = ScenarioData( @@ -199,14 +194,13 @@ class WorksFacetingTest filters = Seq(("workType", "k"), ("languages", "mar")), expectedAggregationBuckets = Map( "workType" -> (marathiWorkTypeBuckets :+ toKeywordBucket( - "Format", 0, "k", "Pictures" )), "languages" -> Seq( - toKeywordBucket("Language", 1, "que", "Quechua"), - toKeywordBucket("Language", 0, "mar", "Marathi") + toKeywordBucket(1, "que", "Quechua"), + toKeywordBucket(0, "mar", "Marathi") ) ) ) @@ -226,8 +220,8 @@ class WorksFacetingTest // Quechua alone would be Pictures(1), Journals(2) expectedAggregationBuckets = Map( "workType" -> Seq( - toKeywordBucket("Format", 1, "d", "Journals"), - toKeywordBucket("Format", 1, "k", "Pictures") + toKeywordBucket(1, "d", "Journals"), + toKeywordBucket(1, "k", "Pictures") ) ) ) @@ -236,9 +230,9 @@ class WorksFacetingTest aggregationFields = Seq("contributors.agent.label"), filters = Seq(("contributors.agent.label", "Mark%20Sloan")), expectedAggregationBuckets = Map( - "contributors.agent.label" -> (('a' to 't').map( - n => toUnidentifiedBucket(2, s"Beverley Crusher ($n)") - ) :+ toUnidentifiedBucket(1, "Mark Sloan")) + "contributors.agent.label" -> (('a' to 't').map(n => + toKeywordBucket(2, s"Beverley Crusher ($n)") + ) :+ toKeywordBucket(1, "Mark Sloan")) ) ) @@ -252,13 +246,13 @@ class WorksFacetingTest aggregationFields = Seq("contributors.agent.label"), expectedAggregationBuckets = Map( "contributors.agent.label" -> (Seq( - toUnidentifiedBucket(3, "Beverley Crusher (a)") - ) ++ ('b' to 't').map( - n => toUnidentifiedBucket(2, s"Beverley Crusher ($n)") + toKeywordBucket(3, "Beverley Crusher (a)") + ) ++ ('b' to 't').map(n => + toKeywordBucket(2, s"Beverley Crusher ($n)") ) ++ Seq( - toUnidentifiedBucket(2, "Beverley Crusher (z)"), - toUnidentifiedBucket(1, "Mark Sloan"), - toUnidentifiedBucket(1, "Yuri Zhivago") + toKeywordBucket(2, "Beverley Crusher (z)"), + toKeywordBucket(1, "Mark Sloan"), + toKeywordBucket(1, "Yuri Zhivago") )) ) ) @@ -269,9 +263,9 @@ class WorksFacetingTest aggregationFields = Seq("contributors.agent.label"), expectedAggregationBuckets = Map( "contributors.agent.label" -> Seq( - toUnidentifiedBucket(1, "Beverley Crusher (a)"), - toUnidentifiedBucket(1, "Yuri Zhivago"), - toUnidentifiedBucket(0, "Mark Sloan") + toKeywordBucket(1, "Beverley Crusher (a)"), + toKeywordBucket(1, "Yuri Zhivago"), + toKeywordBucket(0, "Mark Sloan") ) ) ) diff --git a/search/src/test/scala/weco/api/search/works/WorksFilteredAggregationsTest.scala b/search/src/test/scala/weco/api/search/works/WorksFilteredAggregationsTest.scala index df46d443b..d85adfd77 100644 --- a/search/src/test/scala/weco/api/search/works/WorksFilteredAggregationsTest.scala +++ b/search/src/test/scala/weco/api/search/works/WorksFilteredAggregationsTest.scala @@ -1,5 +1,6 @@ package weco.api.search.works +import io.circe.syntax.EncoderOps import org.scalatest.funspec.AnyFunSpec import weco.api.search.generators.AggregationDocumentGenerators import weco.api.search.models.request.WorksIncludes @@ -52,8 +53,7 @@ class WorksFilteredAggregationsTest "count" : 3, "data" : { "id" : "bak", - "label" : "Bashkir", - "type" : "Language" + "label" : "Bashkir" }, "type" : "AggregationBucket" }, @@ -61,8 +61,7 @@ class WorksFilteredAggregationsTest "count" : 1, "data" : { "id" : "mar", - "label" : "Marathi", - "type" : "Language" + "label" : "Marathi" }, "type" : "AggregationBucket" } @@ -128,8 +127,7 @@ class WorksFilteredAggregationsTest "count" : 3, "data" : { "id" : "d", - "label" : "Journals", - "type" : "Format" + "label" : "Journals" }, "type" : "AggregationBucket" }, @@ -137,8 +135,7 @@ class WorksFilteredAggregationsTest "count" : 2, "data" : { "id" : "i", - "label" : "Audio", - "type" : "Format" + "label" : "Audio" }, "type" : "AggregationBucket" }, @@ -146,8 +143,7 @@ class WorksFilteredAggregationsTest "count" : 1, "data" : { "id" : "k", - "label" : "Pictures", - "type" : "Format" + "label" : "Pictures" }, "type" : "AggregationBucket" } @@ -187,8 +183,7 @@ class WorksFilteredAggregationsTest "count" : 1, "data" : { "id" : "i", - "label" : "Audio", - "type" : "Format" + "label" : "Audio" }, "type" : "AggregationBucket" }, @@ -196,8 +191,7 @@ class WorksFilteredAggregationsTest "count" : 0, "data" : { "id" : "a", - "label" : "Books", - "type" : "Format" + "label" : "Books" }, "type" : "AggregationBucket" } @@ -217,11 +211,8 @@ class WorksFilteredAggregationsTest val doc = createWorkDocument( "xkcd0327", "Exploits Of a Mom", - Map( - "contributors.agent.label" -> Seq( - "Robert .?+*|{}<>&@[]()\" Tables" - ) - ) + Map("contributors.agent" -> Seq(createAggregatableField("Robert .?+*|{}<>&@[]()\" Tables"))), + Map("contributors.agent.label" -> Seq("Robert .?+*|{}<>&@[]()\" Tables").asJson) ) indexLoadedTestDocuments(worksIndex, Seq(doc)) @@ -240,6 +231,7 @@ class WorksFilteredAggregationsTest "contributors.agent.label": { "buckets": [{ "data": { + "id" : "Robert .?+*|{}<>&@[]()\\" Tables", "label": "Robert .?+*|{}<>&@[]()\\" Tables" }, "count":1, @@ -276,8 +268,7 @@ class WorksFilteredAggregationsTest "count" : 2, "data" : { "id" : "i", - "label" : "Audio", - "type" : "Format" + "label" : "Audio" }, "type" : "AggregationBucket" }, @@ -285,8 +276,7 @@ class WorksFilteredAggregationsTest "count" : 1, "data" : { "id" : "a", - "label" : "Books", - "type" : "Format" + "label" : "Books" }, "type" : "AggregationBucket" }, @@ -294,8 +284,7 @@ class WorksFilteredAggregationsTest "count" : 1, "data" : { "id" : "d", - "label" : "Journals", - "type" : "Format" + "label" : "Journals" }, "type" : "AggregationBucket" } @@ -330,8 +319,7 @@ class WorksFilteredAggregationsTest "count" : 4, "data" : { "id" : "a", - "label" : "Books", - "type" : "Format" + "label" : "Books" }, "type" : "AggregationBucket" }, @@ -339,8 +327,7 @@ class WorksFilteredAggregationsTest "count" : 3, "data" : { "id" : "d", - "label" : "Journals", - "type" : "Format" + "label" : "Journals" }, "type" : "AggregationBucket" }, @@ -348,8 +335,7 @@ class WorksFilteredAggregationsTest "count" : 2, "data" : { "id" : "i", - "label" : "Audio", - "type" : "Format" + "label" : "Audio" }, "type" : "AggregationBucket" }, @@ -357,8 +343,7 @@ class WorksFilteredAggregationsTest "count" : 1, "data" : { "id" : "k", - "label" : "Pictures", - "type" : "Format" + "label" : "Pictures" }, "type" : "AggregationBucket" }