Skip to content

Commit

Permalink
Merge pull request #31813 from vespa-engine/toregge/add-document-freq…
Browse files Browse the repository at this point in the history
…uency-to-query-items

Add document frequency to query items.
  • Loading branch information
toregge authored Jul 2, 2024
2 parents e0ff928 + c700bab commit d1b3b4c
Show file tree
Hide file tree
Showing 8 changed files with 91 additions and 3 deletions.
31 changes: 29 additions & 2 deletions container-search/abi-spec.json
Original file line number Diff line number Diff line change
Expand Up @@ -480,10 +480,30 @@
"public void setExplicitSignificance(boolean)",
"public boolean hasExplicitSignificance()",
"public double getSignificance()",
"public void setDocumentFrequency(com.yahoo.prelude.query.DocumentFrequency)",
"public java.util.Optional getDocumentFrequency()",
"public boolean hasUniqueID()"
],
"fields" : [ ]
},
"com.yahoo.prelude.query.DocumentFrequency" : {
"superClass" : "java.lang.Record",
"interfaces" : [ ],
"attributes" : [
"public",
"final",
"record"
],
"methods" : [
"public void <init>(long, long)",
"public final java.lang.String toString()",
"public final int hashCode()",
"public final boolean equals(java.lang.Object)",
"public long frequency()",
"public long count()"
],
"fields" : [ ]
},
"com.yahoo.prelude.query.DotProductItem" : {
"superClass" : "com.yahoo.prelude.query.WeightedSetItem",
"interfaces" : [ ],
Expand Down Expand Up @@ -874,7 +894,8 @@
"protected com.yahoo.prelude.query.Item connectedBacklink",
"protected double connectivity",
"protected double significance",
"protected boolean explicitSignificance"
"protected boolean explicitSignificance",
"protected com.yahoo.prelude.query.DocumentFrequency documentFrequency"
]
},
"com.yahoo.prelude.query.ItemHelper" : {
Expand Down Expand Up @@ -1578,6 +1599,8 @@
"public void setExplicitSignificance(boolean)",
"public boolean hasExplicitSignificance()",
"public double getSignificance()",
"public void setDocumentFrequency(com.yahoo.prelude.query.DocumentFrequency)",
"public java.util.Optional getDocumentFrequency()",
"public boolean hasUniqueID()"
],
"fields" : [ ]
Expand Down Expand Up @@ -1679,7 +1702,9 @@
"public abstract void setSignificance(double)",
"public abstract boolean hasExplicitSignificance()",
"public abstract void setExplicitSignificance(boolean)",
"public abstract double getSignificance()"
"public abstract double getSignificance()",
"public abstract void setDocumentFrequency(com.yahoo.prelude.query.DocumentFrequency)",
"public abstract java.util.Optional getDocumentFrequency()"
],
"fields" : [ ]
},
Expand All @@ -1703,6 +1728,8 @@
"public void setExplicitSignificance(boolean)",
"public boolean hasExplicitSignificance()",
"public double getSignificance()",
"public void setDocumentFrequency(com.yahoo.prelude.query.DocumentFrequency)",
"public java.util.Optional getDocumentFrequency()",
"public boolean hasUniqueID()"
],
"fields" : [ ]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.query;

import java.util.Optional;

/**
* Common implementation for Item classes implementing the TaggableItem interface.
* Note that this file exists in 3 copies that should be kept in sync:
Expand Down Expand Up @@ -68,6 +70,12 @@ public double getSignificance() {
return significance;
}

/**
 * Stores the given document frequency on this item, replacing any value set earlier.
 *
 * @param documentFrequency the document frequency to keep on this item
 */
@Override
public void setDocumentFrequency(DocumentFrequency documentFrequency) {
    this.documentFrequency = documentFrequency;
}

/**
 * Returns the document frequency held by this item.
 *
 * @return the stored document frequency, or an empty Optional when the field is null
 */
@Override
public Optional<DocumentFrequency> getDocumentFrequency() {
    if (documentFrequency == null) {
        return Optional.empty();
    }
    return Optional.of(documentFrequency);
}

//Change access privilege from protected to public.
public boolean hasUniqueID() {
return super.hasUniqueID();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.query;

import com.yahoo.api.annotations.Beta;

/**
 * Raw document frequency data for an item: the number of documents the item is
 * expected to match, together with the size of the corpus those documents
 * belong to. Variants of idf, which are used as significance, are calculated
 * from these two numbers.
 *
 * @param frequency the number of documents in which an item occurs
 * @param count     the total number of documents in the corpus
 */
@Beta
public record DocumentFrequency(long frequency, long count) { }
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ public enum ItemCreator {
protected double significance = 0;
protected boolean explicitSignificance = false;

protected DocumentFrequency documentFrequency = null;

/** Whether this item is eligible for change by query rewriters (false) or should be kept as-is (true) */
private boolean isProtected;

Expand Down Expand Up @@ -495,6 +497,8 @@ public void disclose(Discloser discloser) {
discloser.addProperty("usePositionData", usePositionData);
if (explicitSignificance)
discloser.addProperty("significance", significance);
if (documentFrequency != null)
discloser.addProperty("documentFrequency", documentFrequency);
if (weight != 100)
discloser.addProperty("weight", weight);
if (label != null)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.query;

import java.util.Optional;

/**
* Common implementation for Item classes implementing the TaggableItem interface.
* Note that this file exists in 3 copies that should be kept in sync:
Expand Down Expand Up @@ -68,6 +70,12 @@ public double getSignificance() {
return significance;
}

/**
 * Stores the given document frequency on this item, replacing any value set earlier.
 *
 * @param documentFrequency the document frequency to keep on this item
 */
@Override
public void setDocumentFrequency(DocumentFrequency documentFrequency) {
    this.documentFrequency = documentFrequency;
}

/**
 * Returns the document frequency held by this item.
 *
 * @return the stored document frequency, or an empty Optional when the field is null
 */
@Override
public Optional<DocumentFrequency> getDocumentFrequency() {
    if (documentFrequency == null) {
        return Optional.empty();
    }
    return Optional.of(documentFrequency);
}

//Change access privilege from protected to public.
public boolean hasUniqueID() {
return super.hasUniqueID();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.query;
import com.yahoo.api.annotations.Beta;

import java.util.Optional;

/**
* An interface used for anything which may be addressed using an external,
Expand Down Expand Up @@ -44,4 +47,9 @@ public interface TaggableItem {
void setExplicitSignificance(boolean significance);
double getSignificance();

/**
 * Sets the document frequency of this item.
 *
 * @param documentFrequency the number of documents in which the item occurs,
 *                          paired with the total number of documents in the corpus
 */
@Beta
void setDocumentFrequency(DocumentFrequency documentFrequency);

/**
 * Returns the document frequency of this item, if any.
 *
 * @return the document frequency wrapped in an Optional; presumably empty when
 *         none has been set (NOTE(review): confirm against each implementation)
 */
@Beta
Optional<DocumentFrequency> getDocumentFrequency();
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.query;

import java.util.Optional;

/**
* Common implementation for Item classes implementing the TaggableItem interface.
* Note that this file exists in 3 copies that should be kept in sync:
Expand Down Expand Up @@ -81,6 +83,12 @@ public double getSignificance() {
return significance;
}

/**
 * Stores the given document frequency on this item, replacing any value set earlier.
 *
 * @param documentFrequency the document frequency to keep on this item
 */
@Override
public void setDocumentFrequency(DocumentFrequency documentFrequency) {
    this.documentFrequency = documentFrequency;
}

/**
 * Returns the document frequency held by this item.
 *
 * @return the stored document frequency, or an empty Optional when the field is null
 */
@Override
public Optional<DocumentFrequency> getDocumentFrequency() {
    if (documentFrequency == null) {
        return Optional.empty();
    }
    return Optional.of(documentFrequency);
}

//Change access privilege from protected to public.
@Override
public boolean hasUniqueID() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ void requireSimilarAPIs() {
.getDeclaredMethods();
final Method[] simple = SimpleTaggableItem.class.getDeclaredMethods();
final Method[] segment = TaggableSegmentItem.class.getDeclaredMethods();
final int numberOfMethods = 10;
final int numberOfMethods = 12;
assertEquals(numberOfMethods, composite.length);
assertEquals(numberOfMethods, simple.length);
assertEquals(numberOfMethods, segment.length);
Expand Down Expand Up @@ -152,4 +152,13 @@ final void testSetSignificance() {
assertTrue(p.hasExplicitSignificance());
}

@Test
final void testSetDocumentFrequency() {
    final PhraseSegmentItem item = new PhraseSegmentItem("farmyards", false, false);

    // A freshly constructed item carries no document frequency.
    assertTrue(item.getDocumentFrequency().isEmpty());

    item.setDocumentFrequency(new DocumentFrequency(13, 100));

    // After setting, the value is present and equal to a separately built record.
    final var stored = item.getDocumentFrequency();
    assertFalse(stored.isEmpty());
    assertEquals(new DocumentFrequency(13, 100), stored.get());
}

}

0 comments on commit d1b3b4c

Please sign in to comment.