Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FTS: Hybrid search query pipeline #42885

Merged
merged 16 commits into from
Nov 19, 2024

Large diffs are not rendered by default.

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions sdk/cosmos/azure-cosmos/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
### 4.65.0-beta.1 (Unreleased)

#### Features Added
* Added support for Hybrid Search and Full Text queries, and the new query features `HybridSearch` and `CountIf`, in Cosmos DB - See [PR 42885](https://github.com/Azure/azure-sdk-for-java/pull/42885)
* Added `CosmosFullTextPolicy` in `CosmosContainerProperties` and `CosmosFullTextIndexes` in `IndexingPolicy` to support Full Text Search in Cosmos DB - See [PR 42278](https://github.com/Azure/azure-sdk-for-java/pull/42278)
* Added two new properties `quantizationSizeInBytes` and `indexingSearchListSize` to the `CosmosVectorIndexSpec` to support Partitioned DiskANN for vector search in Cosmos DB - See [PR 42333](https://github.com/Azure/azure-sdk-for-java/pull/42333)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,10 @@ public class Configs {
private static final String MAX_ITEM_COUNT_FOR_VECTOR_SEARCH = "COSMOS.MAX_ITEM_SIZE_FOR_VECTOR_SEARCH";
public static final int DEFAULT_MAX_ITEM_COUNT_FOR_VECTOR_SEARCH = 50000;

// JVM system property name read first by getMaxItemCountForHybridSearchSearch().
// NOTE(review): the key text says ITEM_SIZE while the identifier says ITEM_COUNT; the key is runtime text and must stay as-is.
private static final String MAX_ITEM_COUNT_FOR_HYBRID_SEARCH = "COSMOS.MAX_ITEM_SIZE_FOR_HYBRID_SEARCH";
// Environment variable name consulted when the system property is unset or empty (underscore instead of dot after COSMOS).
private static final String MAX_ITEM_COUNT_FOR_HYBRID_SEARCH_VARIABLE = "COSMOS_MAX_ITEM_SIZE_FOR_HYBRID_SEARCH";
// Value returned when neither the system property nor the environment variable supplies a non-empty value.
public static final int DEFAULT_MAX_ITEM_COUNT_FOR_HYBRID_SEARCH = 1000;

private static final String AZURE_COSMOS_DISABLE_NON_STREAMING_ORDER_BY = "COSMOS.AZURE_COSMOS_DISABLE_NON_STREAMING_ORDER_BY";

private static final boolean DEFAULT_AZURE_COSMOS_DISABLE_NON_STREAMING_ORDER_BY = false;
Expand Down Expand Up @@ -574,6 +578,19 @@ public static int getMaxItemCountForVectorSearch() {
String.valueOf(DEFAULT_MAX_ITEM_COUNT_FOR_VECTOR_SEARCH))));
}

/**
 * Resolves the configured maximum item count for hybrid search queries.
 * <p>
 * Lookup order: the system property named by {@code MAX_ITEM_COUNT_FOR_HYBRID_SEARCH}, then the
 * environment variable named by {@code MAX_ITEM_COUNT_FOR_HYBRID_SEARCH_VARIABLE}, and finally
 * {@code DEFAULT_MAX_ITEM_COUNT_FOR_HYBRID_SEARCH}. A {@code null} or empty value is treated as "not set".
 *
 * @return the resolved maximum item count.
 * @throws NumberFormatException if the first non-empty value found is not a parsable integer.
 */
public static int getMaxItemCountForHybridSearchSearch() {
    final String propertyOverride = System.getProperty(MAX_ITEM_COUNT_FOR_HYBRID_SEARCH);
    if (propertyOverride == null || propertyOverride.isEmpty()) {
        // The environment is only consulted when the system property gives no usable value.
        final String environmentOverride = System.getenv(MAX_ITEM_COUNT_FOR_HYBRID_SEARCH_VARIABLE);
        if (environmentOverride == null || environmentOverride.isEmpty()) {
            return DEFAULT_MAX_ITEM_COUNT_FOR_HYBRID_SEARCH;
        }
        return Integer.parseInt(environmentOverride);
    }
    return Integer.parseInt(propertyOverride);
}

public static boolean getAzureCosmosNonStreamingOrderByDisabled() {
if(logger.isTraceEnabled()) {
logger.trace(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,19 @@ public static final class Properties {
public static final String FULL_TEXT_POLICY = "fullTextPolicy";
public static final String FULL_TEXT_PATHS = "fullTextPaths";

// Hybrid Search Query
// Property-name constants added for hybrid search queries.
// NOTE(review): presumably the JSON field names of the hybrid search query info and the
// full-text statistics payloads - confirm against the classes that read these constants.
public static final String GLOBAL_STATISTICS_QUERY = "globalStatisticsQuery";
public static final String COMPONENT_QUERY_INFOS = "componentQueryInfos";
public static final String PROJECTION_QUERY_INFO = "projectionQueryInfo";
public static final String SKIP = "skip";
public static final String TAKE = "take";
public static final String REQUIRES_GLOBAL_STATISTICS = "requiresGlobalStatistics";
public static final String TOTAL_WORD_COUNT = "totalWordCount";
public static final String HIT_COUNTS = "hitCounts";
public static final String DOCUMENT_COUNT = "documentCount";
public static final String FULL_TEXT_QUERY_STATISTICS = "fullTextStatistics";
public static final String PAYLOAD = "payload";
public static final String COMPONENT_SCORES = "componentScores";

// Vector Embedding Policy
public static final String VECTOR_EMBEDDING_POLICY = "vectorEmbeddingPolicy";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ public final class CosmosQueryRequestOptionsImpl extends CosmosQueryRequestOptio
private boolean emptyPageDiagnosticsEnabled;
private String queryName;
private Integer maxItemCountForVectorSearch;
// Per-instance max item count for hybrid search; while null, getMaxItemCountForHybridSearch()
// falls back to Configs.getMaxItemCountForHybridSearchSearch(). Copied by the copy constructor.
private Integer maxItemCountForHybridSearch;
private List<CosmosDiagnostics> cancelledRequestDiagnosticsTracker = new ArrayList<>();
private String collectionRid;

Expand Down Expand Up @@ -69,6 +70,7 @@ public CosmosQueryRequestOptionsImpl(CosmosQueryRequestOptionsImpl options) {
this.feedRange = options.feedRange;
this.cancelledRequestDiagnosticsTracker = options.cancelledRequestDiagnosticsTracker;
this.maxItemCountForVectorSearch = options.maxItemCountForVectorSearch;
this.maxItemCountForHybridSearch = options.maxItemCountForHybridSearch;
this.collectionRid = options.collectionRid;
}

Expand Down Expand Up @@ -235,6 +237,15 @@ public Integer getMaxItemCountForVectorSearch() {
return this.maxItemCountForVectorSearch;
}

/**
 * Gets the maximum number of items to fetch during hybrid search queries.
 * <p>
 * The value stored on this options instance wins; when none has been stored, the value
 * resolved by {@code Configs.getMaxItemCountForHybridSearchSearch()} is returned instead.
 *
 * @return the max number of items for hybrid search.
 */
public Integer getMaxItemCountForHybridSearch() {
    if (this.maxItemCountForHybridSearch == null) {
        // Nothing stored on this instance: defer to the process-wide configuration.
        return Configs.getMaxItemCountForHybridSearchSearch();
    }
    return this.maxItemCountForHybridSearch;
}

/**
* Sets the maximum item size to fetch during non-streaming order by queries.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,8 @@ void setCancelledRequestDiagnosticsTracker(

Integer getMaxItemCountForVectorSearch(CosmosQueryRequestOptions options);

// Accessor for the hybrid search max item count of the given query request options.
// NOTE(review): presumably backed by CosmosQueryRequestOptionsImpl#getMaxItemCountForHybridSearch - confirm in the accessor implementation.
Integer getMaxItemCountForHybridSearch(CosmosQueryRequestOptions options);

void setPartitionKeyDefinition(CosmosQueryRequestOptions options, PartitionKeyDefinition partitionKeyDefinition);

PartitionKeyDefinition getPartitionKeyDefinition(CosmosQueryRequestOptions options);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ public Integer getInt(String propertyName) {
* @param propertyName the property to get.
* @return the long value
*/
protected Long getLong(String propertyName) {
public Long getLong(String propertyName) {
    // Delegate the lookup to getWithMapping, converting the node with JsonNode.asLong.
    final Long mappedValue = getWithMapping(propertyName, JsonNode::asLong);
    return mappedValue;
}

Expand Down
Loading
Loading