From 76b4468587f635ebcb4ba2b0a01e4325d9a23f9d Mon Sep 17 00:00:00 2001 From: misselvexu Date: Thu, 14 Nov 2024 18:52:59 +0800 Subject: [PATCH 1/4] Add spring boot starter for pgvector. --- .../pom.xml | 89 +++++++++++++++++++ .../MetadataStorageConfigProperties.java | 28 ++++++ ...VectorEmbeddingStoreAutoConfiguration.java | 61 +++++++++++++ .../PgVectorEmbeddingStoreProperties.java | 34 +++++++ ...ot.autoconfigure.AutoConfiguration.imports | 1 + ...ctorEmbeddingStoreAutoConfigurationIT.java | 71 +++++++++++++++ pom.xml | 1 + 7 files changed, 285 insertions(+) create mode 100644 langchain4j-pgvector-spring-boot-starter/pom.xml create mode 100644 langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/MetadataStorageConfigProperties.java create mode 100644 langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfiguration.java create mode 100644 langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreProperties.java create mode 100644 langchain4j-pgvector-spring-boot-starter/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports create mode 100644 langchain4j-pgvector-spring-boot-starter/src/test/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfigurationIT.java diff --git a/langchain4j-pgvector-spring-boot-starter/pom.xml b/langchain4j-pgvector-spring-boot-starter/pom.xml new file mode 100644 index 0000000..a6ba9db --- /dev/null +++ b/langchain4j-pgvector-spring-boot-starter/pom.xml @@ -0,0 +1,89 @@ + + + 4.0.0 + + dev.langchain4j + langchain4j-spring + 0.37.0-SNAPSHOT + ../pom.xml + + + langchain4j-pgvector-spring-boot-starter + LangChain4j Spring Boot starter for PgVector + jar + + + + + dev.langchain4j + langchain4j-pgvector + + + + org.springframework.boot + spring-boot-starter + + + + 
org.springframework.boot + spring-boot-autoconfigure-processor + true + + + + + org.projectlombok + lombok + provided + + + + + org.springframework.boot + spring-boot-configuration-processor + true + + + + org.springframework.boot + spring-boot-starter-test + test + + + + dev.langchain4j + langchain4j-embeddings-all-minilm-l6-v2-q + test + + + + dev.langchain4j + langchain4j-spring-boot-tests + ${project.version} + tests + test-jar + test + + + + org.testcontainers + postgresql + test + + + + org.tinylog + tinylog-impl + test + + + + org.tinylog + slf4j-tinylog + test + + + + \ No newline at end of file diff --git a/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/MetadataStorageConfigProperties.java b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/MetadataStorageConfigProperties.java new file mode 100644 index 0000000..d641271 --- /dev/null +++ b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/MetadataStorageConfigProperties.java @@ -0,0 +1,28 @@ +package dev.langchain4j.store.embedding.pgvector.spring; + +import dev.langchain4j.store.embedding.pgvector.DefaultMetadataStorageConfig; +import dev.langchain4j.store.embedding.pgvector.MetadataStorageConfig; +import dev.langchain4j.store.embedding.pgvector.MetadataStorageMode; +import lombok.Builder; +import lombok.Getter; +import lombok.Setter; + +import java.util.Collections; +import java.util.List; + +@Getter +@Setter +@Builder +public class MetadataStorageConfigProperties { + + private MetadataStorageMode storageMode; + + private List columnDefinitions; + + public static MetadataStorageConfigProperties defaultConfig() { + return MetadataStorageConfigProperties.builder() + .storageMode(MetadataStorageMode.COMBINED_JSON) + .columnDefinitions(Collections.singletonList("metadata JSON NULL")) + .build(); + } +} diff --git 
a/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfiguration.java b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfiguration.java new file mode 100644 index 0000000..0ff4888 --- /dev/null +++ b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfiguration.java @@ -0,0 +1,61 @@ +package dev.langchain4j.store.embedding.pgvector.spring; + +import dev.langchain4j.model.embedding.EmbeddingModel; +import dev.langchain4j.store.embedding.pgvector.DefaultMetadataStorageConfig; +import dev.langchain4j.store.embedding.pgvector.MetadataStorageConfig; +import dev.langchain4j.store.embedding.pgvector.MetadataStorageMode; +import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore; +import org.springframework.boot.autoconfigure.AutoConfiguration; +import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.boot.context.properties.EnableConfigurationProperties; +import org.springframework.context.annotation.Bean; +import org.springframework.lang.Nullable; + +import java.util.Optional; + +import static dev.langchain4j.internal.Utils.getOrDefault; +import static dev.langchain4j.store.embedding.pgvector.spring.PgVectorEmbeddingStoreProperties.*; + +@AutoConfiguration +@EnableConfigurationProperties(PgVectorEmbeddingStoreProperties.class) +@ConditionalOnProperty(prefix = PREFIX, name = "enabled", havingValue = "true", matchIfMissing = true) +public class PgVectorEmbeddingStoreAutoConfiguration { + + @Bean + @ConditionalOnMissingBean + public PgVectorEmbeddingStore pgVectorEmbeddingStore(PgVectorEmbeddingStoreProperties properties, + @Nullable EmbeddingModel embeddingModel) { + String host = 
Optional.ofNullable(properties.getHost()).orElse(DEFAULT_HOST); + int port = Optional.ofNullable(properties.getPort()).orElse(DEFAULT_PORT); + String database = Optional.ofNullable(properties.getDatabase()).orElse(DEFAULT_DATABASE); + String table = Optional.ofNullable(properties.getDatabase()).orElse(DEFAULT_TABLE); + Integer dimension = Optional.ofNullable(properties.getDimension()).orElseGet(() -> embeddingModel == null ? null : embeddingModel.dimension()); + + // get user and password from env variable + String user = Optional.ofNullable(properties.getUser()).orElse(System.getenv("PGVECTOR_USER")); + String password = Optional.ofNullable(properties.getPassword()).orElse(System.getenv("PGVECTOR_PASSWORD")); + + // get metadata storage config from spring properties + MetadataStorageConfigProperties metadataStorageConfigProperties = Optional.ofNullable(properties.getMetadataStorageConfig()).orElse(MetadataStorageConfigProperties.defaultConfig()); + MetadataStorageMode storageMode = Optional.ofNullable(metadataStorageConfigProperties.getStorageMode()).orElse(MetadataStorageMode.COMBINED_JSON); + MetadataStorageConfig metadataStorageConfig = DefaultMetadataStorageConfig.builder() + .storageMode(storageMode) + .columnDefinitions(metadataStorageConfigProperties.getColumnDefinitions()).build(); + + return PgVectorEmbeddingStore.builder() + .host(host) + .port(port) + .database(database) + .user(user) + .password(password) + .table(table) + .dimension(dimension) + .useIndex(getOrDefault(properties.getUseIndex(), false)) + .indexListSize(properties.getIndexListSize()) + .createTable(getOrDefault(properties.getCreateTable(), true)) + .dropTableFirst(getOrDefault(properties.getDropTableFirst(), false)) + .metadataStorageConfig(metadataStorageConfig) + .build(); + } +} diff --git a/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreProperties.java 
b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreProperties.java new file mode 100644 index 0000000..492021d --- /dev/null +++ b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreProperties.java @@ -0,0 +1,34 @@ +package dev.langchain4j.store.embedding.pgvector.spring; + +import lombok.Getter; +import lombok.Setter; +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.boot.context.properties.NestedConfigurationProperty; + +@Getter +@Setter +@ConfigurationProperties(prefix = PgVectorEmbeddingStoreProperties.PREFIX) +public class PgVectorEmbeddingStoreProperties { + + static final String PREFIX = "langchain4j.pgvector"; + static final String DEFAULT_HOST = "localhost"; + static final int DEFAULT_PORT = 5432; + static final String DEFAULT_DATABASE = "langchain4j_database"; + static final String DEFAULT_TABLE = "langchain4j_table"; + + private String host; + private Integer port; + private String database; + private String table; + private Integer dimension; + private String user; + private String password; + + private Boolean useIndex; + private Integer indexListSize; + private Boolean createTable; + private Boolean dropTableFirst; + + @NestedConfigurationProperty + private MetadataStorageConfigProperties metadataStorageConfig; +} diff --git a/langchain4j-pgvector-spring-boot-starter/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports b/langchain4j-pgvector-spring-boot-starter/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports new file mode 100644 index 0000000..31b427d --- /dev/null +++ b/langchain4j-pgvector-spring-boot-starter/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports @@ -0,0 +1 @@ 
+dev.langchain4j.store.embedding.pgvector.spring.PgVectorEmbeddingStoreAutoConfiguration \ No newline at end of file diff --git a/langchain4j-pgvector-spring-boot-starter/src/test/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfigurationIT.java b/langchain4j-pgvector-spring-boot-starter/src/test/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfigurationIT.java new file mode 100644 index 0000000..7f66a3d --- /dev/null +++ b/langchain4j-pgvector-spring-boot-starter/src/test/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfigurationIT.java @@ -0,0 +1,71 @@ +package dev.langchain4j.store.embedding.pgvector.spring; + +import dev.langchain4j.data.segment.TextSegment; +import dev.langchain4j.model.embedding.onnx.allminilml6v2q.AllMiniLmL6V2QuantizedEmbeddingModel; +import dev.langchain4j.store.embedding.EmbeddingStore; +import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore; +import dev.langchain4j.store.embedding.spring.EmbeddingStoreAutoConfigurationIT; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.springframework.boot.autoconfigure.AutoConfigurations; +import org.springframework.boot.test.context.runner.ApplicationContextRunner; +import org.testcontainers.containers.PostgreSQLContainer; + +class PgVectorEmbeddingStoreAutoConfigurationIT extends EmbeddingStoreAutoConfigurationIT { + + static PostgreSQLContainer pgVector = new PostgreSQLContainer<>("pgvector/pgvector:pg16"); + static final String DEFAULT_TABLE = "test_langchain4j_table"; + + @BeforeAll + static void beforeAll() { + pgVector.start(); + } + + @AfterAll + static void afterAll() { + pgVector.stop(); + } + + @BeforeEach + void beforeEach() { + ApplicationContextRunner contextRunner = new ApplicationContextRunner() + .withConfiguration(AutoConfigurations.of(autoConfigurationClass())); + + contextRunner + 
.withBean(AllMiniLmL6V2QuantizedEmbeddingModel.class) + .withPropertyValues(properties()) + .run(context -> { + PgVectorEmbeddingStore embeddingStore = context.getBean(PgVectorEmbeddingStore.class); + embeddingStore.removeAll(); + }); + } + + @Override + protected Class autoConfigurationClass() { + return PgVectorEmbeddingStoreAutoConfiguration.class; + } + + @Override + protected Class> embeddingStoreClass() { + return PgVectorEmbeddingStore.class; + } + + @Override + protected String[] properties() { + return new String[]{ + "langchain4j.pgvector.host=" + pgVector.getHost(), + "langchain4j.pgvector.port=" + pgVector.getMappedPort(5432), + "langchain4j.pgvector.database=" + pgVector.getDatabaseName(), + "langchain4j.pgvector.user=" + pgVector.getUsername(), + "langchain4j.pgvector.password=" + pgVector.getPassword(), + "langchain4j.pgvector.table=" + DEFAULT_TABLE, + "langchain4j.pgvector.dimension=384" + }; + } + + @Override + protected String dimensionPropertyKey() { + return "langchain4j.pgvector.dimension"; + } +} diff --git a/pom.xml b/pom.xml index b87d59d..34a97bb 100644 --- a/pom.xml +++ b/pom.xml @@ -31,6 +31,7 @@ langchain4j-qianfan-spring-boot-starter langchain4j-milvus-spring-boot-starter langchain4j-dashscope-spring-boot-starter + langchain4j-pgvector-spring-boot-starter langchain4j-reactor From 96c4a72abfddb0263d4082eb1e9fbade42d7e6da Mon Sep 17 00:00:00 2001 From: misselvexu Date: Tue, 19 Nov 2024 13:51:47 +0800 Subject: [PATCH 2/4] Optimise the configuration options that pgvector exposes to the user, reusing DataSource instances --- .../pom.xml | 11 ++- .../MetadataStorageConfigProperties.java | 28 ------- ...VectorEmbeddingStoreAutoConfiguration.java | 55 ++++++------- .../PgVectorEmbeddingStoreProperties.java | 78 ++++++++++++++----- ...ctorEmbeddingStoreAutoConfigurationIT.java | 12 +-- pom.xml | 6 ++ 6 files changed, 101 insertions(+), 89 deletions(-) delete mode 100644 
langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/MetadataStorageConfigProperties.java diff --git a/langchain4j-pgvector-spring-boot-starter/pom.xml b/langchain4j-pgvector-spring-boot-starter/pom.xml index a6ba9db..395288e 100644 --- a/langchain4j-pgvector-spring-boot-starter/pom.xml +++ b/langchain4j-pgvector-spring-boot-starter/pom.xml @@ -28,15 +28,14 @@ org.springframework.boot - spring-boot-autoconfigure-processor - true + spring-boot-starter-jdbc + provided - - org.projectlombok - lombok - provided + org.springframework.boot + spring-boot-autoconfigure-processor + true diff --git a/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/MetadataStorageConfigProperties.java b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/MetadataStorageConfigProperties.java deleted file mode 100644 index d641271..0000000 --- a/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/MetadataStorageConfigProperties.java +++ /dev/null @@ -1,28 +0,0 @@ -package dev.langchain4j.store.embedding.pgvector.spring; - -import dev.langchain4j.store.embedding.pgvector.DefaultMetadataStorageConfig; -import dev.langchain4j.store.embedding.pgvector.MetadataStorageConfig; -import dev.langchain4j.store.embedding.pgvector.MetadataStorageMode; -import lombok.Builder; -import lombok.Getter; -import lombok.Setter; - -import java.util.Collections; -import java.util.List; - -@Getter -@Setter -@Builder -public class MetadataStorageConfigProperties { - - private MetadataStorageMode storageMode; - - private List columnDefinitions; - - public static MetadataStorageConfigProperties defaultConfig() { - return MetadataStorageConfigProperties.builder() - .storageMode(MetadataStorageMode.COMBINED_JSON) - .columnDefinitions(Collections.singletonList("metadata JSON NULL")) - .build(); - } -} diff --git 
a/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfiguration.java b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfiguration.java index 0ff4888..be0ba1a 100644 --- a/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfiguration.java +++ b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfiguration.java @@ -1,61 +1,54 @@ package dev.langchain4j.store.embedding.pgvector.spring; import dev.langchain4j.model.embedding.EmbeddingModel; -import dev.langchain4j.store.embedding.pgvector.DefaultMetadataStorageConfig; -import dev.langchain4j.store.embedding.pgvector.MetadataStorageConfig; -import dev.langchain4j.store.embedding.pgvector.MetadataStorageMode; import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore; +import jakarta.annotation.PostConstruct; import org.springframework.boot.autoconfigure.AutoConfiguration; +import org.springframework.boot.autoconfigure.condition.ConditionalOnBean; +import org.springframework.boot.autoconfigure.condition.ConditionalOnClass; import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.boot.context.properties.EnableConfigurationProperties; import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; import org.springframework.lang.Nullable; +import javax.sql.DataSource; import java.util.Optional; -import static dev.langchain4j.internal.Utils.getOrDefault; import static dev.langchain4j.store.embedding.pgvector.spring.PgVectorEmbeddingStoreProperties.*; @AutoConfiguration 
@EnableConfigurationProperties(PgVectorEmbeddingStoreProperties.class) @ConditionalOnProperty(prefix = PREFIX, name = "enabled", havingValue = "true", matchIfMissing = true) +@ConditionalOnClass({PgVectorEmbeddingStore.class, DataSource.class}) public class PgVectorEmbeddingStoreAutoConfiguration { @Bean @ConditionalOnMissingBean - public PgVectorEmbeddingStore pgVectorEmbeddingStore(PgVectorEmbeddingStoreProperties properties, + @ConditionalOnBean(DataSource.class) + public PgVectorEmbeddingStore pgVectorEmbeddingStore(DataSource dataSource, PgVectorEmbeddingStoreProperties properties, @Nullable EmbeddingModel embeddingModel) { - String host = Optional.ofNullable(properties.getHost()).orElse(DEFAULT_HOST); - int port = Optional.ofNullable(properties.getPort()).orElse(DEFAULT_PORT); - String database = Optional.ofNullable(properties.getDatabase()).orElse(DEFAULT_DATABASE); - String table = Optional.ofNullable(properties.getDatabase()).orElse(DEFAULT_TABLE); Integer dimension = Optional.ofNullable(properties.getDimension()).orElseGet(() -> embeddingModel == null ? 
null : embeddingModel.dimension()); - // get user and password from env variable - String user = Optional.ofNullable(properties.getUser()).orElse(System.getenv("PGVECTOR_USER")); - String password = Optional.ofNullable(properties.getPassword()).orElse(System.getenv("PGVECTOR_PASSWORD")); - - // get metadata storage config from spring properties - MetadataStorageConfigProperties metadataStorageConfigProperties = Optional.ofNullable(properties.getMetadataStorageConfig()).orElse(MetadataStorageConfigProperties.defaultConfig()); - MetadataStorageMode storageMode = Optional.ofNullable(metadataStorageConfigProperties.getStorageMode()).orElse(MetadataStorageMode.COMBINED_JSON); - MetadataStorageConfig metadataStorageConfig = DefaultMetadataStorageConfig.builder() - .storageMode(storageMode) - .columnDefinitions(metadataStorageConfigProperties.getColumnDefinitions()).build(); - - return PgVectorEmbeddingStore.builder() - .host(host) - .port(port) - .database(database) - .user(user) - .password(password) - .table(table) + return PgVectorEmbeddingStore.datasourceBuilder() + .datasource(dataSource) + .table(properties.getTable()) + .createTable(properties.getCreateTable()) .dimension(dimension) - .useIndex(getOrDefault(properties.getUseIndex(), false)) + .useIndex(properties.getUseIndex()) .indexListSize(properties.getIndexListSize()) - .createTable(getOrDefault(properties.getCreateTable(), true)) - .dropTableFirst(getOrDefault(properties.getDropTableFirst(), false)) - .metadataStorageConfig(metadataStorageConfig) .build(); } + + @Configuration + @ConditionalOnMissingBean(DataSource.class) + protected static class DataSourceNotFoundConfiguration { + @PostConstruct + public void logWarning() { + throw new IllegalStateException( + "No DataSource found. Please configure a DataSource (e.g., by including spring-boot-starter-jdbc) to use PgVector." 
+ ); + } + } } diff --git a/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreProperties.java b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreProperties.java index 492021d..402e98a 100644 --- a/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreProperties.java +++ b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreProperties.java @@ -1,34 +1,74 @@ package dev.langchain4j.store.embedding.pgvector.spring; -import lombok.Getter; -import lombok.Setter; import org.springframework.boot.context.properties.ConfigurationProperties; -import org.springframework.boot.context.properties.NestedConfigurationProperty; -@Getter -@Setter @ConfigurationProperties(prefix = PgVectorEmbeddingStoreProperties.PREFIX) public class PgVectorEmbeddingStoreProperties { static final String PREFIX = "langchain4j.pgvector"; - static final String DEFAULT_HOST = "localhost"; - static final int DEFAULT_PORT = 5432; - static final String DEFAULT_DATABASE = "langchain4j_database"; - static final String DEFAULT_TABLE = "langchain4j_table"; - - private String host; - private Integer port; - private String database; + + /** + * The database table. + */ private String table; + + /** + * The vector dimension. + */ private Integer dimension; - private String user; - private String password; + /** + * Should create table automatically, default value is false. + */ + private Boolean createTable; + + /** + * Should use IVFFlat index. + */ private Boolean useIndex; + + /** + * The IVFFlat number of lists. 
+ */ private Integer indexListSize; - private Boolean createTable; - private Boolean dropTableFirst; - @NestedConfigurationProperty - private MetadataStorageConfigProperties metadataStorageConfig; + public String getTable() { + return table; + } + + public void setTable(String table) { + this.table = table; + } + + public Integer getDimension() { + return dimension; + } + + public void setDimension(Integer dimension) { + this.dimension = dimension; + } + + public Boolean getCreateTable() { + return createTable; + } + + public void setCreateTable(Boolean createTable) { + this.createTable = createTable; + } + + public Boolean getUseIndex() { + return useIndex; + } + + public void setUseIndex(Boolean useIndex) { + this.useIndex = useIndex; + } + + public Integer getIndexListSize() { + return indexListSize; + } + + public void setIndexListSize(Integer indexListSize) { + this.indexListSize = indexListSize; + } } diff --git a/langchain4j-pgvector-spring-boot-starter/src/test/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfigurationIT.java b/langchain4j-pgvector-spring-boot-starter/src/test/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfigurationIT.java index 7f66a3d..f81c3cd 100644 --- a/langchain4j-pgvector-spring-boot-starter/src/test/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfigurationIT.java +++ b/langchain4j-pgvector-spring-boot-starter/src/test/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfigurationIT.java @@ -54,12 +54,14 @@ protected Class> embeddingStoreClass() { @Override protected String[] properties() { return new String[]{ - "langchain4j.pgvector.host=" + pgVector.getHost(), - "langchain4j.pgvector.port=" + pgVector.getMappedPort(5432), - "langchain4j.pgvector.database=" + pgVector.getDatabaseName(), - "langchain4j.pgvector.user=" + pgVector.getUsername(), - "langchain4j.pgvector.password=" + 
pgVector.getPassword(), + "spring.datasource.url=" + pgVector.getJdbcUrl(), + "spring.datasource.username=" + pgVector.getUsername(), + "spring.datasource.password=" + pgVector.getPassword(), + "spring.datasource.driver-class-name=" + pgVector.getDriverClassName(), + "langchain4j.pgvector.create-table=true", "langchain4j.pgvector.table=" + DEFAULT_TABLE, + "langchain4j.pgvector.use-index=true", + "langchain4j.pgvector.index-list-size=100", "langchain4j.pgvector.dimension=384" }; } diff --git a/pom.xml b/pom.xml index 34a97bb..95a6260 100644 --- a/pom.xml +++ b/pom.xml @@ -69,6 +69,12 @@ ${spring.boot.version} + + org.springframework.boot + spring-boot-starter-jdbc + ${spring.boot.version} + + org.springframework.boot spring-boot-autoconfigure-processor From 2e107b92c379ff6cccb3cec87f31872739718fe0 Mon Sep 17 00:00:00 2001 From: misselvexu Date: Tue, 19 Nov 2024 18:48:44 +0800 Subject: [PATCH 3/4] Optimize pgvector starter data source auto-configuration logic. --- .../pom.xml | 6 -- .../spring/PgVectorDataSourceProperties.java | 87 +++++++++++++++++++ ...VectorEmbeddingStoreAutoConfiguration.java | 65 +++++++++++--- .../PgVectorEmbeddingStoreProperties.java | 2 +- ...ctorEmbeddingStoreAutoConfigurationIT.java | 12 +-- pom.xml | 6 -- 6 files changed, 147 insertions(+), 31 deletions(-) create mode 100644 langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorDataSourceProperties.java diff --git a/langchain4j-pgvector-spring-boot-starter/pom.xml b/langchain4j-pgvector-spring-boot-starter/pom.xml index 395288e..d249264 100644 --- a/langchain4j-pgvector-spring-boot-starter/pom.xml +++ b/langchain4j-pgvector-spring-boot-starter/pom.xml @@ -26,12 +26,6 @@ spring-boot-starter - - org.springframework.boot - spring-boot-starter-jdbc - provided - - org.springframework.boot spring-boot-autoconfigure-processor diff --git 
a/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorDataSourceProperties.java b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorDataSourceProperties.java new file mode 100644 index 0000000..7341e70 --- /dev/null +++ b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorDataSourceProperties.java @@ -0,0 +1,87 @@ +package dev.langchain4j.store.embedding.pgvector.spring; + +import org.springframework.boot.context.properties.ConfigurationProperties; + +@ConfigurationProperties(prefix = PgVectorDataSourceProperties.PREFIX) +public class PgVectorDataSourceProperties { + + static final String PREFIX = "langchain4j.pgvector.datasource"; + + /** + * Enable postgres datasource configuration, default value false. + */ + private boolean enabled = false; + + /** + * The pgvector database host. + */ + private String host; + + /** + * The pgvector database user. + */ + private String user; + + /** + * The pgvector database password. + */ + private String password; + + /** + * The pgvector database port. + */ + private Integer port; + + /** + * The pgvector database name. 
+ */ + private String database; + + public boolean isEnabled() { + return enabled; + } + + public void setEnabled(boolean enabled) { + this.enabled = enabled; + } + + public String getHost() { + return host; + } + + public void setHost(String host) { + this.host = host; + } + + public String getUser() { + return user; + } + + public void setUser(String user) { + this.user = user; + } + + public String getPassword() { + return password; + } + + public void setPassword(String password) { + this.password = password; + } + + public Integer getPort() { + return port; + } + + public void setPort(Integer port) { + this.port = port; + } + + public String getDatabase() { + return database; + } + + public void setDatabase(String database) { + this.database = database; + } +} diff --git a/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfiguration.java b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfiguration.java index be0ba1a..44fd5fa 100644 --- a/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfiguration.java +++ b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfiguration.java @@ -2,33 +2,42 @@ import dev.langchain4j.model.embedding.EmbeddingModel; import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore; -import jakarta.annotation.PostConstruct; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.boot.autoconfigure.AutoConfiguration; import org.springframework.boot.autoconfigure.condition.ConditionalOnBean; -import org.springframework.boot.autoconfigure.condition.ConditionalOnClass; import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean; import 
org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.boot.context.properties.EnableConfigurationProperties; import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.Configuration; import org.springframework.lang.Nullable; import javax.sql.DataSource; +import java.sql.Connection; +import java.sql.DatabaseMetaData; +import java.sql.SQLException; import java.util.Optional; +import static dev.langchain4j.internal.ValidationUtils.*; import static dev.langchain4j.store.embedding.pgvector.spring.PgVectorEmbeddingStoreProperties.*; +import static org.springframework.util.StringUtils.startsWithIgnoreCase; @AutoConfiguration -@EnableConfigurationProperties(PgVectorEmbeddingStoreProperties.class) +@EnableConfigurationProperties({PgVectorEmbeddingStoreProperties.class, PgVectorDataSourceProperties.class}) @ConditionalOnProperty(prefix = PREFIX, name = "enabled", havingValue = "true", matchIfMissing = true) -@ConditionalOnClass({PgVectorEmbeddingStore.class, DataSource.class}) public class PgVectorEmbeddingStoreAutoConfiguration { + private static final Logger log = LoggerFactory.getLogger(PgVectorEmbeddingStoreAutoConfiguration.class); + @Bean @ConditionalOnMissingBean @ConditionalOnBean(DataSource.class) + @ConditionalOnProperty(prefix = PgVectorDataSourceProperties.PREFIX, name = "enabled", havingValue = "false", matchIfMissing = true) public PgVectorEmbeddingStore pgVectorEmbeddingStore(DataSource dataSource, PgVectorEmbeddingStoreProperties properties, @Nullable EmbeddingModel embeddingModel) { + // Check if the context's data source is a Postgres datasource + ensureTrue(isPostgresqlDataSource(dataSource), "The DataSource in Spring Context is not a Postgres datasource, you need to manually specify the Postgres datasource configuration via 'langchain4j.pgvector.datasource'."); + Integer dimension = Optional.ofNullable(properties.getDimension()).orElseGet(() -> embeddingModel == null ? 
null : embeddingModel.dimension()); return PgVectorEmbeddingStore.datasourceBuilder() @@ -41,14 +50,44 @@ public PgVectorEmbeddingStore pgVectorEmbeddingStore(DataSource dataSource, PgVe .build(); } - @Configuration - @ConditionalOnMissingBean(DataSource.class) - protected static class DataSourceNotFoundConfiguration { - @PostConstruct - public void logWarning() { - throw new IllegalStateException( - "No DataSource found. Please configure a DataSource (e.g., by including spring-boot-starter-jdbc) to use PgVector." - ); + @Bean + @ConditionalOnMissingBean + @ConditionalOnProperty(prefix = PgVectorDataSourceProperties.PREFIX, name = "enabled", havingValue = "true") + public PgVectorEmbeddingStore pgVectorEmbeddingStore(PgVectorEmbeddingStoreProperties properties, PgVectorDataSourceProperties dataSourceProperties, + @Nullable EmbeddingModel embeddingModel) { + Integer dimension = Optional.ofNullable(properties.getDimension()).orElseGet(() -> embeddingModel == null ? null : embeddingModel.dimension()); + String host = ensureNotBlank(dataSourceProperties.getHost(), "langchain4j.pgvector.datasource.host"); + Integer port = ensureGreaterThanZero(dataSourceProperties.getPort(), "langchain4j.pgvector.datasource.port"); + String user = ensureNotBlank(dataSourceProperties.getUser(), "langchain4j.pgvector.datasource.user"); + String password = ensureNotBlank(dataSourceProperties.getPassword(), "langchain4j.pgvector.datasource.password"); + String database = ensureNotBlank(dataSourceProperties.getDatabase(), "langchain4j.pgvector.datasource.database"); + + return PgVectorEmbeddingStore.builder() + .host(host) + .port(port) + .user(user) + .password(password) + .database(database) + .table(properties.getTable()) + .createTable(properties.getCreateTable()) + .dimension(dimension) + .useIndex(properties.getUseIndex()) + .indexListSize(properties.getIndexListSize()) + .build(); + } + + /** + * Checks whether the given data source is a PostgreSQL data source. 
+ * @param dataSource instance of {@link DataSource}. + * @return true means it is a postgresql data source, otherwise it is not. + */ + private boolean isPostgresqlDataSource(DataSource dataSource) { + try (Connection connection = dataSource.getConnection()) { + DatabaseMetaData metaData = connection.getMetaData(); + return startsWithIgnoreCase(metaData.getURL(), "jdbc:postgresql"); + } catch (SQLException e) { + log.warn("Exception checking datasource driver type during PgVector auto-configuration ."); + return false; } } } diff --git a/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreProperties.java b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreProperties.java index 402e98a..63e9829 100644 --- a/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreProperties.java +++ b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreProperties.java @@ -8,7 +8,7 @@ public class PgVectorEmbeddingStoreProperties { static final String PREFIX = "langchain4j.pgvector"; /** - * The database table. + * The pgvector database table. 
*/ private String table; diff --git a/langchain4j-pgvector-spring-boot-starter/src/test/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfigurationIT.java b/langchain4j-pgvector-spring-boot-starter/src/test/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfigurationIT.java index f81c3cd..f23d330 100644 --- a/langchain4j-pgvector-spring-boot-starter/src/test/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfigurationIT.java +++ b/langchain4j-pgvector-spring-boot-starter/src/test/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfigurationIT.java @@ -54,12 +54,14 @@ protected Class> embeddingStoreClass() { @Override protected String[] properties() { return new String[]{ - "spring.datasource.url=" + pgVector.getJdbcUrl(), - "spring.datasource.username=" + pgVector.getUsername(), - "spring.datasource.password=" + pgVector.getPassword(), - "spring.datasource.driver-class-name=" + pgVector.getDriverClassName(), - "langchain4j.pgvector.create-table=true", + "langchain4j.pgvector.datasource.enabled=true", + "langchain4j.pgvector.datasource.host=" + pgVector.getHost(), + "langchain4j.pgvector.datasource.port=" + pgVector.getMappedPort(5432), + "langchain4j.pgvector.datasource.user=" + pgVector.getUsername(), + "langchain4j.pgvector.datasource.password=" + pgVector.getPassword(), + "langchain4j.pgvector.datasource.database=" + pgVector.getDatabaseName(), "langchain4j.pgvector.table=" + DEFAULT_TABLE, + "langchain4j.pgvector.create-table=true", "langchain4j.pgvector.use-index=true", "langchain4j.pgvector.index-list-size=100", "langchain4j.pgvector.dimension=384" diff --git a/pom.xml b/pom.xml index 95a6260..34a97bb 100644 --- a/pom.xml +++ b/pom.xml @@ -69,12 +69,6 @@ ${spring.boot.version} - - org.springframework.boot - spring-boot-starter-jdbc - ${spring.boot.version} - - org.springframework.boot spring-boot-autoconfigure-processor From 
e29c321ffd4af8e3dd0a7d5d8e928b316b26b305 Mon Sep 17 00:00:00 2001 From: misselvexu Date: Tue, 19 Nov 2024 19:05:43 +0800 Subject: [PATCH 4/4] Remove redundant parameter legitimacy checks. --- ...VectorEmbeddingStoreAutoConfiguration.java | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfiguration.java b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfiguration.java index 44fd5fa..418f7be 100644 --- a/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfiguration.java +++ b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfiguration.java @@ -33,7 +33,7 @@ public class PgVectorEmbeddingStoreAutoConfiguration { @ConditionalOnMissingBean @ConditionalOnBean(DataSource.class) @ConditionalOnProperty(prefix = PgVectorDataSourceProperties.PREFIX, name = "enabled", havingValue = "false") - public PgVectorEmbeddingStore pgVectorEmbeddingStore(DataSource dataSource, PgVectorEmbeddingStoreProperties properties, + public PgVectorEmbeddingStore pgVectorEmbeddingStoreWithExistingDataSource(DataSource dataSource, PgVectorEmbeddingStoreProperties properties, @Nullable EmbeddingModel embeddingModel) { // Check if the context's data source is a Postgres datasource ensureTrue(isPostgresqlDataSource(dataSource), "The DataSource in Spring Context is not a Postgres datasource, you need to manually specify the Postgres datasource configuration via 'langchain4j.pgvector.datasource'."); @@ -53,21 +53,16 @@ public PgVectorEmbeddingStore pgVectorEmbeddingStore(DataSource dataSource, PgVe @Bean @ConditionalOnMissingBean @ConditionalOnProperty(prefix = 
PgVectorDataSourceProperties.PREFIX, name = "enabled", havingValue = "true") - public PgVectorEmbeddingStore pgVectorEmbeddingStore(PgVectorEmbeddingStoreProperties properties, PgVectorDataSourceProperties dataSourceProperties, + public PgVectorEmbeddingStore pgVectorEmbeddingStoreWithCustomDataSource(PgVectorEmbeddingStoreProperties properties, PgVectorDataSourceProperties dataSourceProperties, @Nullable EmbeddingModel embeddingModel) { Integer dimension = Optional.ofNullable(properties.getDimension()).orElseGet(() -> embeddingModel == null ? null : embeddingModel.dimension()); - String host = ensureNotBlank(dataSourceProperties.getHost(), "langchain4j.pgvector.datasource.host"); - Integer port = ensureGreaterThanZero(dataSourceProperties.getPort(), "langchain4j.pgvector.datasource.port"); - String user = ensureNotBlank(dataSourceProperties.getUser(), "langchain4j.pgvector.datasource.user"); - String password = ensureNotBlank(dataSourceProperties.getPassword(), "langchain4j.pgvector.datasource.password"); - String database = ensureNotBlank(dataSourceProperties.getDatabase(), "langchain4j.pgvector.datasource.database"); return PgVectorEmbeddingStore.builder() - .host(host) - .port(port) - .user(user) - .password(password) - .database(database) + .host(dataSourceProperties.getHost()) + .port(dataSourceProperties.getPort()) + .user(dataSourceProperties.getUser()) + .password(dataSourceProperties.getPassword()) + .database(dataSourceProperties.getDatabase()) .table(properties.getTable()) .createTable(properties.getCreateTable()) .dimension(dimension)