diff --git a/langchain4j-pgvector-spring-boot-starter/pom.xml b/langchain4j-pgvector-spring-boot-starter/pom.xml
new file mode 100644
index 0000000..d249264
--- /dev/null
+++ b/langchain4j-pgvector-spring-boot-starter/pom.xml
@@ -0,0 +1,82 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>dev.langchain4j</groupId>
+        <artifactId>langchain4j-spring</artifactId>
+        <version>0.37.0-SNAPSHOT</version>
+        <relativePath>../pom.xml</relativePath>
+    </parent>
+
+    <artifactId>langchain4j-pgvector-spring-boot-starter</artifactId>
+    <name>LangChain4j Spring Boot starter for PgVector</name>
+    <packaging>jar</packaging>
+
+    <dependencies>
+
+        <dependency>
+            <groupId>dev.langchain4j</groupId>
+            <artifactId>langchain4j-pgvector</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-autoconfigure-processor</artifactId>
+            <optional>true</optional>
+        </dependency>
+
+        <!-- annotation processor that generates metadata for the @ConfigurationProperties classes -->
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-configuration-processor</artifactId>
+            <optional>true</optional>
+        </dependency>
+
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter-test</artifactId>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>dev.langchain4j</groupId>
+            <artifactId>langchain4j-embeddings-all-minilm-l6-v2-q</artifactId>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>dev.langchain4j</groupId>
+            <artifactId>langchain4j-spring-boot-tests</artifactId>
+            <version>${project.version}</version>
+            <classifier>tests</classifier>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.testcontainers</groupId>
+            <artifactId>postgresql</artifactId>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.tinylog</groupId>
+            <artifactId>tinylog-impl</artifactId>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.tinylog</groupId>
+            <artifactId>slf4j-tinylog</artifactId>
+            <scope>test</scope>
+        </dependency>
+
+    </dependencies>
+</project>
\ No newline at end of file
diff --git a/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorDataSourceProperties.java b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorDataSourceProperties.java
new file mode 100644
index 0000000..7341e70
--- /dev/null
+++ b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorDataSourceProperties.java
@@ -0,0 +1,92 @@
+package dev.langchain4j.store.embedding.pgvector.spring;
+
+import org.springframework.boot.context.properties.ConfigurationProperties;
+
+/**
+ * Connection settings used when the starter should open its own connection to PostgreSQL
+ * instead of reusing the application's {@code DataSource} bean. Bound from properties under
+ * {@code langchain4j.pgvector.datasource}.
+ */
+@ConfigurationProperties(prefix = PgVectorDataSourceProperties.PREFIX)
+public class PgVectorDataSourceProperties {
+
+    static final String PREFIX = "langchain4j.pgvector.datasource";
+
+    /**
+     * Enable postgres datasource configuration, default value false.
+     */
+    private boolean enabled = false;
+
+    /**
+     * The pgvector database host.
+     */
+    private String host;
+
+    /**
+     * The pgvector database user.
+     */
+    private String user;
+
+    /**
+     * The pgvector database password.
+     */
+    private String password;
+
+    /**
+     * The pgvector database port.
+     */
+    private Integer port;
+
+    /**
+     * The pgvector database name.
+     */
+    private String database;
+
+    public boolean isEnabled() {
+        return enabled;
+    }
+
+    public void setEnabled(boolean enabled) {
+        this.enabled = enabled;
+    }
+
+    public String getHost() {
+        return host;
+    }
+
+    public void setHost(String host) {
+        this.host = host;
+    }
+
+    public String getUser() {
+        return user;
+    }
+
+    public void setUser(String user) {
+        this.user = user;
+    }
+
+    public String getPassword() {
+        return password;
+    }
+
+    public void setPassword(String password) {
+        this.password = password;
+    }
+
+    public Integer getPort() {
+        return port;
+    }
+
+    public void setPort(Integer port) {
+        this.port = port;
+    }
+
+    public String getDatabase() {
+        return database;
+    }
+
+    public void setDatabase(String database) {
+        this.database = database;
+    }
+}
diff --git a/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfiguration.java b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfiguration.java
new file mode 100644
index 0000000..418f7be
--- /dev/null
+++ b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfiguration.java
@@ -0,0 +1,127 @@
+package dev.langchain4j.store.embedding.pgvector.spring;
+
+import dev.langchain4j.model.embedding.EmbeddingModel;
+import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.boot.autoconfigure.AutoConfiguration;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnBean;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
+import org.springframework.boot.context.properties.EnableConfigurationProperties;
+import org.springframework.context.annotation.Bean;
+import org.springframework.lang.Nullable;
+
+import javax.sql.DataSource;
+import java.sql.Connection;
+import java.sql.DatabaseMetaData;
+import java.sql.SQLException;
+import java.util.Optional;
+
+import static dev.langchain4j.internal.ValidationUtils.ensureTrue;
+import static dev.langchain4j.store.embedding.pgvector.spring.PgVectorEmbeddingStoreProperties.PREFIX;
+import static org.springframework.util.StringUtils.startsWithIgnoreCase;
+
+/**
+ * Auto-configuration that registers a {@link PgVectorEmbeddingStore} bean.
+ * <p>
+ * Active when {@code langchain4j.pgvector.enabled} is {@code true} or not set. The store is
+ * backed either by the {@link DataSource} already present in the application context (the
+ * default) or, when {@code langchain4j.pgvector.datasource.enabled=true}, by a dedicated
+ * connection described by {@link PgVectorDataSourceProperties}.
+ * <p>
+ * Ordered after Spring Boot's {@code DataSourceAutoConfiguration} so that the
+ * {@code @ConditionalOnBean(DataSource.class)} check can see the auto-configured data source.
+ */
+@AutoConfiguration(afterName = "org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration")
+@EnableConfigurationProperties({PgVectorEmbeddingStoreProperties.class, PgVectorDataSourceProperties.class})
+@ConditionalOnProperty(prefix = PREFIX, name = "enabled", havingValue = "true", matchIfMissing = true)
+public class PgVectorEmbeddingStoreAutoConfiguration {
+
+    private static final Logger log = LoggerFactory.getLogger(PgVectorEmbeddingStoreAutoConfiguration.class);
+
+    /**
+     * Creates the embedding store on top of the {@link DataSource} bean found in the application context.
+     * <p>
+     * Selected when {@code langchain4j.pgvector.datasource.enabled} is {@code false} or not set,
+     * which matches the default value of {@link PgVectorDataSourceProperties#isEnabled()}.
+     *
+     * @param dataSource     data source from the context; rejected unless its JDBC URL is a PostgreSQL one
+     * @param properties     table, dimension and index settings
+     * @param embeddingModel optional model used to derive the dimension when none is configured
+     * @return the configured store
+     */
+    @Bean
+    @ConditionalOnMissingBean
+    @ConditionalOnBean(DataSource.class)
+    @ConditionalOnProperty(prefix = PgVectorDataSourceProperties.PREFIX, name = "enabled", havingValue = "false", matchIfMissing = true)
+    public PgVectorEmbeddingStore pgVectorEmbeddingStoreWithExistingDataSource(DataSource dataSource, PgVectorEmbeddingStoreProperties properties,
+            @Nullable EmbeddingModel embeddingModel) {
+        // Check if the context's data source is a Postgres datasource
+        ensureTrue(isPostgresqlDataSource(dataSource), "The DataSource in Spring Context is not a Postgres datasource, you need to manually specify the Postgres datasource configuration via 'langchain4j.pgvector.datasource'.");
+
+        return PgVectorEmbeddingStore.datasourceBuilder()
+                .datasource(dataSource)
+                .table(properties.getTable())
+                .createTable(properties.getCreateTable())
+                .dimension(resolveDimension(properties, embeddingModel))
+                .useIndex(properties.getUseIndex())
+                .indexListSize(properties.getIndexListSize())
+                .build();
+    }
+
+    /**
+     * Creates the embedding store with its own connection built from {@link PgVectorDataSourceProperties}.
+     * <p>
+     * Selected when {@code langchain4j.pgvector.datasource.enabled=true}.
+     *
+     * @param properties           table, dimension and index settings
+     * @param dataSourceProperties host, port, credentials and database name
+     * @param embeddingModel       optional model used to derive the dimension when none is configured
+     * @return the configured store
+     */
+    @Bean
+    @ConditionalOnMissingBean
+    @ConditionalOnProperty(prefix = PgVectorDataSourceProperties.PREFIX, name = "enabled", havingValue = "true")
+    public PgVectorEmbeddingStore pgVectorEmbeddingStoreWithCustomDataSource(PgVectorEmbeddingStoreProperties properties, PgVectorDataSourceProperties dataSourceProperties,
+            @Nullable EmbeddingModel embeddingModel) {
+        return PgVectorEmbeddingStore.builder()
+                .host(dataSourceProperties.getHost())
+                .port(dataSourceProperties.getPort())
+                .user(dataSourceProperties.getUser())
+                .password(dataSourceProperties.getPassword())
+                .database(dataSourceProperties.getDatabase())
+                .table(properties.getTable())
+                .createTable(properties.getCreateTable())
+                .dimension(resolveDimension(properties, embeddingModel))
+                .useIndex(properties.getUseIndex())
+                .indexListSize(properties.getIndexListSize())
+                .build();
+    }
+
+    /**
+     * Picks the vector dimension: the configured value if present, otherwise the embedding
+     * model's dimension, otherwise {@code null}.
+     */
+    private static Integer resolveDimension(PgVectorEmbeddingStoreProperties properties, @Nullable EmbeddingModel embeddingModel) {
+        return Optional.ofNullable(properties.getDimension())
+                .orElseGet(() -> embeddingModel == null ? null : embeddingModel.dimension());
+    }
+
+    /**
+     * Checks whether the data source's JDBC URL starts with {@code jdbc:postgresql} (case-insensitive).
+     *
+     * @param dataSource instance of {@link DataSource}; a connection is opened and closed to read its metadata.
+     * @return {@code true} for a PostgreSQL URL; {@code false} otherwise, including when the metadata cannot be read.
+     */
+    private boolean isPostgresqlDataSource(DataSource dataSource) {
+        try (Connection connection = dataSource.getConnection()) {
+            DatabaseMetaData metaData = connection.getMetaData();
+            return startsWithIgnoreCase(metaData.getURL(), "jdbc:postgresql");
+        } catch (SQLException e) {
+            // Log the cause so a connectivity problem is not mistaken for a non-Postgres data source.
+            log.warn("Exception checking datasource driver type during PgVector auto-configuration .", e);
+            return false;
+        }
+    }
+}
diff --git a/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreProperties.java b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreProperties.java
new file mode 100644
index 0000000..63e9829
--- /dev/null
+++ b/langchain4j-pgvector-spring-boot-starter/src/main/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreProperties.java
@@ -0,0 +1,78 @@
+package dev.langchain4j.store.embedding.pgvector.spring;
+
+import org.springframework.boot.context.properties.ConfigurationProperties;
+
+/**
+ * Table and index settings for the auto-configured {@code PgVectorEmbeddingStore},
+ * bound from properties under {@code langchain4j.pgvector}.
+ */
+@ConfigurationProperties(prefix = PgVectorEmbeddingStoreProperties.PREFIX)
+public class PgVectorEmbeddingStoreProperties {
+
+    static final String PREFIX = "langchain4j.pgvector";
+
+    /**
+     * The pgvector database table.
+     */
+    private String table;
+
+    /**
+     * The vector dimension.
+     */
+    private Integer dimension;
+
+    /**
+     * Should create table automatically, default value is false.
+     */
+    private Boolean createTable;
+
+    /**
+     * Should use IVFFlat index.
+     */
+    private Boolean useIndex;
+
+    /**
+     * The IVFFlat number of lists.
+     */
+    private Integer indexListSize;
+
+    public String getTable() {
+        return table;
+    }
+
+    public void setTable(String table) {
+        this.table = table;
+    }
+
+    public Integer getDimension() {
+        return dimension;
+    }
+
+    public void setDimension(Integer dimension) {
+        this.dimension = dimension;
+    }
+
+    public Boolean getCreateTable() {
+        return createTable;
+    }
+
+    public void setCreateTable(Boolean createTable) {
+        this.createTable = createTable;
+    }
+
+    public Boolean getUseIndex() {
+        return useIndex;
+    }
+
+    public void setUseIndex(Boolean useIndex) {
+        this.useIndex = useIndex;
+    }
+
+    public Integer getIndexListSize() {
+        return indexListSize;
+    }
+
+    public void setIndexListSize(Integer indexListSize) {
+        this.indexListSize = indexListSize;
+    }
+}
diff --git a/langchain4j-pgvector-spring-boot-starter/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports b/langchain4j-pgvector-spring-boot-starter/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports
new file mode 100644
index 0000000..31b427d
--- /dev/null
+++ b/langchain4j-pgvector-spring-boot-starter/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports
@@ -0,0 +1 @@
+dev.langchain4j.store.embedding.pgvector.spring.PgVectorEmbeddingStoreAutoConfiguration
\ No newline at end of file
diff --git a/langchain4j-pgvector-spring-boot-starter/src/test/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfigurationIT.java b/langchain4j-pgvector-spring-boot-starter/src/test/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfigurationIT.java
new file mode 100644
index 0000000..f23d330
--- /dev/null
+++ b/langchain4j-pgvector-spring-boot-starter/src/test/java/dev/langchain4j/store/embedding/pgvector/spring/PgVectorEmbeddingStoreAutoConfigurationIT.java
@@ -0,0 +1,78 @@
+package dev.langchain4j.store.embedding.pgvector.spring;
+
+import dev.langchain4j.data.segment.TextSegment;
+import dev.langchain4j.model.embedding.onnx.allminilml6v2q.AllMiniLmL6V2QuantizedEmbeddingModel;
+import dev.langchain4j.store.embedding.EmbeddingStore;
+import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore;
+import dev.langchain4j.store.embedding.spring.EmbeddingStoreAutoConfigurationIT;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.springframework.boot.autoconfigure.AutoConfigurations;
+import org.springframework.boot.test.context.runner.ApplicationContextRunner;
+import org.testcontainers.containers.PostgreSQLContainer;
+
+/**
+ * Runs the inherited {@link EmbeddingStoreAutoConfigurationIT} checks against a {@code pgvector/pgvector:pg16} container.
+ */
+class PgVectorEmbeddingStoreAutoConfigurationIT extends EmbeddingStoreAutoConfigurationIT {
+
+    static PostgreSQLContainer<?> pgVector = new PostgreSQLContainer<>("pgvector/pgvector:pg16");
+    static final String DEFAULT_TABLE = "test_langchain4j_table";
+
+    @BeforeAll
+    static void beforeAll() {
+        pgVector.start();
+    }
+
+    @AfterAll
+    static void afterAll() {
+        pgVector.stop();
+    }
+
+    @BeforeEach
+    void beforeEach() {
+        ApplicationContextRunner contextRunner = new ApplicationContextRunner()
+                .withConfiguration(AutoConfigurations.of(autoConfigurationClass()));
+
+        contextRunner
+                .withBean(AllMiniLmL6V2QuantizedEmbeddingModel.class)
+                .withPropertyValues(properties())
+                .run(context -> {
+                    PgVectorEmbeddingStore embeddingStore = context.getBean(PgVectorEmbeddingStore.class);
+                    embeddingStore.removeAll();
+                });
+    }
+
+    @Override
+    protected Class<?> autoConfigurationClass() {
+        return PgVectorEmbeddingStoreAutoConfiguration.class;
+    }
+
+    @Override
+    protected Class<? extends EmbeddingStore<TextSegment>> embeddingStoreClass() {
+        return PgVectorEmbeddingStore.class;
+    }
+
+    @Override
+    protected String[] properties() {
+        return new String[]{
+                "langchain4j.pgvector.datasource.enabled=true",
+                "langchain4j.pgvector.datasource.host=" + pgVector.getHost(),
+                "langchain4j.pgvector.datasource.port=" + pgVector.getMappedPort(5432),
+                "langchain4j.pgvector.datasource.user=" + pgVector.getUsername(),
+                "langchain4j.pgvector.datasource.password=" + pgVector.getPassword(),
+                "langchain4j.pgvector.datasource.database=" + pgVector.getDatabaseName(),
+                "langchain4j.pgvector.table=" + DEFAULT_TABLE,
+                "langchain4j.pgvector.create-table=true",
+                "langchain4j.pgvector.use-index=true",
+                "langchain4j.pgvector.index-list-size=100",
+                "langchain4j.pgvector.dimension=384"
+        };
+    }
+
+    @Override
+    protected String dimensionPropertyKey() {
+        return "langchain4j.pgvector.dimension";
+    }
+}
diff --git a/pom.xml b/pom.xml
index b87d59d..34a97bb 100644
--- a/pom.xml
+++ b/pom.xml
@@ -31,6 +31,7 @@
         <module>langchain4j-qianfan-spring-boot-starter</module>
         <module>langchain4j-milvus-spring-boot-starter</module>
         <module>langchain4j-dashscope-spring-boot-starter</module>
+        <module>langchain4j-pgvector-spring-boot-starter</module>
         <module>langchain4j-reactor</module>
     </modules>
 