Skip to content

Commit

Permalink
feat(artifact): support ephemeral catalog
Browse files Browse the repository at this point in the history
  • Loading branch information
Yougigun committed Nov 27, 2024
1 parent a7cef64 commit fc504ad
Show file tree
Hide file tree
Showing 8 changed files with 486 additions and 59 deletions.
2 changes: 1 addition & 1 deletion config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ database:
host: pg-sql
port: 5432
name: artifact
version: 18
version: 19
timezone: Etc/UTC
pool:
idleconnections: 5
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ require (
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0
github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.1
github.com/influxdata/influxdb-client-go/v2 v2.12.3
github.com/instill-ai/protogen-go v0.3.3-alpha.0.20241029162707-1398399a24ee
github.com/instill-ai/protogen-go v0.3.3-alpha.0.20241125163328-c29704e47ba4
github.com/instill-ai/usage-client v0.3.0-alpha.0.20240319060111-4a3a39f2fd61
github.com/instill-ai/x v0.3.0-alpha.0.20231219052200-6230a89e386c
github.com/knadh/koanf v1.5.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -344,8 +344,8 @@ github.com/influxdata/influxdb-client-go/v2 v2.12.3 h1:28nRlNMRIV4QbtIUvxhWqaxn0
github.com/influxdata/influxdb-client-go/v2 v2.12.3/go.mod h1:IrrLUbCjjfkmRuaCiGQg4m2GbkaeJDcuWoxiWdQEbA0=
github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839 h1:W9WBk7wlPfJLvMCdtV4zPulc4uCPrlywQOmbFOhgQNU=
github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
github.com/instill-ai/protogen-go v0.3.3-alpha.0.20241029162707-1398399a24ee h1:onnzrn5jabO3jDLPo2193Ql6YMRyDWDx9K834Bfi8V0=
github.com/instill-ai/protogen-go v0.3.3-alpha.0.20241029162707-1398399a24ee/go.mod h1:rf0UY7VpEgpaLudYEcjx5rnbuwlBaaLyD4FQmWLtgAY=
github.com/instill-ai/protogen-go v0.3.3-alpha.0.20241125163328-c29704e47ba4 h1:k8X9gMiCwHWShB1FITaWwmlzthFnor1Jj0tSaFG+9x8=
github.com/instill-ai/protogen-go v0.3.3-alpha.0.20241125163328-c29704e47ba4/go.mod h1:rf0UY7VpEgpaLudYEcjx5rnbuwlBaaLyD4FQmWLtgAY=
github.com/instill-ai/usage-client v0.3.0-alpha.0.20240319060111-4a3a39f2fd61 h1:smPTvmXDhn/QC7y/TPXyMTqbbRd0gvzmFgWBChwTfhE=
github.com/instill-ai/usage-client v0.3.0-alpha.0.20240319060111-4a3a39f2fd61/go.mod h1:/TAHs4ybuylk5icuy+MQtHRc4XUnIyXzeNKxX9qDFhw=
github.com/instill-ai/x v0.3.0-alpha.0.20231219052200-6230a89e386c h1:a2RVkpIV2QcrGnSHAou+t/L+vBsaIfFvk5inVg5Uh4s=
Expand Down
4 changes: 4 additions & 0 deletions pkg/db/migration/000019_refactor_kb_for_chat.down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
BEGIN;
-- Remove catalog_type column from knowledge_base table
ALTER TABLE knowledge_base DROP COLUMN catalog_type;
COMMIT;
10 changes: 10 additions & 0 deletions pkg/db/migration/000019_refactor_kb_for_chat.up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
BEGIN;

-- Add catalog_type column to knowledge_base table
ALTER TABLE knowledge_base
ADD COLUMN catalog_type VARCHAR(255) DEFAULT 'CATALOG_TYPE_PERSISTENT';

-- Add comment for the new column
COMMENT ON COLUMN knowledge_base.catalog_type IS 'Type of the knowledge base catalog';

COMMIT;
87 changes: 54 additions & 33 deletions pkg/handler/knowledgebase.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func (ph *PublicHandler) CreateCatalog(ctx context.Context, req *artifactpb.Crea
// check if user has reached the maximum number of catalogs
// note: the simple implementation have race condition to bypass the check,
// but it is okay for now
kbCount, err := ph.service.Repository.GetKnowledgeBaseCountByOwner(ctx, ns.NsUID.String())
kbCount, err := ph.service.Repository.GetKnowledgeBaseCountByOwner(ctx, ns.NsUID.String(), artifactpb.CatalogType_CATALOG_TYPE_PERSISTENT)
if err != nil {
log.Error("failed to get catalog count", zap.Error(err))
return nil, fmt.Errorf(ErrorCreateKnowledgeBaseMsg, err)
Expand Down Expand Up @@ -117,6 +117,11 @@ func (ph *PublicHandler) CreateCatalog(ctx context.Context, req *artifactpb.Crea
return nil
}

// if catalog type is not set, set it to persistent
if req.GetType() == artifactpb.CatalogType_CATALOG_TYPE_UNSPECIFIED {
req.Type = artifactpb.CatalogType_CATALOG_TYPE_PERSISTENT
}

// create catalog
dbData, err := ph.service.Repository.CreateKnowledgeBase(ctx,
repository.KnowledgeBase{
Expand All @@ -127,6 +132,7 @@ func (ph *PublicHandler) CreateCatalog(ctx context.Context, req *artifactpb.Crea
Tags: req.Tags,
Owner: ns.NsUID.String(),
CreatorUID: creatorUUID,
CatalogType: req.GetType().String(),
}, callExternalService,
)
if err != nil {
Expand All @@ -135,20 +141,29 @@ func (ph *PublicHandler) CreateCatalog(ctx context.Context, req *artifactpb.Crea

return &artifactpb.CreateCatalogResponse{
Catalog: &artifactpb.Catalog{
Name: dbData.Name,
CatalogId: dbData.KbID,
Description: dbData.Description,
Tags: dbData.Tags,
OwnerName: dbData.Owner,
CreateTime: dbData.CreateTime.String(),
UpdateTime: dbData.UpdateTime.String(),
ConvertingPipelines: []string{"preset/indexing-convert-pdf"},
SplittingPipelines: []string{"preset/indexing-split-text", "preset/indexing-split-markdown"},
EmbeddingPipelines: []string{"preset/indexing-embed"},
DownstreamApps: []string{},
TotalFiles: 0,
TotalTokens: 0,
UsedStorage: 0,
Name: dbData.Name,
CatalogUid: dbData.UID.String(),
CatalogId: dbData.KbID,
Description: dbData.Description,
Tags: dbData.Tags,
OwnerName: dbData.Owner,
CreateTime: dbData.CreateTime.String(),
UpdateTime: dbData.UpdateTime.String(),
ConvertingPipelines: []string{
service.NamespaceID + "/" + service.ConvertDocToMDPipelineID + "@" + service.DocToMDVersion,
service.NamespaceID + "/" + service.ConvertDocToMDPipelineID2 + "@" + service.DocToMDVersion2,
},
SplittingPipelines: []string{
service.NamespaceID + "/" + service.TextChunkPipelineID + "@" + service.TextSplitVersion,
service.NamespaceID + "/" + service.MdChunkPipelineID + "@" + service.MdSplitVersion,
},
EmbeddingPipelines: []string{
service.NamespaceID + "/" + service.TextEmbedPipelineID + "@" + service.TextEmbedVersion,
},
DownstreamApps: []string{},
TotalFiles: 0,
TotalTokens: 0,
UsedStorage: 0,
},
}, nil
}
Expand Down Expand Up @@ -183,7 +198,7 @@ func (ph *PublicHandler) ListCatalogs(ctx context.Context, req *artifactpb.ListC
return nil, fmt.Errorf("failed to check namespace permission. err:%w", err)
}

dbData, err := ph.service.Repository.ListKnowledgeBases(ctx, ns.NsUID.String())
dbData, err := ph.service.Repository.ListKnowledgeBasesByCatalogType(ctx, ns.NsUID.String(), artifactpb.CatalogType_CATALOG_TYPE_PERSISTENT)
if err != nil {
log.Error("failed to get catalogs", zap.Error(err))
return nil, fmt.Errorf(ErrorListKnowledgeBasesMsg, err)
Expand All @@ -207,23 +222,29 @@ func (ph *PublicHandler) ListCatalogs(ctx context.Context, req *artifactpb.ListC
kbs := make([]*artifactpb.Catalog, len(dbData))
for i, kb := range dbData {
kbs[i] = &artifactpb.Catalog{
CatalogUid: kb.UID.String(),
Name: kb.Name,
CatalogId: kb.KbID,
Description: kb.Description,
Tags: kb.Tags,
CreateTime: kb.CreateTime.String(),
UpdateTime: kb.UpdateTime.String(),
OwnerName: kb.Owner,
ConvertingPipelines: []string{service.NamespaceID + "/" + service.ConvertDocToMDPipelineID},
CatalogUid: kb.UID.String(),
Name: kb.Name,
CatalogId: kb.KbID,
Description: kb.Description,
Tags: kb.Tags,
CreateTime: kb.CreateTime.String(),
UpdateTime: kb.UpdateTime.String(),
OwnerName: kb.Owner,
ConvertingPipelines: []string{
service.NamespaceID + "/" + service.ConvertDocToMDPipelineID + "@" + service.DocToMDVersion,
service.NamespaceID + "/" + service.ConvertDocToMDPipelineID2 + "@" + service.DocToMDVersion2,
},
SplittingPipelines: []string{
service.NamespaceID + "/" + service.TextChunkPipelineID,
service.NamespaceID + "/" + service.MdChunkPipelineID},
EmbeddingPipelines: []string{service.NamespaceID + "/" + service.TextEmbedPipelineID},
DownstreamApps: []string{},
TotalFiles: uint32(fileCounts[kb.UID]),
TotalTokens: uint32(tokenCounts[kb.UID]),
UsedStorage: uint64(kb.Usage),
service.NamespaceID + "/" + service.TextChunkPipelineID + "@" + service.TextSplitVersion,
service.NamespaceID + "/" + service.MdChunkPipelineID + "@" + service.MdSplitVersion,
},
EmbeddingPipelines: []string{
service.NamespaceID + "/" + service.TextEmbedPipelineID + "@" + service.TextEmbedVersion,
},
DownstreamApps: []string{},
TotalFiles: uint32(fileCounts[kb.UID]),
TotalTokens: uint32(tokenCounts[kb.UID]),
UsedStorage: uint64(kb.Usage),
}
}
return &artifactpb.ListCatalogsResponse{
Expand All @@ -241,7 +262,7 @@ func (ph *PublicHandler) UpdateCatalog(ctx context.Context, req *artifactpb.Upda
if req.CatalogId == "" {
log.Error("kb_id is empty", zap.Error(ErrCheckRequiredFields))
return nil, fmt.Errorf("kb_id is empty. err: %w", ErrCheckRequiredFields)
}
}

ns, err := ph.service.GetNamespaceByNsID(ctx, req.GetNamespaceId())
if err != nil {
Expand Down
Loading

0 comments on commit fc504ad

Please sign in to comment.