Skip to content

Commit

Permalink
Implement indexing for library
Browse files Browse the repository at this point in the history
This commit implements several new database indexes to support the library API features.

It doesn't implement all the library methods yet, but it does use those indexes in the old ListDocuments call,
which should make that call much more efficient, in addition to returning more information.
  • Loading branch information
burdiyan committed Dec 16, 2024
1 parent 1c6c519 commit 9b5bbbe
Show file tree
Hide file tree
Showing 53 changed files with 35,254 additions and 2,810 deletions.
219 changes: 168 additions & 51 deletions backend/api/documents/v3alpha/documents.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package documents

import (
"context"
"encoding/json"
"errors"
"fmt"
"math"
Expand All @@ -18,6 +19,7 @@ import (
"seed/backend/util/maybe"
"seed/backend/util/sqlite"
"seed/backend/util/sqlite/sqlitex"
"strings"
"time"

blocks "github.com/ipfs/go-block-format"
Expand All @@ -33,6 +35,9 @@ import (

// Server implements Documents API v3.
type Server struct {
documents.UnimplementedCommentsServer
documents.UnimplementedDocumentsServer

keys core.KeyStore
idx *blob.Index
db *sqlitex.Pool
Expand Down Expand Up @@ -300,27 +305,26 @@ func (srv *Server) ListDocuments(ctx context.Context, in *documents.ListDocument
return nil, fmt.Errorf("failed to decode account: %w", err)
}

type Cursor struct {
ID int64 `json:"i"`
var cursor = struct {
IRI string `json:"i"`
ActivityTime int64 `json:"t"`
}{
IRI: "\uFFFF", // MaxString.
ActivityTime: math.MaxInt64,
}

var (
count int32
lastCursor = Cursor{
ID: math.MaxInt64,
if in.PageToken != "" {
if err := apiutil.DecodePageToken(in.PageToken, &cursor, nil); err != nil {
return nil, status.Errorf(codes.InvalidArgument, "%v", err)
}
)
}

var count int32

if in.PageSize <= 0 {
in.PageSize = 30
}

if in.PageToken != "" {
if err := apiutil.DecodePageToken(in.PageToken, &lastCursor, nil); err != nil {
return nil, status.Errorf(codes.InvalidArgument, "%v", err)
}
}

out := &documents.ListDocumentsResponse{
Documents: make([]*documents.DocumentListItem, 0, in.PageSize),
}
Expand All @@ -331,63 +335,176 @@ func (srv *Server) ListDocuments(ctx context.Context, in *documents.ListDocument
if err != nil {
return nil, err
}
requests := []*documents.GetDocumentRequest{}
if err = sqlitex.Exec(conn, qListDocuments(), func(stmt *sqlite.Stmt) error {
defer release()

lookup := blob.NewLookupCache(conn)

rows, check := sqlitex.Query(conn, qLoadDocumentList(), namespaceGlob, cursor.ActivityTime, cursor.IRI, in.PageSize)
for row := range rows {
if count == in.PageSize {
var err error
out.NextPageToken, err = apiutil.EncodePageToken(lastCursor, nil)
return err
out.NextPageToken, err = apiutil.EncodePageToken(cursor, nil)
break
}
count++

var (
id = stmt.ColumnInt64(0)
iri = stmt.ColumnText(1)
)
item, ierr := documentListItemFromRow(lookup, row)
if ierr != nil {
err = ierr
break
}

lastCursor.ID = id
cursor.ActivityTime = item.ActivitySummary.LatestChangeTime.AsTime().UnixMilli()
cursor.IRI = "hm://" + item.Account + "/" + item.Path
cursor.IRI = strings.TrimSuffix(cursor.IRI, "/")

// TODO(burdiyan): This is a hack to get the account from the IRI.
u, err := url.Parse(iri)
if err != nil {
return err
out.Documents = append(out.Documents, item)
}

err = errors.Join(err, check())
if err != nil {
return nil, err
}

return out, nil
}

// documentListItemFromRow converts the current result row of the document
// list query into a DocumentListItem.
//
// Columns are read positionally, in this order: iri, genesis, metadata (JSON),
// comment count, heads (JSON), authors (JSON), genesis change time,
// last comment time, last change time, last activity time.
//
// lookup is used to turn the numeric IDs stored in the heads and authors
// columns into CIDs and public keys, and to fetch breadcrumb titles.
//
// An error is returned when the IRI cannot be split into space and path,
// when any of the JSON columns fails to decode, or when a lookup fails.
//
// NOTE(review): this text comes from a rendered diff. Statements below that
// reference names not declared in this function (u, requests, srv, ctx, req,
// doc, lastCursor, namespaceGlob, in, release) look like removed-side lines of
// the old ListDocuments body shown inline — confirm against the actual file.
func documentListItemFromRow(lookup *blob.LookupCache, row *sqlite.Stmt) (*documents.DocumentListItem, error) {
// presumably inc() yields successive column indexes starting at 0, so the
// order of the declarations below must match the SELECT list — TODO confirm.
inc := sqlite.NewIncrementor(0)
var (
iriRaw = row.ColumnText(inc())
genesis = row.ColumnText(inc())
metadataJSON = row.ColumnBytesUnsafe(inc())
commentCount = row.ColumnInt64(inc())
headsJSON = row.ColumnBytesUnsafe(inc())
authorsJSON = row.ColumnBytesUnsafe(inc())
genesisChangeTime = row.ColumnInt64(inc())
lastCommentTime = row.ColumnInt64(inc())
lastChangeTime = row.ColumnInt64(inc())
lastActivityTime = row.ColumnInt64(inc())
// The last activity time is read only to consume its column; the value is discarded.
_ = lastActivityTime
)

iri := blob.IRI(iriRaw)
space, path, err := iri.SpacePath()
if err != nil {
return nil, err
}

var attrs blob.DocIndexedAttrs
if err := json.Unmarshal(metadataJSON, &attrs); err != nil {
return nil, err
}

// Flatten the indexed attributes into string values: strings are copied,
// int64/int are formatted in decimal, and any other value type ends up as
// an empty string because vv keeps its zero value.
metadata := make(map[string]string, len(attrs))
for k, v := range attrs {
var vv string
switch iv := v.Value.(type) {
case string:
vv = iv
case int64, int:
vv = fmt.Sprintf("%d", iv)
}

path := u.Path
// Since we don't want to call getDocument here, we buffer the request
// Otherwise we would have a transaction inside a transaction leading to
// a deadlock.
requests = append(requests, &documents.GetDocumentRequest{
Account: u.Host,
Path: path,
})
metadata[k] = vv
}

return nil
}, lastCursor.ID, namespaceGlob, in.PageSize); err != nil {
release()
// The authors column decodes to a list of numeric IDs, each resolved to a
// public key through the lookup cache.
var authorIDs []int64
if err := json.Unmarshal(authorsJSON, &authorIDs); err != nil {
return nil, err
}
release()
for _, req := range requests {
doc, err := srv.GetDocument(ctx, req)

authors := make([]string, len(authorIDs))
for i, a := range authorIDs {
aa, err := lookup.PublicKey(a)
if err != nil {
continue
return nil, err
}
out.Documents = append(out.Documents, DocumentToListItem(doc))
authors[i] = aa.String()
}

// The heads column decodes to a list of numeric IDs, each resolved to a CID;
// the CIDs are later combined into the item's Version string.
var headIDs []int64
if err := json.Unmarshal(headsJSON, &headIDs); err != nil {
return nil, err
}

cids := make([]cid.Cid, len(headIDs))
for i, h := range headIDs {
cids[i], err = lookup.CID(h)
if err != nil {
return nil, err
}
}

// NOTE(review): the slice expression below panics if Breadcrumbs() returns
// an empty slice — assumes it always includes at least the current
// document; TODO confirm.
crumbIRIs := iri.Breadcrumbs()
crumbIRIs = crumbIRIs[:len(crumbIRIs)-1] // Minus 1 to skip the current document.

var crumbs []*documents.Breadcrumb
if len(crumbIRIs) > 0 {
crumbs = make([]*documents.Breadcrumb, len(crumbIRIs))

// NOTE(review): crumbIRIs was already shortened by one above, and the range
// expression shortens it by one again. The loop therefore skips the last
// remaining ancestor and leaves the final slot of crumbs nil — looks like
// a double trim; confirm which of the two is intended.
for i, iri := range crumbIRIs[:len(crumbIRIs)-1] { // Minus one to skip the current document
title, found, err := lookup.DocumentTitle(iri)
if err != nil {
return nil, err
}

_, path, err := iri.SpacePath()
if err != nil {
return nil, err
}

// A breadcrumb whose title was not found is still emitted, flagged as missing.
crumb := &documents.Breadcrumb{
Name: title,
Path: path,
IsMissing: !found,
}

crumbs[i] = crumb
}
}

// The time columns are passed to time.UnixMilli, i.e. they are interpreted
// as Unix timestamps in milliseconds.
out := &documents.DocumentListItem{
Account: space.String(),
Path: path,
Metadata: metadata,
Authors: authors,
CreateTime: timestamppb.New(time.UnixMilli(genesisChangeTime)),
UpdateTime: timestamppb.New(time.UnixMilli(lastChangeTime)),
Genesis: genesis,
Version: blob.NewVersion(cids...).String(),
Breadcrumbs: crumbs,
ActivitySummary: &documents.ActivitySummary{
// NOTE(review): unchecked int64 -> int32 narrowing; a count above
// math.MaxInt32 would wrap. This is the conversion the lint annotation
// rendered below refers to.
CommentCount: int32(commentCount),

Check failure on line 477 in backend/api/documents/v3alpha/documents.go

View workflow job for this annotation

GitHub Actions / lint-go

G115: integer overflow conversion int64 -> int32 (gosec)
LatestCommentTime: timestamppb.New(time.UnixMilli(lastCommentTime)),
LatestChangeTime: timestamppb.New(time.UnixMilli(lastChangeTime)),
},
}

return out, nil
}

var qListDocuments = dqb.Str(`
var qLoadDocumentList = dqb.Str(`
-- namespace_glob, cursor_activity, cursor_iri, page_size
SELECT
id,
iri
FROM resources
WHERE id < :last_cursor
AND iri GLOB :namespace_glob
ORDER BY id DESC
LIMIT :page_size + 1;
r.iri,
dg.genesis,
dg.metadata,
dg.comment_count,
dg.heads,
dg.authors,
dg.genesis_change_time,
dg.last_comment_time,
dg.last_change_time,
dg.last_activity_time
FROM document_generations dg
JOIN resources r ON r.id = dg.resource
AND r.iri GLOB ?1
AND dg.is_deleted = 0
WHERE last_activity_time < ?2
AND r.iri < ?3
GROUP BY dg.resource HAVING dg.generation = MAX(dg.generation)
ORDER BY last_activity_time DESC
LIMIT ?4 + 1;
`)

// DeleteDocument implements Documents API v3.
Expand Down
23 changes: 18 additions & 5 deletions backend/api/documents/v3alpha/documents_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,17 @@ func TestListDocument(t *testing.T) {
want := []*documents.DocumentListItem{DocumentToListItem(namedDoc2), DocumentToListItem(namedDoc), DocumentToListItem(profile)}
require.Len(t, list.Documents, len(want))

testutil.StructsEqual(want[0], list.Documents[0]).Compare(t, "named2 must be the first doc in the list")
testutil.StructsEqual(want[1], list.Documents[1]).Compare(t, "named must be the second doc in the list")
testutil.StructsEqual(want[2], list.Documents[2]).Compare(t, "profile doc must be the last element in the list")
testutil.StructsEqual(want[0], list.Documents[0]).
IgnoreFields(documents.DocumentListItem{}, "Breadcrumbs", "ActivitySummary").
Compare(t, "named2 must be the first doc in the list")

testutil.StructsEqual(want[1], list.Documents[1]).
IgnoreFields(documents.DocumentListItem{}, "Breadcrumbs", "ActivitySummary").
Compare(t, "named must be the second doc in the list")

testutil.StructsEqual(want[2], list.Documents[2]).
IgnoreFields(documents.DocumentListItem{}, "Breadcrumbs", "ActivitySummary").
Compare(t, "profile doc must be the last element in the list")
}

func TestGetDocumentWithVersion(t *testing.T) {
Expand Down Expand Up @@ -587,7 +595,10 @@ func TestTombstoneRef(t *testing.T) {
})
require.NoError(t, err)
require.Len(t, list.Documents, 1, "only initial root document must be in the list")
testutil.StructsEqual(DocumentToListItem(home), list.Documents[0]).Compare(t, "listing must only show home document")

testutil.StructsEqual(DocumentToListItem(home), list.Documents[0]).
IgnoreFields(documents.DocumentListItem{}, "Breadcrumbs", "ActivitySummary").
Compare(t, "listing must only show home document")
}

// But we also want to list the deleted docs.
Expand Down Expand Up @@ -667,7 +678,9 @@ func TestTombstoneRef(t *testing.T) {
slices.SortFunc(want.Documents, func(a, b *documents.DocumentListItem) int { return cmp.Compare(a.Version, b.Version) })
slices.SortFunc(list.Documents, func(a, b *documents.DocumentListItem) int { return cmp.Compare(a.Version, b.Version) })

testutil.StructsEqual(want, list).Compare(t, "listing must contain home doc and republished doc")
testutil.StructsEqual(want, list).
IgnoreFields(documents.DocumentListItem{}, "Breadcrumbs", "ActivitySummary").
Compare(t, "listing must contain home doc and republished doc")
}

// Changes with no base version must fail when there's a live document.
Expand Down
2 changes: 1 addition & 1 deletion backend/blob/blob_capability.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ func indexCapability(ictx *indexingCtx, id int64, c cid.Cid, v *Capability) erro
return err
}

sb.Meta = map[string]any{
sb.ExtraAttrs = map[string]any{
"role": v.Role,
"del": del,
}
Expand Down
Loading

0 comments on commit 9b5bbbe

Please sign in to comment.