Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(index): add presync to support building the full StructuredDoc in a delayed manner #3495

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 14 additions & 16 deletions crates/tabby-index/src/indexer_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,13 @@
let updated_at = chrono::Utc::now();
let res = tokio::runtime::Runtime::new().unwrap().block_on(async {
let updated = indexer
.sync(
StructuredDocState {
updated_at,
deleted: false,
},
doc,
)
.await;
.presync(StructuredDocState {
id: doc.id().to_string(),
updated_at,
deleted: false,
})
.await

Check warning on line 79 in crates/tabby-index/src/indexer_tests.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby-index/src/indexer_tests.rs#L79

Added line #L79 was not covered by tests
&& indexer.sync(doc).await;
println!("{}", updated);
updated
});
Expand Down Expand Up @@ -123,14 +122,13 @@
let updated_at = chrono::Utc::now();
let res = tokio::runtime::Runtime::new().unwrap().block_on(async {
let updated = indexer
.sync(
StructuredDocState {
updated_at,
deleted: false,
},
doc,
)
.await;
.presync(StructuredDocState {
id: doc.id().to_string(),
updated_at,
deleted: false,
})
.await

Check warning on line 130 in crates/tabby-index/src/indexer_tests.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby-index/src/indexer_tests.rs#L130

Added line #L130 was not covered by tests
&& indexer.sync(doc).await;
println!("{}", updated);
updated
});
Expand Down
40 changes: 20 additions & 20 deletions crates/tabby-index/src/structured_doc/public.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
// For instance, a closed pull request will be marked as deleted,
// prompting the indexer to remove it from the index.
pub deleted: bool,
pub id: String,
}

pub struct StructuredDocIndexer {
Expand All @@ -39,21 +40,34 @@
Self { indexer, builder }
}

// Runs pre-sync checks to determine if the document needs to be updated.
// Returns false if `sync` is not required to be called.
//
// Only the lightweight `state` (id, updated_at, deleted) is inspected here; the
// document itself is not passed to this method.
//
// Return value:
// - `false` when `state.deleted` is set (after issuing `indexer.delete` for the id),
// - `false` when `is_indexed_after(id, updated_at)` holds and `has_failed_chunks(id)` does not,
// - `true` otherwise.
pub async fn presync(&self, state: StructuredDocState) -> bool {
// Deleted documents are handled entirely here: delete by id and report that
// no `sync` call is needed.
if state.deleted {
self.indexer.delete(&state.id);
return false;

Check warning on line 48 in crates/tabby-index/src/structured_doc/public.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby-index/src/structured_doc/public.rs#L47-L48

Added lines #L47 - L48 were not covered by tests
}

// Skip only when both conditions hold: the indexer reports the id as indexed
// after `updated_at`, and it reports no failed chunks for that id.
if self.indexer.is_indexed_after(&state.id, state.updated_at)
&& !self.indexer.has_failed_chunks(&state.id)

Check warning on line 52 in crates/tabby-index/src/structured_doc/public.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby-index/src/structured_doc/public.rs#L52

Added line #L52 was not covered by tests
{
return false;

Check warning on line 54 in crates/tabby-index/src/structured_doc/public.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby-index/src/structured_doc/public.rs#L54

Added line #L54 was not covered by tests
};

// Neither skip condition applied, so the caller should proceed to `sync`.
true
}

// The sync process updates the document in the indexer incrementally.
// It first determines whether the document requires an update.
//
// If an update is needed, it checks the deletion state of the document.
// If the document is marked as deleted, it will be removed.
// Next, the document is rebuilt, the original is deleted, and the newly indexed document is added.
pub async fn sync(&self, state: StructuredDocState, document: StructuredDoc) -> bool {
if !self.require_updates(state.updated_at, &document) {
pub async fn sync(&self, document: StructuredDoc) -> bool {
if document.should_skip() {
return false;
}

if state.deleted {
return self.delete(document.id()).await;
}

stream! {
let (id, s) = self.builder.build(document).await;
self.indexer.delete(&id);
Expand All @@ -79,18 +93,4 @@
pub fn commit(self) {
self.indexer.commit();
}

fn require_updates(&self, updated_at: DateTime<Utc>, document: &StructuredDoc) -> bool {
if document.should_skip() {
return false;
}

if self.indexer.is_indexed_after(document.id(), updated_at)
&& !self.indexer.has_failed_chunks(document.id())
{
return false;
};

true
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@
let mut count = 0;
let mut num_updated = 0;
for await (state, doc) in issue_stream.chain(pull_stream) {
if index.sync(state, doc).await {
if index.presync(state).await && index.sync(doc).await {

Check warning on line 145 in ee/tabby-webserver/src/service/background_job/third_party_integration.rs

View check run for this annotation

Codecov / codecov/patch

ee/tabby-webserver/src/service/background_job/third_party_integration.rs#L145

Added line #L145 was not covered by tests
num_updated += 1
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
})
};
yield (StructuredDocState {
id: doc.id().to_string(),

Check warning on line 69 in ee/tabby-webserver/src/service/background_job/third_party_integration/issues.rs

View check run for this annotation

Codecov / codecov/patch

ee/tabby-webserver/src/service/background_job/third_party_integration/issues.rs#L69

Added line #L69 was not covered by tests
updated_at: issue.updated_at,
deleted: false,
}, doc);
Expand Down Expand Up @@ -125,6 +126,7 @@
closed: issue.state == "closed",
})};
yield (StructuredDocState {
id: doc.id().to_string(),

Check warning on line 129 in ee/tabby-webserver/src/service/background_job/third_party_integration/issues.rs

View check run for this annotation

Codecov / codecov/patch

ee/tabby-webserver/src/service/background_job/third_party_integration/issues.rs#L129

Added line #L129 was not covered by tests
updated_at: issue.updated_at,
deleted: false,
}, doc);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
if let Some(state) = pull.state {
if state == IssueState::Closed && pull.merged_at.is_none() {
yield (StructuredDocState{
id: doc.id().to_string(),

Check warning on line 67 in ee/tabby-webserver/src/service/background_job/third_party_integration/pulls.rs

View check run for this annotation

Codecov / codecov/patch

ee/tabby-webserver/src/service/background_job/third_party_integration/pulls.rs#L67

Added line #L67 was not covered by tests
updated_at: pull.updated_at.unwrap(),
deleted: true,
}, doc);
Expand Down Expand Up @@ -99,6 +100,7 @@


yield (StructuredDocState{
id: doc.id().to_string(),

Check warning on line 103 in ee/tabby-webserver/src/service/background_job/third_party_integration/pulls.rs

View check run for this annotation

Codecov / codecov/patch

ee/tabby-webserver/src/service/background_job/third_party_integration/pulls.rs#L103

Added line #L103 was not covered by tests
updated_at: pull.updated_at.unwrap(),
deleted: false,
}, doc);
Expand Down
20 changes: 11 additions & 9 deletions ee/tabby-webserver/src/service/background_job/web_crawler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,17 @@
};

num_docs += 1;
indexer
.sync(
StructuredDocState {
updated_at: Utc::now(),
deleted: false,
},
source_doc,
)
.await;

if indexer
.presync(StructuredDocState {
id: source_doc.id().to_string(),
updated_at: Utc::now(),
deleted: false,
})
.await

Check warning on line 64 in ee/tabby-webserver/src/service/background_job/web_crawler.rs

View check run for this annotation

Codecov / codecov/patch

ee/tabby-webserver/src/service/background_job/web_crawler.rs#L57-L64

Added lines #L57 - L64 were not covered by tests
{
indexer.sync(source_doc).await;
}

Check warning on line 67 in ee/tabby-webserver/src/service/background_job/web_crawler.rs

View check run for this annotation

Codecov / codecov/patch

ee/tabby-webserver/src/service/background_job/web_crawler.rs#L66-L67

Added lines #L66 - L67 were not covered by tests
}
logkit::info!("Crawled {} documents from '{}'", num_docs, self.url);
indexer.commit();
Expand Down
Loading