Skip to content

Commit

Permalink
docs: add comments and docstrings (#343)
Browse files Browse the repository at this point in the history
* doc: add docs to main

* doc: add documentation to data repository

* doc: add documentation to isar and tantivy data providers
  • Loading branch information
Sivan22 authored Nov 21, 2024
1 parent e5f3514 commit 7f8e9b8
Show file tree
Hide file tree
Showing 4 changed files with 303 additions and 11 deletions.
116 changes: 111 additions & 5 deletions lib/data/data_providers/isar_data_provider.dart
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,24 @@ import 'package:fuzzywuzzy/fuzzywuzzy.dart';
import 'package:otzaria/models/library.dart';
import 'package:pdfrx/pdfrx.dart';

/// IsarDataProvider manages all database operations using the Isar database.
///
/// This provider handles:
/// - Storage and retrieval of book references
/// - Management of text lines from books
/// - Search operations across references and lines
/// - Progress tracking for database operations
class IsarDataProvider {
/// The shared [IsarDataProvider] that backs [instance].
static final IsarDataProvider _singleton = IsarDataProvider();

/// Returns the shared [IsarDataProvider] instance.
static IsarDataProvider get instance => _singleton;

/// Creates a provider.
///
/// NOTE(review): this constructor is public, so callers can create providers
/// other than [instance]; every provider evaluates its own `isar` field
/// initializer. Confirm whether that is intended.
IsarDataProvider();

/// Isar database instance configured with the library path from settings
/// and schemas for Refs and Lines collections
final isar = Isar.open(
directory: Settings.getValue<String>('key-library-path') ?? 'C:\\אוצריא',
maxSizeMiB: null,
Expand All @@ -24,29 +36,51 @@ class IsarDataProvider {
LineSchema,
],
);

/// Number of books handled so far while references are being created.
///
/// Starts as `null`. [createRefsFromLibrary] updates it while iterating over
/// the books and sets it back to `null` when it finishes.
ValueNotifier<int?> refsNumOfbooksDone = ValueNotifier(null);

/// Number of books the current reference-creation pass will handle.
///
/// Starts as `null`. [createRefsFromLibrary] first sets it to the number of
/// text books, later overwrites it with the number of PDF books, and sets it
/// back to `null` when it finishes.
ValueNotifier<int?> refsNumOfbooksTotal = ValueNotifier(null);

/// Number of books handled so far while lines are being added.
///
/// Starts as `null`.
ValueNotifier<int?> linesNumOfbooksDone = ValueNotifier(null);

/// Number of books whose lines are to be added.
///
/// Starts as `null`. [addAllLines] sets it to the number of text books in the
/// library it is given.
ValueNotifier<int?> linesNumOfbooksTotal = ValueNotifier(null);

/// Creates references from a library's books and stores them in the database
///
/// This method processes both text books and PDF books, creating references from their
/// table of contents and storing them in the Isar database. It tracks progress using
/// value notifiers.
///
/// Parameters:
/// - [library]: The library containing books to process
/// - [startIndex]: The index to start processing from
Future<void> createRefsFromLibrary(Library library, int startIndex) async {
// Clear existing references before creating new ones
isar.write((isar) => isar.refs.clear());
int i = 0;
final allBooks =
library.getAllBooks().whereType<TextBook>().skip(startIndex);
refsNumOfbooksTotal.value = allBooks.length;

// Process text books
for (TextBook book in allBooks) {
try {
print('Creating refs for ${book.title} (${i++}/${allBooks.length})');
refsNumOfbooksDone.value = i - 1;
List<Ref> refs = [];
final List<TocEntry> toc = await book.tableOfContents;
//get all TocEntries recursively

// Collect all TOC entries recursively with their full path
List<TocEntry> alltocs = [];

void searchToc(List<TocEntry> entries) {
for (final TocEntry entry in entries) {
alltocs.add(entry);
// Append parent text to child entries for full context
for (final child in entry.children) {
child.text = '${entry.text},${child.text}';
}
Expand All @@ -55,6 +89,8 @@ class IsarDataProvider {
}

searchToc(toc);

// Create references for each title variant
for (String title in book.extraTitles ?? [book.title]) {
for (final TocEntry entry in alltocs) {
final ref = Ref(
Expand All @@ -76,16 +112,19 @@ class IsarDataProvider {
print(' Failed creating refs for ${book.title} $e');
}
}

// Process PDF books
final pdfBooks =
library.getAllBooks().whereType<PdfBook>().skip(startIndex).toList();
refsNumOfbooksTotal.value = pdfBooks.length;

for (int i = 0; i < pdfBooks.length; i++) {
refsNumOfbooksDone.value = i;
// Extract PDF outline (table of contents)
final List<PdfOutlineNode> outlines =
await PdfDocument.openFile(pdfBooks[i].path)
.then((value) => value.loadOutline());

// Flatten the PDF outline into a single list of outline nodes
List<PdfOutlineNode> alloutlines = [];

void searchOutline(List<PdfOutlineNode> entries) {
Expand All @@ -97,6 +136,7 @@ class IsarDataProvider {

searchOutline(outlines);

// Create references from PDF outline
for (final PdfOutlineNode entry in alloutlines) {
final ref = Ref(
id: isar.refs.autoIncrement(),
Expand All @@ -110,18 +150,35 @@ class IsarDataProvider {
isar.write((isar) => isar.refs.put(ref));
}
}

// Reset progress notifiers
refsNumOfbooksDone.value = null;
refsNumOfbooksTotal.value = null;
}

/// Looks up the stored references that belong to [book].
///
/// A reference matches when its book title equals the title of [book]. The
/// query is executed synchronously.
///
/// Returns the matching [Ref] objects.
List<Ref> getRefsForBook(TextBook book) {
  final refsOfBook = isar.refs.where().bookTitleEqualTo(book.title);
  return refsOfBook.findAll();
}

/// Reads every [Ref] stored in the `refs` collection.
///
/// The query is unfiltered and executed synchronously.
List<Ref> getAllRefs() => isar.refs.where().findAll();

/// Searches for references containing all parts of the given reference string
///
/// Parameters:
/// - [ref]: The reference string to search for
///
/// Returns a [Future] that completes with matching [Ref] objects
Future<List<Ref>> findRefs(String ref) {
final parts = ref.split(' ');
return isar.refs
Expand All @@ -133,15 +190,28 @@ class IsarDataProvider {
.findAllAsync();
}

/// Searches for references by relevance to a given reference string
///
/// Uses fuzzy matching to find the most relevant references, processing
/// matches in a separate isolate for better performance.
///
/// Parameters:
/// - [ref]: The reference string to search for
/// - [limit]: Maximum number of results per book (defaults to 10)
///
/// Returns a [Future] that completes with a list of [Ref] objects sorted by relevance
Future<List<Ref>> findRefsByRelevance(String ref, {int limit = 10}) async {
var refs = await findRefs(ref);
// reduce the number of refs by taking the top N of each book

// Process matches in a separate isolate for better performance
refs = await Isolate.run(() {
List<Ref> takenRefs = [];
final gruops = refs.groupBy((ref) => ref.bookTitle);
// Take top N matches from each book
for (final gruop in gruops.keys) {
takenRefs += (gruops[gruop]!.take(limit)).toList();
}
// Sort by fuzzy match ratio
takenRefs.sort((a, b) {
final scoreA = ratio(ref, a.ref);
final scoreB = ratio(ref, b.ref);
Expand All @@ -150,11 +220,12 @@ class IsarDataProvider {
return takenRefs;
});

// sort by ratio

return refs;
}

/// Gets the number of unique books that have references in the database
///
/// Returns a [Future] that completes with the count of books with references
Future<int> getNumberOfBooksWithRefs() async {
final allRefs = await isar.refs.where().findAllAsync();
final books = await Isolate.run(() {
Expand All @@ -163,6 +234,10 @@ class IsarDataProvider {
return books.length;
}

/// Adds all lines from all text books in the library to the database
///
/// Parameters:
/// - [library]: The library containing books to process
Future<void> addAllLines(Library library) async {
final books = library.getAllBooks().whereType<TextBook>().toList();
linesNumOfbooksTotal.value = books.length;
Expand All @@ -175,10 +250,15 @@ class IsarDataProvider {
}
}

/// Adds all lines from a specific book to the database
///
/// Parameters:
/// - [book]: The book whose lines should be added
Future<void> addLinesForBook(TextBook book) async {
final texts = (await book.text).split('\n');
final List<Line> lines = [];

// Create Line objects for each line of text
for (int i = 0; i < texts.length; i++) {
final line = Line(
id: isar.lines.autoIncrement(),
Expand All @@ -194,20 +274,46 @@ class IsarDataProvider {
isar.write((isar) => isar.lines.putAll(lines));
}

/// Retrieves all lines stored for a specific book.
///
/// A line matches when its book title equals the title of [book]. The query
/// goes through Isar's asynchronous API, the same way [findLines] does, so
/// the returned future is backed by an asynchronous read rather than a
/// blocking synchronous one.
///
/// Parameters:
/// - [book]: The book whose lines should be retrieved
///
/// Returns a [Future] that completes with the matching [Line] objects.
Future<List<Line>> getLinesForBook(TextBook book) async {
  return isar.lines.where().bookTitleEqualTo(book.title).findAllAsync();
}

/// Retrieves every line stored in the database.
///
/// The query is unfiltered and goes through Isar's asynchronous API, the same
/// way [findLines] does, so reading the whole collection is not performed as
/// a blocking synchronous call.
///
/// Returns a [Future] that completes with all stored [Line] objects.
Future<List<Line>> getAllLines() async {
  return isar.lines.where().findAllAsync();
}

/// Finds the stored lines whose text contains [text].
///
/// The lookup is executed through Isar's asynchronous query API.
///
/// Returns a [Future] that completes with the matching [Line] objects.
Future<List<Line>> findLines(String text) async {
  final matchingLines = isar.lines.where().textContains(text);
  return matchingLines.findAllAsync();
}
}

/// Extension on [Iterable] to add grouping functionality
///
/// Provides a [groupBy] method that groups elements by a key function,
/// similar to SQL GROUP BY or LINQ GroupBy.
extension Iterables<E> on Iterable<E> {
/// Groups elements by a key function
///
/// Parameters:
/// - [keyFunction]: Function that extracts the grouping key from an element
///
/// Returns a [Map] where keys are the grouping keys and values are lists of
/// elements that share that key
Map<K, List<E>> groupBy<K>(K Function(E) keyFunction) => fold(
<K, List<E>>{},
(Map<K, List<E>> map, E element) =>
Expand Down
Loading

0 comments on commit 7f8e9b8

Please sign in to comment.