Skip to content

Commit

Permalink
Swap fuse for lunr (#112)
Browse files Browse the repository at this point in the history
* Swap fuse for lunr. Lunr has real inverted search index, and better word matching (price -> pricing).
  • Loading branch information
scosman authored Jul 30, 2024
1 parent cd22dbc commit d9cef83
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 47 deletions.
23 changes: 13 additions & 10 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@
"@types/glob": "^8.1.0",
"@types/html-to-text": "^9.0.4",
"@types/jsdom": "^21.1.7",
"@types/lunr": "^2.3.7",
"@typescript-eslint/eslint-plugin": "^6.20.0",
"@typescript-eslint/parser": "^6.19.0",
"autoprefixer": "^10.4.15",
"daisyui": "^4.7.3",
"eslint": "^8.28.0",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-svelte": "^2.30.0",
"fuse.js": "^7.0.0",
"html-to-text": "^9.0.5",
"jsdom": "^24.1.1",
"postcss": "^8.4.31",
Expand All @@ -47,6 +47,7 @@
"@supabase/auth-helpers-sveltekit": "^0.11.0",
"@supabase/auth-ui-svelte": "^0.2.9",
"@supabase/supabase-js": "^2.33.0",
"lunr": "^2.3.9",
"resend": "^3.5.0",
"stripe": "^13.3.0"
}
Expand Down
43 changes: 34 additions & 9 deletions src/lib/build_index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,25 @@ import fs from "fs"
import glob from "glob"
import { convert } from "html-to-text"
import JSDOM from "jsdom"
import Fuse from "fuse.js"
import lunr from "lunr"

const excludePaths = ["/search"]

export async function buildSearchIndex() {
const indexData = []
const docs = []
const indexDocs: {
title: string
description: string
body: string
id: number
}[] = []

// Iterate over all files with an .html extension in .svelte-kit/output/prerendered/pages
const fileRoot = path.resolve(".")
const pagesPath = path.join(fileRoot, ".svelte-kit/output/prerendered/pages")

const allFiles = glob.sync(path.join(pagesPath, "**/*.html"))
for (const file of allFiles) {
for (const [i, file] of allFiles.entries()) {
try {
const webPath = file
.replace(pagesPath, "")
Expand Down Expand Up @@ -43,23 +49,42 @@ export async function buildSearchIndex() {
dom.window.document
.querySelector('meta[name="description"]')
?.getAttribute("content") || ""
indexData.push({
docs.push({
title,
description,
body: plaintext,
path: webPath,
})
indexDocs.push({
title,
description,
body: plaintext,
id: i,
})
} catch (e) {
console.log("Blog search indexing error", file, e)
}
}

const index = Fuse.createIndex(["title", "description", "body"], indexData)
const jsonIndex = index.toJSON()
const data = { index: jsonIndex, indexData, buildTime: Date.now() }
return data
const index = lunr(function () {
this.field("title", { boost: 3 })
this.field("description", { boost: 2 })
this.field("body", { boost: 1 })
this.ref("id")

indexDocs.forEach((doc) => {
this.add(doc)
}, this)
})

return {
index: JSON.stringify(index),
docs,
buildTime: Date.now(),
}
}

// Use this if you want to integrate into your build process manually.
// The default install achieves a similar result by setting prerender=true for the /search/api route.
export async function buildAndCacheSearchIndex() {
const data = await buildSearchIndex()
// write index data to file, overwriting static file on build
Expand Down
51 changes: 24 additions & 27 deletions src/routes/(marketing)/search/+page.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,19 @@
import { page } from "$app/stores"
import { browser } from "$app/environment"
import { onMount } from "svelte"
import Fuse from "fuse.js"
import { goto } from "$app/navigation"
import { dev } from "$app/environment"
import lunr from "lunr"
const fuseOptions = {
keys: ["title", "description", "body"],
ignoreLocation: true,
threshold: 0.3,
type Result = {
title: string
description: string
path: string
}
let fuse: Fuse<Result> | undefined
let results: Result[] = []
let index: lunr.Index | undefined
let docs: Result[] = []
let loading = true
let error = false
Expand All @@ -23,9 +25,11 @@
throw new Error(`HTTP error! status: ${response.status}`)
}
const searchData = await response.json()
if (searchData && searchData.index && searchData.indexData) {
const index = Fuse.parseIndex(searchData.index)
fuse = new Fuse<Result>(searchData.indexData, fuseOptions, index)
if (searchData && searchData.index && searchData.docs) {
//index = elasticlunr.Index.load(searchData.index)
let indexData = JSON.parse(searchData.index)
index = lunr.Index.load(indexData)
docs = searchData.docs
}
} catch (e) {
console.error("Failed to load search data", e)
Expand All @@ -36,21 +40,14 @@
}
})
type Result = {
item: {
title: string
description: string
body: string
path: string
}
}
let results: Result[] = []
// searchQuery is $page.url.hash minus the "#" at the beginning if present
let searchQuery = decodeURIComponent($page.url.hash.slice(1) ?? "")
$: {
if (fuse) {
results = fuse.search(searchQuery)
if (searchQuery.length == 0) {
results = []
} else if (index) {
let indexResults = index.search(searchQuery)
results = indexResults.map((r) => docs[parseInt(r.ref)])
}
}
// Update the URL hash when searchQuery changes so the browser can bookmark/share the search results
Expand Down Expand Up @@ -123,26 +120,26 @@
<div class="text-center mt-10 text-accent text-xl">No results found</div>
{#if dev}
<div class="text-center mt-4 font-mono">
Development mode only message: if you're missing content, rebuild your
local search index with `npm run build`
Development mode message: if you're missing content, rebuild your local
search index with `npm run build`
</div>
{/if}
{/if}

<div>
{#each results as result, i}
<a
href={result.item.path || "/"}
href={result.path || "/"}
id="search-result-{i + 1}"
class="card my-6 bg-white shadow-xl flex-row overflow-hidden focus:border"
>
<div class="flex-none w-6 md:w-32 bg-secondary"></div>
<div class="py-6 px-6">
<div class="text-xl">{result.item.title}</div>
<div class="text-xl">{result.title}</div>
<div class="text-sm text-accent">
{result.item.path}
{result.path}
</div>
<div class="text-slate-500">{result.item.description}</div>
<div class="text-slate-500">{result.description}</div>
</div>
</a>
{/each}
Expand Down

0 comments on commit d9cef83

Please sign in to comment.