Skip to content

Commit

Permalink
Merge pull request #404 from GalacticHypernova/patch-2
Browse files Browse the repository at this point in the history
perf: avoid cheerio in favor of regex
  • Loading branch information
vejja authored May 17, 2024
2 parents 99d6dee + fd760b1 commit 60ddf61
Show file tree
Hide file tree
Showing 9 changed files with 78 additions and 176 deletions.
1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
"dependencies": {
"@nuxt/kit": "^3.11.2",
"basic-auth": "^2.0.1",
"cheerio": "^1.0.0-rc.12",
"defu": "^6.1.1",
"nuxt-csurf": "^1.5.1",
"pathe": "^1.0.0",
Expand Down
3 changes: 0 additions & 3 deletions src/module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,6 @@ export default defineNuxtModule<ModuleOptions>({
// Register nitro plugin to manage security rules at the level of each route
addServerPlugin(resolver.resolve('./runtime/nitro/plugins/00-routeRules'))

// Pre-process HTML into DOM tree
addServerPlugin(resolver.resolve('./runtime/nitro/plugins/10-preprocessHtml'))

// Register nitro plugin to enable Subresource Integrity
addServerPlugin(resolver.resolve('./runtime/nitro/plugins/20-subresourceIntegrity'))

Expand Down
32 changes: 0 additions & 32 deletions src/runtime/nitro/plugins/10-preprocessHtml.ts

This file was deleted.

58 changes: 23 additions & 35 deletions src/runtime/nitro/plugins/20-subresourceIntegrity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ import { defineNitroPlugin } from '#imports'
//@ts-expect-error : we are importing from the virtual file system
import sriHashes from '#sri-hashes'
import { resolveSecurityRules } from '../context'
import type { Section } from '../../../types/module'

/**
 * Matches external `<script>` tags that carry a double-quoted `src` attribute
 * and do not already carry an `integrity` attribute.
 * - capture 1: every attribute following the tag name (leading whitespace included)
 * - capture 2: the value of `src`
 * The `i` flag is needed because HTML tag and attribute names are case-insensitive.
 */
const SCRIPT_RE = /<script((?=[^>]+\bsrc="([^"]+)")(?![^>]+\bintegrity="[^"]+")[^>]+)(?:\/>|><\/script>)/gi
/**
 * Matches `<link>` tags whose `rel` is `stylesheet`, `preload` or `modulepreload`,
 * that carry a double-quoted `href` and do not already carry an `integrity` attribute.
 * - capture 1: every attribute following the tag name (leading whitespace included)
 * - capture 2: the value of `href`
 * The existing-integrity check accepts any non-empty quoted value (same as SCRIPT_RE),
 * so a whitespace-separated list of hashes is recognised and not duplicated.
 */
const LINK_RE = /<link((?=[^>]+\brel="(?:stylesheet|preload|modulepreload)")(?=[^>]+\bhref="([^"]+)")(?![^>]+\bintegrity="[^"]+")[^>]+)>/gi

/**
* This plugin adds Subresource Integrity (SRI) hashes to script and link tags in the HTML.
Expand All @@ -17,47 +21,31 @@ export default defineNitroPlugin((nitroApp) => {
// Scan all relevant sections of the NuxtRenderHtmlContext
// Note: integrity can only be set on scripts and on links with rel preload, modulepreload and stylesheet
// However the SRI standard provides that other elements may be added to that list in the future
type Section = 'body' | 'bodyAppend' | 'bodyPrepend' | 'head'
const sections = ['body', 'bodyAppend', 'bodyPrepend', 'head'] as Section[]
const cheerios = event.context.security!.cheerios!
for (const section of sections) {
cheerios[section].forEach($ => {
// Add integrity to all relevant script tags
$('script').each((i, script) => {
const scriptAttrs = $(script).attr()
const src = scriptAttrs?.src
const integrity = scriptAttrs?.integrity
// Only add integrity to external scripts that do not already have one
if (src && !integrity) {
// Get the integrity hash from our static database
const hash = sriHashes[src]
// Set the integrity hash in HTML if found
if (hash) {
$(script).attr('integrity', hash)
}
html[section] = html[section].map(element => {
element = element.replace(SCRIPT_RE, (match, rest: string, src: string) => {
const hash = sriHashes[src]
if (hash) {
const integrityScript = `<script integrity="${hash}"${rest}></script>`
return integrityScript
} else {
return match
}
})
// Add integrity to all relevant link tags
$('link').each((i, link) => {
const linkAttrs = $(link).attr()
const rel = linkAttrs?.rel
// HTML standard defines only 3 rel values for valid integrity attributes on links : stylesheet, preload and modulepreload
// https://html.spec.whatwg.org/multipage/semantics.html#attr-link-integrity
if (rel === 'stylesheet' || rel === 'preload' || rel === 'modulepreload') {
const href = linkAttrs?.href
const integrity = linkAttrs?.integrity
// Only add integrity to resources that do not already have one
if (href && !integrity) {
// Get the integrity hash from our static database
const hash = sriHashes[href]
// Set the integrity hash in HTML if found
if (hash) {
$(link).attr('integrity', hash)
}
}

element = element.replace(LINK_RE, (match, rest: string, href: string) => {
const hash = sriHashes[href]
if (hash) {
const integrityLink = `<link integrity="${hash}"${rest}>`
return integrityLink
} else {
return match
}
})

return element
})
}
})
})
})
62 changes: 32 additions & 30 deletions src/runtime/nitro/plugins/30-cspSsgHashes.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,21 @@
import { defineNitroPlugin } from '#imports'
import { resolveSecurityRules } from '../context'
import { generateHash } from '../../../utils/hash'
import type { Section } from '../../../types/module'

/*
FOLLOWING PATTERN NOT IN USE:
Placeholder until a proper caching strategy is thought of:
/<script((?=[^>]+src="([\w:.-\/]+)")(?:(?![^>]+integrity="[\w-]+")|(?=[^>]+integrity="([\w-])"))[^>]+)(?:\/>|><\/script>)/g
Allows to obtain integrity from both scripts with integrity and those without (useful for 03)
*/

/**
 * Matches inline `<script>` elements, i.e. those without a double-quoted `src` attribute.
 * - capture 1: the script text between the opening and closing tags
 * `[\s\S]*?` is used instead of `.*?` so that scripts spanning several lines are captured.
 * Any non-empty quoted `src` value (including ones with a query string) marks the script as external.
 */
const INLINE_SCRIPT_RE = /<script(?![^>]*?\bsrc="[^"]+")[^>]*>([\s\S]*?)<\/script>/gi
/**
 * Matches `<style>` elements.
 * - capture 1: the style text between the opening and closing tags (may span several lines)
 */
const STYLE_RE = /<style[^>]*>([\s\S]*?)<\/style>/gi
/**
 * Matches external `<script>` tags that carry both a `src` and an `integrity` attribute.
 * - capture 1: the value of `integrity`
 */
const SCRIPT_RE = /<script(?=[^>]+\bsrc="[^"]+")(?=[^>]+\bintegrity="([\w\-+/=]+)")[^>]+(?:\/>|><\/script>)/gi
/**
 * Matches `<link>` tags with rel `stylesheet`, `preload` or `modulepreload` that carry an `integrity` attribute.
 * - capture 1: the value of `rel`
 * - capture 2: the value of `integrity`
 * - capture 3: the value of `as`, or undefined when the attribute is absent
 */
const LINK_RE = /<link(?=[^>]+\brel="(stylesheet|preload|modulepreload)")(?=[^>]+\bintegrity="([\w\-+/=]+)")(?=(?:[^>]+\bas="(\w+)")?)[^>]+>/gi



/**
* This plugin adds security hashes to the event context for later use in the CSP header.
Expand All @@ -26,8 +41,6 @@ export default defineNitroPlugin((nitroApp) => {
const scriptHashes = event.context.security!.hashes.script
const styleHashes = event.context.security!.hashes.style
const hashAlgorithm = 'sha256'
type Section = 'body' | 'bodyAppend' | 'bodyPrepend' | 'head'
const cheerios = event.context.security!.cheerios!

// Parse HTML if SSG is enabled for this route
if (rules.ssg) {
Expand All @@ -36,42 +49,32 @@ export default defineNitroPlugin((nitroApp) => {
// Scan all relevant sections of the NuxtRenderHtmlContext
const sections = ['body', 'bodyAppend', 'bodyPrepend', 'head'] as Section[]
for (const section of sections) {
cheerios[section].forEach($ => {
// Parse all script tags
html[section].forEach(element => {
if (hashScripts) {
$('script').each((i, script) => {
const scriptText = $(script).text()
const scriptAttrs = $(script).attr()
const src = scriptAttrs?.src
const integrity = scriptAttrs?.integrity
if (!src && scriptText) {
// Hash inline scripts with content
scriptHashes.add(`'${generateHash(scriptText, hashAlgorithm)}'`)
} else if (src && integrity) {
// Whitelist external scripts with integrity
scriptHashes.add(`'${integrity}'`)
}
})
// Parse all script tags
const inlineScriptMatches = element.matchAll(INLINE_SCRIPT_RE)
for (const [, scriptText] of inlineScriptMatches) {
scriptHashes.add(`'${generateHash(scriptText, hashAlgorithm)}'`)
}
const externalScriptMatches = element.matchAll(SCRIPT_RE)
for (const [, integrity] of externalScriptMatches) {
scriptHashes.add(`'${integrity}'`)
}
}

// Parse all style tags
if (hashStyles) {
$('style').each((i, style) => {
const styleText = $(style).text()
if (styleText) {
// Hash inline styles with content
styleHashes.add(`'${generateHash(styleText, hashAlgorithm)}'`)
}
})
const styleMatches = element.matchAll(STYLE_RE)
for (const [, styleText] of styleMatches) {
styleHashes.add(`'${generateHash(styleText, hashAlgorithm)}'`)
}
}

// Parse all link tags
$('link').each((i, link) => {
const linkAttrs = $(link).attr()
const integrity = linkAttrs?.integrity
const linkMatches = element.matchAll(LINK_RE)
for (const [, rel, integrity, as] of linkMatches) {
// Whitelist links to external resources with integrity
if (integrity) {
const rel = linkAttrs?.rel
// HTML standard defines only 3 rel values for valid integrity attributes on links : stylesheet, preload and modulepreload
// https://html.spec.whatwg.org/multipage/semantics.html#attr-link-integrity
if (rel === 'stylesheet' && hashStyles) {
Expand All @@ -81,7 +84,6 @@ export default defineNitroPlugin((nitroApp) => {
// Fetch standard defines the destination (https://fetch.spec.whatwg.org/#destination-table)
// This table is the official mapping between HTML and CSP
// We only support script-src for now, but we could populate other policies in the future
const as = linkAttrs.as
switch (as) {
case 'script':
case 'audioworklet':
Expand All @@ -97,7 +99,7 @@ export default defineNitroPlugin((nitroApp) => {
scriptHashes.add(`'${integrity}'`)
}
}
})
}
})
}
}
Expand Down
21 changes: 16 additions & 5 deletions src/runtime/nitro/plugins/40-cspSsrNonce.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@ import { defineNitroPlugin } from '#imports'
import crypto from 'node:crypto'
import { resolveSecurityRules } from '../context'

/**
 * Matches the opening `<link ...>` tag.
 * - capture 1: everything after the tag name up to and including the closing `>`
 * The `i` flag makes upper-case tags match, since HTML tag names are case-insensitive.
 * The lookahead requires whitespace, `/` or `>` right after the tag name, so that
 * hyphenated custom elements such as `<link-card>` are not rewritten.
 */
const LINK_RE = /<link(?=[\s/>])([^>]*?>)/gi
/**
 * Matches the opening `<script ...>` tag.
 * - capture 1: everything after the tag name up to and including the closing `>`
 * Case-insensitive and bounded on the tag name for the same reasons as LINK_RE.
 */
const SCRIPT_RE = /<script(?=[\s/>])([^>]*?>)/gi

Check failure

Code scanning / CodeQL

Bad HTML filtering regexp High

This regular expression does not match upper case <SCRIPT> tags.
/**
 * Matches the opening `<style ...>` tag.
 * - capture 1: everything after the tag name up to and including the closing `>`
 * The `i` flag makes upper-case tags match, since HTML tag names are case-insensitive.
 * The lookahead requires whitespace, `/` or `>` right after the tag name, so that
 * hyphenated custom elements such as `<style-guide>` are not rewritten.
 */
const STYLE_RE = /<style(?=[\s/>])([^>]*?>)/gi


/**
* This plugin generates a nonce for the current request and adds it to the HTML.
* It only runs in SSR mode.
Expand Down Expand Up @@ -31,15 +36,21 @@ export default defineNitroPlugin((nitroApp) => {
// Scan all relevant sections of the NuxtRenderHtmlContext
type Section = 'body' | 'bodyAppend' | 'bodyPrepend' | 'head'
const sections = ['body', 'bodyAppend', 'bodyPrepend', 'head'] as Section[]
const cheerios = event.context.security!.cheerios!
for (const section of sections) {
cheerios[section].forEach($ => {
html[section] = html[section].map(element => {
// Add nonce to all link tags
$('link').attr('nonce', nonce)
element = element.replace(LINK_RE, (match, rest)=>{
return `<link nonce="${nonce}"` + rest
})
// Add nonce to all script tags
$('script').attr('nonce', nonce)
element = element.replace(SCRIPT_RE, (match, rest)=>{
return `<script nonce="${nonce}"` + rest
})
// Add nonce to all style tags
$('style').attr('nonce', nonce)
element = element.replace(STYLE_RE, (match, rest)=>{
return `<style nonce="${nonce}"` + rest
})
return element
})
}
})
Expand Down
23 changes: 6 additions & 17 deletions src/runtime/nitro/plugins/60-recombineHtml.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,21 @@ import { resolveSecurityRules } from '../context'
import { headerStringFromObject } from '../../../utils/headers'

/**
* This plugin recombines the HTML sections from the Cheerio instances in the event context.
* It also adds the Content-Security-Policy header to the HTML meta tag in SSG mode.
* This plugin adds the Content-Security-Policy header to the HTML meta tag in SSG mode.
*/
export default defineNitroPlugin((nitroApp) => {
if (!import.meta.prerender) {
return
}

nitroApp.hooks.hook('render:html', (html, { event }) => {
// Exit if no need to parse HTML for this route
const rules = resolveSecurityRules(event)
if (!rules.enabled) {
return
}

if (rules.sri || (rules.headers && rules.headers.contentSecurityPolicy)) {
// Scan all relevant sections of the NuxtRenderHtmlContext
type Section = 'body' | 'bodyAppend' | 'bodyPrepend' | 'head'
const sections = ['body', 'bodyAppend', 'bodyPrepend', 'head'] as Section[]
const cheerios = event.context.security!.cheerios!

for (const section of sections) {
html[section] = cheerios[section].map($ => {
const html = $.html()
return html
})
}
}

if (rules.ssg && rules.ssg.meta && rules.headers && rules.headers.contentSecurityPolicy && import.meta.prerender) {
if (rules.ssg && rules.ssg.meta && rules.headers && rules.headers.contentSecurityPolicy) {
const csp = structuredClone(rules.headers.contentSecurityPolicy)
csp['frame-ancestors'] = false
const headerValue = headerStringFromObject('contentSecurityPolicy', csp)
Expand Down
2 changes: 0 additions & 2 deletions src/types/module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ import type { Options as RemoveOptions } from 'unplugin-remove/types'
import type { SecurityHeaders } from './headers'
import type { AllowedHTTPMethods, BasicAuth, RateLimiter, RequestSizeLimiter, XssValidator, CorsOptions } from './middlewares'
import type { HookResult } from '@nuxt/schema'
import type { CheerioAPI } from 'cheerio'

export type Ssg = {
meta?: boolean;
Expand Down Expand Up @@ -95,7 +94,6 @@ declare module 'h3' {
script: Set<string>;
style: Set<string>;
};
cheerios?: Record<Section, CheerioAPI[]>;
}
}
}
Loading

0 comments on commit 60ddf61

Please sign in to comment.