From a11eb10b34d41ae596cd9218d1ed0c538b69dc58 Mon Sep 17 00:00:00 2001 From: Tibor Blenessy Date: Fri, 10 Sep 2021 10:21:47 +0200 Subject: [PATCH] Fix handling of forward slash when parsing regexp (#2798) --- eslint-bridge/src/rules/regex-complexity.ts | 20 +++++--- eslint-bridge/src/utils/utils-regex.ts | 8 ++- .../src/utils/utils-string-literal.ts | 8 +++ .../tests/rules/regex-complexity.test.ts | 50 +++++++++++++++++++ eslint-bridge/tests/utils/utils-regex.test.ts | 42 ++++++++++++++++ 5 files changed, 118 insertions(+), 10 deletions(-) create mode 100644 eslint-bridge/tests/utils/utils-regex.test.ts diff --git a/eslint-bridge/src/rules/regex-complexity.ts b/eslint-bridge/src/rules/regex-complexity.ts index e31cf66d3c2..b0e33980c77 100644 --- a/eslint-bridge/src/rules/regex-complexity.ts +++ b/eslint-bridge/src/rules/regex-complexity.ts @@ -35,6 +35,7 @@ import { import { getParsedRegex, getRegexpLocation, + getRegexpRange, getUniqueWriteUsage, isBinaryPlus, isIdentifier, @@ -188,9 +189,10 @@ class ComplexityCalculator { onAssertionEnter: (node: Assertion) => { /* lookaround */ if (node.kind === 'lookahead' || node.kind === 'lookbehind') { + const [start, end] = getRegexpRange(this.regexPart, node); this.increaseComplexity(this.nesting, node, [ 0, - -(node.end - node.start - 1) + (node.kind === 'lookahead' ? '?='.length : '?<='.length), + -(end - start - 1) + (node.kind === 'lookahead' ? '?='.length : '?<='.length), ]); this.nesting++; this.onDisjunctionEnter(node); @@ -216,7 +218,8 @@ class ComplexityCalculator { }, onCharacterClassEnter: (node: CharacterClass) => { /* character class */ - this.increaseComplexity(1, node, [0, -(node.end - node.start - 1)]); + const [start, end] = getRegexpRange(this.regexPart, node); + this.increaseComplexity(1, node, [0, -(end - start - 1)]); this.nesting++; }, onCharacterClassLeave: (_node: CharacterClass) => { @@ -241,7 +244,9 @@ class ComplexityCalculator { }, onQuantifierEnter: (node: Quantifier) => { /* repetition */ - this.increaseComplexity(this.nesting, node, [node.element.end - node.start, 0]); + const [start] = getRegexpRange(this.regexPart, node); + const [, end] = getRegexpRange(this.regexPart, node.element); + this.increaseComplexity(this.nesting, node, [end - start, 0]); this.nesting++; }, onQuantifierLeave: (_node: Quantifier) => { @@ -257,9 +262,10 @@ class ComplexityCalculator { if (increment > 1) { message += ` (incl ${increment - 1} for nesting)`; } + const loc = getRegexpLocation(this.regexPart, node, this.context, offset); this.components.push({ location: { - loc: getRegexpLocation(this.regexPart, node, this.context, offset), + loc, }, message, }); @@ -270,10 +276,8 @@ class ComplexityCalculator { let { alternatives } = node; let increment = this.nesting; while (alternatives.length > 1) { - this.increaseComplexity(increment, alternatives[1], [ - -1, - -(alternatives[1].end - alternatives[1].start), - ]); + const [start, end] = getRegexpRange(this.regexPart, alternatives[1]); + this.increaseComplexity(increment, alternatives[1], [-1, -(end - start)]); increment = 1; alternatives = alternatives.slice(1); } diff --git a/eslint-bridge/src/utils/utils-regex.ts b/eslint-bridge/src/utils/utils-regex.ts index a6d754dd732..d279cfba794 100644 --- a/eslint-bridge/src/utils/utils-regex.ts +++ b/eslint-bridge/src/utils/utils-regex.ts @@ -142,7 +142,7 @@ export function getRegexpLocation( return loc; } -function getRegexpRange(node: estree.Node, regexpNode: regexpp.AST.Node): AST.Range { +export function getRegexpRange(node: estree.Node, regexpNode: regexpp.AST.Node): AST.Range { if (isRegexLiteral(node)) { return [regexpNode.start, regexpNode.end]; } @@ -169,7 +169,7 @@ function getRegexpRange(node: estree.Node, regexpNode: regexpp.AST.Node): AST.Ra } // regexpNode positions are 1 - based, we need to -1 to report as 0 - based // it's possible for node start to be outside of range, e.g. `a` in new RegExp('//a') - const startToken = Math.min(regexpNode.start - 1, tokens.length - 1); + const startToken = regexpNode.start - 1; const start = tokens[startToken].range[0]; // it's possible for node end to be outside of range, e.g. new RegExp('\n(|)') const endToken = Math.min(regexpNode.end - 2, tokens.length - 1); @@ -177,6 +177,10 @@ function getRegexpRange(node: estree.Node, regexpNode: regexpp.AST.Node): AST.Ra // +1 is needed to account for string quotes return [start + 1, end + 1]; } + if (node.type === 'TemplateLiteral') { + // we don't support these properly + return node.range!; + } throw new Error(`Expected regexp or string literal, got ${node.type}`); } diff --git a/eslint-bridge/src/utils/utils-string-literal.ts b/eslint-bridge/src/utils/utils-string-literal.ts index 8aff0519a81..4258fdefa5c 100644 --- a/eslint-bridge/src/utils/utils-string-literal.ts +++ b/eslint-bridge/src/utils/utils-string-literal.ts @@ -27,6 +27,7 @@ const UNICODE_ESCAPE_LENGTH = 4; const HEX_ESCAPE_LENGTH = 2; const CP_BACK_SLASH = cp('\\'); +const CP_FORWARD_SLASH = cp('/'); const CP_CR = cp('\r'); const CP_LF = cp('\n'); const CP_n = cp('n'); @@ -143,6 +144,13 @@ export function tokenizeString(s: string): StringLiteralToken[] { if (value !== '') { tokens.push({ value, range: [start, pos] }); } + } else if (c === CP_FORWARD_SLASH) { + const forwardSlash: StringLiteralToken = { + value: String.fromCodePoint(c), + range: [start, pos], + }; + tokens.push(forwardSlash); + tokens.push(forwardSlash); } else { tokens.push({ value: String.fromCodePoint(c), range: [start, pos] }); } diff --git a/eslint-bridge/tests/rules/regex-complexity.test.ts b/eslint-bridge/tests/rules/regex-complexity.test.ts index ef60d4ab695..bef94d0a379 100644 --- a/eslint-bridge/tests/rules/regex-complexity.test.ts +++ b/eslint-bridge/tests/rules/regex-complexity.test.ts @@ -322,6 +322,56 @@ ruleTesterThreshold0.run( errors: 1, options: [0], }, + { + code: ` + RegExp('/s*') + `, + options: [0], + errors: [ + { + message: JSON.stringify({ + message: + 'Simplify this regular expression to reduce its complexity from 1 to the 0 allowed.', + cost: 1, + secondaryLocations: [ + { + message: '+1', + column: 18, + line: 2, + endColumn: 19, + endLine: 2, + }, + ], + }), + }, + ], + }, + { + code: ` + RegExp('|/?[a-z]') + `, + options: [0], + errors: [ + { + message: JSON.stringify({ + message: + 'Simplify this regular expression to reduce its complexity from 4 to the 0 allowed.', + cost: 4, + secondaryLocations: [ + { message: '+1', column: 16, line: 2, endColumn: 17, endLine: 2 }, + { + message: '+2 (incl 1 for nesting)', + column: 18, + line: 2, + endColumn: 19, + endLine: 2, + }, + { message: '+1', column: 19, line: 2, endColumn: 20, endLine: 2 }, + ], + }), + }, + ], + }, ], }, ); diff --git a/eslint-bridge/tests/utils/utils-regex.test.ts b/eslint-bridge/tests/utils/utils-regex.test.ts new file mode 100644 index 00000000000..dcb438e8815 --- /dev/null +++ b/eslint-bridge/tests/utils/utils-regex.test.ts @@ -0,0 +1,42 @@ +/* + * SonarQube JavaScript Plugin + * Copyright (C) 2011-2021 SonarSource SA + * mailto:info AT sonarsource DOT com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +import * as esprima from 'esprima'; +import * as estree from 'estree'; +import { getRegexpRange } from 'utils'; +import * as regexpp from 'regexpp'; + +it('should get range for regexp /s*', () => { + const program = esprima.parse(`'/s*'`); + const literal: estree.Literal = program.body[0].expression; + const regexNode = regexpp.parseRegExpLiteral(new RegExp(literal.value as string)); + const quantifier = regexNode.pattern.alternatives[0].elements[1]; // s* + const range = getRegexpRange(literal, quantifier); + expect(range).toStrictEqual([2, 4]); +}); + +it('should get range for regexp |/?[a-z]', () => { + const program = esprima.parse(`'|/?[a-z]'`); + const literal: estree.Literal = program.body[0].expression; + const regexNode = regexpp.parseRegExpLiteral(new RegExp(literal.value as string)); + const alternative = regexNode.pattern.alternatives[1]; // /?[a-z] + const range = getRegexpRange(literal, alternative); + expect(range).toStrictEqual([2, 9]); +});