Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide Syntax Checking for Regular Expressions #55600

Merged
merged 14 commits into from
Apr 19, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Revise According to Suggestions
graphemecluster committed Apr 19, 2024
commit 7d6c4651e7af16c74311629edf3e3e3658dfe941
2 changes: 2 additions & 0 deletions src/compiler/commandLineParser.ts
Original file line number Diff line number Diff line change
@@ -534,6 +534,8 @@ export const targetOptionDeclaration: CommandLineOptionOfCustomType = {
es2020: ScriptTarget.ES2020,
es2021: ScriptTarget.ES2021,
es2022: ScriptTarget.ES2022,
es2023: ScriptTarget.ES2023,
es2024: ScriptTarget.ES2024,
esnext: ScriptTarget.ESNext,
})),
affectsSourceFile: true,
2 changes: 1 addition & 1 deletion src/compiler/diagnosticMessages.json
Original file line number Diff line number Diff line change
@@ -1789,7 +1789,7 @@
"category": "Error",
"code": 1534
},
"'\\{0}' is not a valid character escape.": {
"This character cannot be escaped in a regular expression.": {
"category": "Error",
"code": 1535
},
4 changes: 2 additions & 2 deletions src/compiler/program.ts
Original file line number Diff line number Diff line change
@@ -126,6 +126,7 @@ import {
getLineStarts,
getMatchedFileSpec,
getMatchedIncludeSpec,
getNameOfScriptTarget,
getNewLineCharacter,
getNormalizedAbsolutePath,
getNormalizedAbsolutePathWithoutRoot,
@@ -306,7 +307,6 @@ import {
SyntaxKind,
sys,
System,
targetOptionDeclaration,
toFileNameLowerCase,
tokenToString,
toPath as ts_toPath,
@@ -4712,7 +4712,7 @@ export function createProgram(rootNamesOrOptions: readonly string[] | CreateProg
message = Diagnostics.File_is_library_specified_here;
break;
}
const target = forEachEntry(targetOptionDeclaration.type, (value, key) => value === getEmitScriptTarget(options) ? key : undefined);
const target = getNameOfScriptTarget(getEmitScriptTarget(options));
configFileNode = target ? getOptionsSyntaxByValue("target", target) : undefined;
message = Diagnostics.File_is_default_library_for_target_specified_here;
break;
50 changes: 29 additions & 21 deletions src/compiler/scanner.ts
Original file line number Diff line number Diff line change
@@ -12,6 +12,7 @@ import {
DiagnosticMessage,
Diagnostics,
forEach,
getNameOfScriptTarget,
getSpellingSuggestion,
identity,
JSDocParsingMode,
@@ -428,10 +429,6 @@ export function characterToRegularExpressionFlag(c: string): RegularExpressionFl
return charToRegExpFlag.get(c);
}

function regularExpressionFlagToFirstAvailableLanguageVersion(f: RegularExpressionFlags): ScriptTarget | undefined {
return regExpFlagToFirstAvailableLanguageVersion.get(f);
}

/** @internal */
export function computeLineStarts(text: string): number[] {
const result: number[] = [];
@@ -1627,13 +1624,13 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
return "";
default:
if (isRegularExpression && (shouldEmitInvalidEscapeError || isIdentifierPart(ch, languageVersion))) {
error(Diagnostics._0_is_not_a_valid_character_escape, pos - 2, 2, String.fromCharCode(ch));
error(Diagnostics.This_character_cannot_be_escaped_in_a_regular_expression, pos - 2, 2);
}
return String.fromCharCode(ch);
}
}

function scanExtendedUnicodeEscape(shouldEmitInvalidEscapeError = true): string {
function scanExtendedUnicodeEscape(shouldEmitInvalidEscapeError: boolean): string {
const start = pos;
pos += 3;
const escapedStart = pos;
@@ -1717,7 +1714,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
else if (ch === CharacterCodes.backslash) {
ch = peekExtendedUnicodeEscape();
if (ch >= 0 && isIdentifierPart(ch, languageVersion)) {
result += scanExtendedUnicodeEscape();
result += scanExtendedUnicodeEscape(/*shouldEmitInvalidEscapeError*/ true);
start = pos;
continue;
}
@@ -2243,7 +2240,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
case CharacterCodes.backslash:
const extendedCookedChar = peekExtendedUnicodeEscape();
if (extendedCookedChar >= 0 && isIdentifierStart(extendedCookedChar, languageVersion)) {
tokenValue = scanExtendedUnicodeEscape() + scanIdentifierParts();
tokenValue = scanExtendedUnicodeEscape(/*shouldEmitInvalidEscapeError*/ true) + scanIdentifierParts();
return token = getIdentifierToken();
}

@@ -2270,7 +2267,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
pos++;
const extendedCookedChar = peekExtendedUnicodeEscape();
if (extendedCookedChar >= 0 && isIdentifierStart(extendedCookedChar, languageVersion)) {
tokenValue = "#" + scanExtendedUnicodeEscape() + scanIdentifierParts();
tokenValue = "#" + scanExtendedUnicodeEscape(/*shouldEmitInvalidEscapeError*/ true) + scanIdentifierParts();
return token = SyntaxKind.PrivateIdentifier;
}

@@ -2402,9 +2399,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
// Quickly get to the end of the regex so that we know the flags
let p = tokenStart + 1;
let inEscape = false;
// Although nested character class is allowed in Unicode Sets mode,
// an unescaped slash is nevertheless invalid even in character class in Unicode mode.
// Thus we can simply ignore nested character class in the first pass.
// Although nested character classes are allowed in Unicode Sets mode,
// an unescaped slash is nevertheless invalid even in a character class in Unicode mode.
// Additionally, parsing nested character classes will misinterpret regexes like `/[[]/`
// as unterminated, consuming characters beyond the slash. (This even applies to `/[[]/v`,
// which should be parsed as a well-terminated regex with an incomplete character class.)
// Thus we must not handle nested character classes in the first pass.
let inCharacterClass = false;
while (true) {
// If we reach the end of a file, or hit a newline, then this is an unterminated
@@ -2464,17 +2464,17 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
}
else {
regExpFlags |= flag;
const availableFrom = regularExpressionFlagToFirstAvailableLanguageVersion(flag)!;
const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag)!;
if (languageVersion < availableFrom) {
error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, p, 1, ScriptTarget[availableFrom]);
error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, p, 1, getNameOfScriptTarget(availableFrom));
}
}
p++;
}
pos = tokenStart + 1;
const saveTokenPos = tokenStart;
const saveTokenFlags = tokenFlags;
scanRegularExpressionWorker(text.slice(0, endOfBody), endOfBody, regExpFlags, isUnterminated);
scanRegularExpressionWorker(text, endOfBody, regExpFlags, isUnterminated);
if (!isUnterminated) {
pos = p;
}
@@ -2486,14 +2486,22 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
return token;

function scanRegularExpressionWorker(text: string, end: number, regExpFlags: RegularExpressionFlags, isUnterminated: boolean) {
/** Grammar parameter */
const unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode);
/** Grammar parameter */
const unicodeSetsMode = !!(regExpFlags & RegularExpressionFlags.UnicodeSets);
/** @see {scanClassSetExpression} */
let mayContainStrings = false;

/** The number of numeric (anonymous) capturing groups defined in the regex. */
let numberOfCapturingGroups = 0;
/** All named capturing groups defined in the regex. */
const groupSpecifiers = new Set<string>();
/** All references to named capturing groups in the regex. */
const groupNameReferences: (TextRange & { name: string; })[] = [];
/** All numeric backreferences within the regex. */
const decimalEscapes: (TextRange & { value: number; })[] = [];
/** A stack of scopes for named capturing groups. @see {scanGroupName} */
const namedCapturingGroups: Set<string>[] = [];

// Disjunction ::= Alternative ('|' Alternative)*
@@ -2595,7 +2603,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
break;
default:
const start = pos;
const setFlags = scanPatternModifiers();
const setFlags = scanPatternModifiers(RegularExpressionFlags.None);
if (text.charCodeAt(pos) === CharacterCodes.minus) {
pos++;
scanPatternModifiers(setFlags);
@@ -2689,7 +2697,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
// Assume everything starting from this character is outside of the regex
return;
}
if (unicodeMode) {
if (unicodeMode || ch === CharacterCodes.closeParen) {
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch));
}
pos++;
@@ -2706,7 +2714,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
}
}

function scanPatternModifiers(currFlags = RegularExpressionFlags.None): RegularExpressionFlags {
function scanPatternModifiers(currFlags: RegularExpressionFlags): RegularExpressionFlags {
while (pos < end) {
const ch = text.charCodeAt(pos);
if (!isIdentifierPart(ch, languageVersion)) {
@@ -2724,9 +2732,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
}
else {
currFlags |= flag;
const availableFrom = regularExpressionFlagToFirstAvailableLanguageVersion(flag)!;
const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag)!;
if (languageVersion < availableFrom) {
error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, pos, 1, ScriptTarget[availableFrom]);
error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, pos, 1, getNameOfScriptTarget(availableFrom));
}
}
pos++;
@@ -3702,7 +3710,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
pos--;
const extendedCookedChar = peekExtendedUnicodeEscape();
if (extendedCookedChar >= 0 && isIdentifierStart(extendedCookedChar, languageVersion)) {
tokenValue = scanExtendedUnicodeEscape() + scanIdentifierParts();
tokenValue = scanExtendedUnicodeEscape(/*shouldEmitInvalidEscapeError*/ true) + scanIdentifierParts();
return token = getIdentifierToken();
}

2 changes: 1 addition & 1 deletion src/compiler/types.ts
Original file line number Diff line number Diff line change
@@ -7366,7 +7366,7 @@ export const enum ScriptKind {
// NOTE: We must reevaluate the target for upcoming features when each successive TC39 edition is ratified in
// June of each year. This includes changes to `LanguageFeatureMinimumTarget`, `ScriptTarget`,
// transformers/esnext.ts, commandLineParser.ts, and the contents of each lib/esnext.*.d.ts file.
export enum ScriptTarget {
export const enum ScriptTarget {
/** @deprecated */
ES3 = 0,
ES5 = 1,
6 changes: 6 additions & 0 deletions src/compiler/utilities.ts
Original file line number Diff line number Diff line change
@@ -514,6 +514,7 @@ import {
SymbolTable,
SyntaxKind,
TaggedTemplateExpression,
targetOptionDeclaration,
TemplateExpression,
TemplateLiteral,
TemplateLiteralLikeNode,
@@ -8921,6 +8922,11 @@ export function getStrictOptionValue(compilerOptions: CompilerOptions, flag: Str
return compilerOptions[flag] === undefined ? !!compilerOptions.strict : !!compilerOptions[flag];
}

/**
 * Looks up the name under which `scriptTarget` is registered in `targetOptionDeclaration.type`,
 * i.e. the key of an entry whose value is strictly equal to `scriptTarget`.
 *
 * NOTE(review): the result relies on `forEachEntry`; presumably it returns the callback's first
 * defined result and `undefined` when no entry matches — confirm against its definition.
 *
 * @param scriptTarget The script target whose option name is wanted.
 * @returns The matching option key, or `undefined` if the lookup produces none.
 *
 * @internal
 */
export function getNameOfScriptTarget(scriptTarget: ScriptTarget): string | undefined {
    return forEachEntry(targetOptionDeclaration.type, (candidate, optionName) => {
        if (candidate === scriptTarget) {
            return optionName;
        }
        return undefined;
    });
}

/** @internal */
export function getEmitStandardClassFields(compilerOptions: CompilerOptions) {
return compilerOptions.useDefineForClassFields !== false && getEmitScriptTarget(compilerOptions) >= ScriptTarget.ES2022;
5 changes: 2 additions & 3 deletions src/compiler/watch.ts
Original file line number Diff line number Diff line change
@@ -46,7 +46,6 @@ import {
find,
flattenDiagnosticMessageText,
forEach,
forEachEntry,
ForegroundColorEscapeSequences,
formatColorAndReset,
formatDiagnostic,
@@ -57,6 +56,7 @@ import {
getDirectoryPath,
getEmitScriptTarget,
getLineAndCharacterOfPosition,
getNameOfScriptTarget,
getNewLineCharacter,
getNormalizedAbsolutePath,
getParsedCommandLineOfConfigFile,
@@ -93,7 +93,6 @@ import {
sourceMapCommentRegExpDontCareLineStart,
sys,
System,
targetOptionDeclaration,
WatchCompilerHost,
WatchCompilerHostOfConfigFile,
WatchCompilerHostOfFilesAndCompilerOptions,
@@ -537,7 +536,7 @@ export function fileIncludeReasonToDiagnostics(program: Program, reason: FileInc
}
case FileIncludeKind.LibFile: {
if (reason.index !== undefined) return chainDiagnosticMessages(/*details*/ undefined, Diagnostics.Library_0_specified_in_compilerOptions, options.lib![reason.index]);
const target = forEachEntry(targetOptionDeclaration.type, (value, key) => value === getEmitScriptTarget(options) ? key : undefined);
const target = getNameOfScriptTarget(getEmitScriptTarget(options));
const messageAndArgs: DiagnosticAndArguments = target ? [Diagnostics.Default_library_for_target_0, target] : [Diagnostics.Default_library];
return chainDiagnosticMessages(/*details*/ undefined, ...messageAndArgs);
}
20 changes: 20 additions & 0 deletions tests/baselines/reference/callChainWithSuper(target=es2023).js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//// [tests/cases/conformance/expressions/optionalChaining/callChain/callChainWithSuper.ts] ////

//// [callChainWithSuper.ts]
// GH#34952
class Base { method?() {} }
class Derived extends Base {
method1() { return super.method?.(); }
method2() { return super["method"]?.(); }
}

//// [callChainWithSuper.js]
"use strict";
// GH#34952
class Base {
method() { }
}
class Derived extends Base {
method1() { return super.method?.(); }
method2() { return super["method"]?.(); }
}
20 changes: 20 additions & 0 deletions tests/baselines/reference/callChainWithSuper(target=es2024).js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//// [tests/cases/conformance/expressions/optionalChaining/callChain/callChainWithSuper.ts] ////

//// [callChainWithSuper.ts]
// GH#34952
class Base { method?() {} }
class Derived extends Base {
method1() { return super.method?.(); }
method2() { return super["method"]?.(); }
}

//// [callChainWithSuper.js]
"use strict";
// GH#34952
class Base {
method() { }
}
class Derived extends Base {
method1() { return super.method?.(); }
method2() { return super["method"]?.(); }
}
Original file line number Diff line number Diff line change
@@ -7,4 +7,4 @@ FileNames::
0.ts
Errors::
error TS6044: Compiler option 'target' expects an argument.
error TS6046: Argument for '--target' option must be: 'es5', 'es6', 'es2015', 'es2016', 'es2017', 'es2018', 'es2019', 'es2020', 'es2021', 'es2022', 'esnext'.
error TS6046: Argument for '--target' option must be: 'es5', 'es6', 'es2015', 'es2016', 'es2017', 'es2018', 'es2019', 'es2020', 'es2021', 'es2022', 'es2023', 'es2024', 'esnext'.
Original file line number Diff line number Diff line change
@@ -23,5 +23,5 @@ CompilerOptions::
"configFilePath": "tsconfig.json"
}
Errors::
error TS6046: Argument for '--target' option must be: 'es5', 'es6', 'es2015', 'es2016', 'es2017', 'es2018', 'es2019', 'es2020', 'es2021', 'es2022', 'esnext'.
error TS6046: Argument for '--target' option must be: 'es5', 'es6', 'es2015', 'es2016', 'es2017', 'es2018', 'es2019', 'es2020', 'es2021', 'es2022', 'es2023', 'es2024', 'esnext'.

Original file line number Diff line number Diff line change
@@ -23,7 +23,7 @@ CompilerOptions::
"configFilePath": "tsconfig.json"
}
Errors::
tsconfig.json:3:15 - error TS6046: Argument for '--target' option must be: 'es5', 'es6', 'es2015', 'es2016', 'es2017', 'es2018', 'es2019', 'es2020', 'es2021', 'es2022', 'esnext'.
tsconfig.json:3:15 - error TS6046: Argument for '--target' option must be: 'es5', 'es6', 'es2015', 'es2016', 'es2017', 'es2018', 'es2019', 'es2020', 'es2021', 'es2022', 'es2023', 'es2024', 'esnext'.

3 "target": "",
   ~~
Original file line number Diff line number Diff line change
@@ -47,7 +47,7 @@ Errors::
  ~~~
 19 }
  ~
tsconfig.json:3:15 - error TS6046: Argument for '--target' option must be: 'es5', 'es6', 'es2015', 'es2016', 'es2017', 'es2018', 'es2019', 'es2020', 'es2021', 'es2022', 'esnext'.
tsconfig.json:3:15 - error TS6046: Argument for '--target' option must be: 'es5', 'es6', 'es2015', 'es2016', 'es2017', 'es2018', 'es2019', 'es2020', 'es2021', 'es2022', 'es2023', 'es2024', 'esnext'.

3 "target": "<%- options.useTsWithBabel ? 'esnext' : 'es5' %>",
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
regExpWithOpenBracketInCharClass.ts(4,7): error TS1005: ']' expected.
regExpWithOpenBracketInCharClass.ts(4,8): error TS1501: This regular expression flag is only available when targeting 'es2024' or later.


==== regExpWithOpenBracketInCharClass.ts (2 errors) ====
const regexes: RegExp[] = [
/[[]/, // Valid
/[[]/u, // Valid
/[[]/v, // Well-terminated regex with an incomplete character class

!!! error TS1005: ']' expected.
~
!!! error TS1501: This regular expression flag is only available when targeting 'es2024' or later.
];

Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
//// [tests/cases/compiler/regExpWithOpenBracketInCharClass.ts] ////

//// [regExpWithOpenBracketInCharClass.ts]
const regexes: RegExp[] = [
/[[]/, // Valid
/[[]/u, // Valid
/[[]/v, // Well-terminated regex with an incomplete character class
];


//// [regExpWithOpenBracketInCharClass.js]
const regexes = [
/[[]/, // Valid
/[[]/u, // Valid
/[[]/v, // Well-terminated regex with an incomplete character class
];
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
//// [tests/cases/compiler/regExpWithOpenBracketInCharClass.ts] ////

=== regExpWithOpenBracketInCharClass.ts ===
const regexes: RegExp[] = [
>regexes : Symbol(regexes, Decl(regExpWithOpenBracketInCharClass.ts, 0, 5))
>RegExp : Symbol(RegExp, Decl(lib.es5.d.ts, --, --), Decl(lib.es5.d.ts, --, --), Decl(lib.es2015.core.d.ts, --, --), Decl(lib.es2015.symbol.wellknown.d.ts, --, --))

/[[]/, // Valid
/[[]/u, // Valid
/[[]/v, // Well-terminated regex with an incomplete character class
];

Loading