Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions crates/oxc_linter/src/rules/eslint/no_control_regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,9 @@ mod tests {
r"/\u{1F}/",
r"/\u{1F}/g",
r"new RegExp('\\u{20}', 'u')",
r"new RegExp('\\u{20}', `u`)",
r"new RegExp('\\u{1F}')",
r"new RegExp(`\\u{1F}`)",
r"new RegExp('\\u{1F}', 'g')",
r"new RegExp('\\u{1F}', flags)", // unknown flags, we assume no 'u'
// https://github.com/oxc-project/oxc/issues/6136
Expand Down Expand Up @@ -347,6 +349,8 @@ mod tests {
r"/\u{1F}/u",
r"/\u{1F}/ugi",
r"new RegExp('\\u{1F}', 'u')",
r"new RegExp(`\\u{1F}`, 'u')",
r"new RegExp('\\u{1F}', `u`)",
r"new RegExp('\\u{1F}', 'ugi')",
// https://github.com/oxc-project/oxc/issues/6136
r"/\u{0a}/u",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,7 @@ fn test() {
r"new RegExp(`${prefix}\\1(a)`)",
r"let RegExp; new RegExp('\\1(a)');",
r"function foo() { var RegExp; RegExp('\\1(a)', 'u'); }",
r"function foo() { var RegExp; RegExp('\\1(a)', `u`); }",
r"function foo(RegExp) { new RegExp('\\1(a)'); }",
r"if (foo) { const RegExp = bar; RegExp('\\1(a)'); }",
// we don't support globals off yet
Expand Down
Binary file modified crates/oxc_linter/src/snapshots/eslint_no_control_regex.snap
Binary file not shown.
38 changes: 37 additions & 1 deletion crates/oxc_linter/src/utils/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,19 +39,55 @@ where
let arg2 = arg2.and_then(Argument::as_expression).map(Expression::get_inner_expression);
// note: improvements required for strings used via identifier references
// Missing or non-string arguments will be runtime errors, but are not covered by this rule.
match (&arg1, &arg2) {
match (arg1, arg2) {
(Some(Expression::StringLiteral(pattern)), Some(Expression::StringLiteral(flags))) => {
let allocator = Allocator::default();
if let Some(pat) = parse_regex(&allocator, pattern.span, Some(flags.span), ctx) {
cb(&pat, pattern.span);
}
}
(Some(Expression::StringLiteral(pattern)), Some(Expression::TemplateLiteral(flags))) => {
if !flags.is_no_substitution_template() {
return;
}
let allocator = Allocator::default();
if let Some(pat) = parse_regex(&allocator, pattern.span, Some(flags.span), ctx) {
cb(&pat, pattern.span);
}
}
(Some(Expression::StringLiteral(pattern)), _) => {
let allocator = Allocator::default();
if let Some(pat) = parse_regex(&allocator, pattern.span, None, ctx) {
cb(&pat, pattern.span);
}
}
(Some(Expression::TemplateLiteral(pattern)), Some(Expression::TemplateLiteral(flags))) => {
if !pattern.is_no_substitution_template() || !flags.is_no_substitution_template() {
return;
}
let allocator = Allocator::default();
if let Some(pat) = parse_regex(&allocator, pattern.span, Some(flags.span), ctx) {
cb(&pat, pattern.span);
}
}
(Some(Expression::TemplateLiteral(pattern)), Some(Expression::StringLiteral(flags))) => {
if !pattern.is_no_substitution_template() {
return;
}
let allocator = Allocator::default();
if let Some(pat) = parse_regex(&allocator, pattern.span, Some(flags.span), ctx) {
cb(&pat, pattern.span);
}
}
(Some(Expression::TemplateLiteral(pattern)), _) => {
if !pattern.is_no_substitution_template() {
return;
}
let allocator = Allocator::default();
if let Some(pat) = parse_regex(&allocator, pattern.span, None, ctx) {
cb(&pat, pattern.span);
}
}
_ => {}
}
}
Expand Down
2 changes: 1 addition & 1 deletion crates/oxc_regular_expression/src/parser/parser_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ impl<'a> ConstructorParser<'a> {
(false, false)
};

let pattern_text = if matches!(self.pattern_text, r#""""# | "''") {
let pattern_text = if matches!(self.pattern_text, r#""""# | "''" | "``") {
r#""(?:)""#
} else {
self.pattern_text
Expand Down
10 changes: 10 additions & 0 deletions crates/oxc_regular_expression/src/parser/reader/ast.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
use oxc_span::Span;

/// A single resolved unit of a parsed literal body.
///
/// Represents a UTF-16 code unit (`u16` as `u32`) or a Unicode code point (`char` as `u32`).
/// The `Span` width may be more than 1, because one unit can come from an escape sequence
/// that occupies several characters of the source text.
#[derive(Debug, Clone, Copy)]
pub struct CodePoint {
// Location in the source text that produced this unit.
pub span: Span,
// NOTE: If we need codegen, more information should be added.
// Numeric value of the code unit or code point.
pub value: u32,
}
6 changes: 6 additions & 0 deletions crates/oxc_regular_expression/src/parser/reader/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
mod ast;
mod characters;
mod options;
mod reader_impl;
mod string_literal_parser;
mod template_literal_parser;

pub use ast::*;
pub use options::Options;
pub use reader_impl::Reader;

#[cfg(test)]
Expand Down
54 changes: 36 additions & 18 deletions crates/oxc_regular_expression/src/parser/reader/reader_impl.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
use oxc_diagnostics::Result;
use oxc_span::Atom;

use crate::parser::reader::string_literal_parser::{
Options as StringLiteralParserOptions, Parser as StringLiteralParser, ast as StringLiteralAst,
parse_regexp_literal,
use crate::parser::reader::{
Options,
ast::CodePoint,
string_literal_parser::{
Parser as StringLiteralParser, ast as StringLiteralAst, parse_regexp_literal,
},
template_literal_parser::{Parser as TemplateLiteralParser, ast as TemplateLiteralAst},
};

#[derive(Debug)]
pub struct Reader<'a> {
source_text: &'a str,
units: Vec<StringLiteralAst::CodePoint>,
units: Vec<CodePoint>,
index: usize,
offset: u32,
}
Expand All @@ -17,24 +22,37 @@ impl<'a> Reader<'a> {
pub fn initialize(
source_text: &'a str,
unicode_mode: bool,
parse_string_literal: bool,
parse_string_or_template_literal: bool,
) -> Result<Self> {
// NOTE: This must be `0`.
// Since `source_text` here may be a slice of the original source text,
// using `Span` for `span.source_text(source_text)` will be out of range in some cases.
let span_offset = 0;

let units = if parse_string_literal {
let StringLiteralAst::StringLiteral { body, .. } = StringLiteralParser::new(
source_text,
StringLiteralParserOptions {
strict_mode: false,
span_offset,
combine_surrogate_pair: unicode_mode,
},
)
.parse()?;
body
let units = if parse_string_or_template_literal {
if source_text.chars().next().is_some_and(|c| c == '`') {
let TemplateLiteralAst::TemplateLiteral { body, .. } = TemplateLiteralParser::new(
source_text,
Options {
strict_mode: false,
span_offset,
combine_surrogate_pair: unicode_mode,
},
)
.parse()?;
body
} else {
let StringLiteralAst::StringLiteral { body, .. } = StringLiteralParser::new(
source_text,
Options {
strict_mode: false,
span_offset,
combine_surrogate_pair: unicode_mode,
},
)
.parse()?;
body
}
} else {
parse_regexp_literal(source_text, span_offset, unicode_mode)
};
Expand All @@ -43,9 +61,9 @@ impl<'a> Reader<'a> {
source_text,
units,
index: 0,
// If `parse_string_literal` is `true`, the first character is the opening quote.
// If `parse_string_or_template_literal` is `true`, the first character is the opening quote.
// We need to +1 to skip it.
offset: u32::from(parse_string_literal),
offset: u32::from(parse_string_or_template_literal),
})
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use oxc_span::Span;

use crate::parser::reader::ast::CodePoint;

#[derive(Debug)]
pub struct StringLiteral {
#[allow(unused, clippy::allow_attributes)]
Expand All @@ -14,12 +16,3 @@ pub enum StringLiteralKind {
Double,
Single,
}

/// Represents UTF-16 code unit(u16 as u32) or Unicode code point(char as u32).
/// `Span` width may be more than 1, since there will be escape sequences.
#[derive(Debug, Clone, Copy)]
pub struct CodePoint {
pub span: Span,
// NOTE: If we need codegen, more information should be added.
pub value: u32,
}
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
pub mod ast;
mod characters;
mod diagnostics;
mod options;
mod parser_impl;

pub use options::Options;
pub use parser_impl::{Parser, parse_regexp_literal};

#[cfg(test)]
mod test {
use super::{Options, Parser, ast, parse_regexp_literal};
use crate::parser::reader::Options;

use super::{Parser, ast, parse_regexp_literal};

#[test]
fn should_pass() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
use oxc_diagnostics::Result;
use oxc_span::Span;

use crate::parser::reader::string_literal_parser::{
ast,
use crate::parser::reader::{
CodePoint, Options,
characters::{
CR, LF, LS, PS, is_line_terminator, is_non_escape_character, is_single_escape_character,
},
diagnostics,
options::Options,
string_literal_parser::{ast, diagnostics},
};

// Internal representation of escape sequence resolved unit in a string literal.
Expand All @@ -19,7 +18,7 @@ pub fn parse_regexp_literal(
source_text: &str,
span_offset: u32,
combine_surrogate_pair: bool,
) -> Vec<ast::CodePoint> {
) -> Vec<CodePoint> {
let mut body = vec![];

let mut offset = 0;
Expand Down Expand Up @@ -50,7 +49,7 @@ pub struct Parser {
impl Parser {
// This is public because it is used in `parse_regexp_literal()`.
pub fn handle_code_point(
body: &mut Vec<ast::CodePoint>,
body: &mut Vec<CodePoint>,
(offsets, cp): OffsetsAndCp,
span_offset: u32,
combine_surrogate_pair: bool,
Expand All @@ -59,13 +58,13 @@ impl Parser {

if combine_surrogate_pair || (0..=0xffff).contains(&cp) {
// If the code point is in the BMP or if forced, just push it
body.push(ast::CodePoint { span, value: cp });
body.push(CodePoint { span, value: cp });
} else {
// Otherwise, split the code point into a surrogate pair, sharing the same span
let (lead, trail) =
(0xd800 + ((cp - 0x10000) >> 10), 0xdc00 + ((cp - 0x10000) & 0x3ff));
body.push(ast::CodePoint { span, value: lead });
body.push(ast::CodePoint { span, value: trail });
body.push(CodePoint { span, value: lead });
body.push(CodePoint { span, value: trail });
}
}

Expand Down Expand Up @@ -114,10 +113,7 @@ impl Parser {
// SingleStringCharacters ::
// SingleStringCharacter SingleStringCharacters[opt]
// ```
fn parse_string_characters(
&mut self,
single_or_double_quote: char,
) -> Result<Vec<ast::CodePoint>> {
fn parse_string_characters(&mut self, single_or_double_quote: char) -> Result<Vec<CodePoint>> {
let mut body = vec![];
while let Some(code_point) = self.parse_string_character(single_or_double_quote)? {
Parser::handle_code_point(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# template_literal_parser

Implements ECMAScript® 2025 Language Specification

- https://tc39.es/ecma262/2025/multipage/ecmascript-language-lexical-grammar.html#sec-template-literal-lexical-components

It only supports `NoSubstitutionTemplate` and returns a diagnostic when it finds `${}` inside it.
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
use oxc_span::Span;

use crate::parser::reader::CodePoint;

/// Result of parsing a template literal: its span plus the resolved units of its body.
#[derive(Debug)]
pub struct TemplateLiteral {
// Span of the literal. Presumably kept for completeness; the `allow` silences the
// unused-field lint — TODO confirm no consumer reads it.
#[allow(unused, clippy::allow_attributes)]
pub span: Span,
// Units that make up the literal's content, in source order.
pub body: Vec<CodePoint>,
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
use oxc_diagnostics::OxcDiagnostic;
use oxc_span::Span;

#[cold]
pub fn invalid_input(span: Span) -> OxcDiagnostic {
OxcDiagnostic::error(
"Template literal should be wrapped with ` or escaped properly".to_string(),
)
.with_label(span)
}

#[cold]
pub fn template_substitution(span: Span) -> OxcDiagnostic {
OxcDiagnostic::error("Template literal should not contain unescaped `${}`".to_string())
.with_label(span)
}

#[cold]
pub fn too_large_unicode_escape_sequence(span: Span) -> OxcDiagnostic {
OxcDiagnostic::error("Too large unicode escape sequence".to_string()).with_label(span)
}

#[cold]
pub fn invalid_hex_escape(span: Span) -> OxcDiagnostic {
OxcDiagnostic::error("Invalid hex escape sequence".to_string()).with_label(span)
}

#[cold]
pub fn invalid_unicode_escape(span: Span) -> OxcDiagnostic {
OxcDiagnostic::error("Invalid unicode escape sequence".to_string()).with_label(span)
}
Loading
Loading