Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: parse pragmas with monch #224

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ anyhow = "1.0.43"
data-url = "0.2.0"
deno_ast = { version = "0.24.0", features = ["dep_graph", "module_specifier"] }
futures = "0.3.17"
monch = "0.4.0"
once_cell = "1.16.0"
parking_lot = "0.12.0"
regex = "1.5.4"
Expand Down
64 changes: 19 additions & 45 deletions src/analyzer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,11 @@ use deno_ast::ModuleSpecifier;
use deno_ast::SourceRange;
use deno_ast::SourceRangedForSpanned;
use deno_ast::SourceTextInfo;
use once_cell::sync::Lazy;
use regex::Match;
use regex::Regex;
use serde::Deserialize;
use serde::Serialize;

use crate::graph::Position;

/// Matches the `@deno-types` pragma.
static DENO_TYPES_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?i)^\s*@deno-types\s*=\s*(?:["']([^"']+)["']|(\S+))"#).unwrap()
});
use crate::pragma::parse_deno_types;

/// A `@deno-types` pragma.
pub struct DenoTypesPragma {
Expand All @@ -32,47 +25,28 @@ pub struct DenoTypesPragma {
pub fn analyze_deno_types(
desc: &DependencyDescriptor,
) -> Option<DenoTypesPragma> {
fn comment_position_to_position_range(
mut comment_start: Position,
m: &Match,
) -> PositionRange {
// the comment text starts after the double slash or slash star, so add 2
comment_start.character += 2;
PositionRange {
// This will always be on the same line.
// Does -1 and +1 to include the quotes
let comment = desc.leading_comments.last()?;
// @deno-types cannot be on a multi-line comment
if comment.range.start.line != comment.range.end.line {
return None;
}

let deno_types = parse_deno_types(&comment.text).ok()?;
// the comment text starts after the double slash or slash star, so add 2
let start_char = comment.range.start.character + 2;
Some(DenoTypesPragma {
specifier: deno_types.text.to_string(),
range: PositionRange {
start: Position {
line: comment_start.line,
character: comment_start.character + m.start() - 1,
line: comment.range.start.line,
character: start_char + deno_types.quote_start,
},
end: Position {
line: comment_start.line,
character: comment_start.character + m.end() + 1,
line: comment.range.start.line,
character: start_char + deno_types.quote_end,
},
}
}

let comment = desc.leading_comments.last()?;
let captures = DENO_TYPES_RE.captures(&comment.text)?;
if let Some(m) = captures.get(1) {
Some(DenoTypesPragma {
specifier: m.as_str().to_string(),
range: comment_position_to_position_range(
comment.range.start.clone(),
&m,
),
})
} else if let Some(m) = captures.get(2) {
Some(DenoTypesPragma {
specifier: m.as_str().to_string(),
range: comment_position_to_position_range(
comment.range.start.clone(),
&m,
),
})
} else {
unreachable!("Unexpected captures from deno types regex")
}
},
})
}

#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
Expand Down
63 changes: 32 additions & 31 deletions src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use crate::analyzer::SpecifierWithRange;
use crate::analyzer::TypeScriptReference;
use crate::graph::Position;
use crate::module_specifier::ModuleSpecifier;
use crate::pragma::parse_triple_slash_reference;
use crate::DependencyKind;
use crate::ImportAssertions;

Expand All @@ -29,24 +30,15 @@ use std::cell::RefCell;
use std::collections::HashMap;
use std::sync::Arc;

// todo(dsherret): parse all these with monch in the future

/// Matches a JSDoc import type reference (`{import("./example.js")}`
static JSDOC_IMPORT_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"\{[^}]*import\(['"]([^'"]+)['"]\)[^}]*}"#).unwrap()
});
/// Matches the `@jsxImportSource` pragma.
static JSX_IMPORT_SOURCE_RE: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"(?i)^[\s*]*@jsxImportSource\s+(\S+)"#).unwrap());
/// Matches a `/// <reference ... />` comment reference.
static TRIPLE_SLASH_REFERENCE_RE: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)^/\s*<reference\s.*?/>").unwrap());
/// Matches a path reference, which adds a dependency to a module
static PATH_REFERENCE_RE: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"(?i)\spath\s*=\s*["']([^"']*)["']"#).unwrap());
/// Matches a types reference, which for JavaScript files indicates the
/// location of types to use when type checking a program that includes it as
/// a dependency.
static TYPES_REFERENCE_RE: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"(?i)\stypes\s*=\s*["']([^"']*)["']"#).unwrap());

/// Parses modules to a ParsedSource.
pub trait ModuleParser {
Expand Down Expand Up @@ -332,30 +324,39 @@ fn analyze_ts_references(
) -> Vec<TypeScriptReference> {
let mut references = Vec::new();
for comment in parsed_source.get_leading_comments().iter() {
if TRIPLE_SLASH_REFERENCE_RE.is_match(&comment.text) {
let comment_start = comment.start();
if let Some(captures) = PATH_REFERENCE_RE.captures(&comment.text) {
let m = captures.get(1).unwrap();
if comment.kind == CommentKind::Line {
if let Ok(path_ref) = parse_triple_slash_reference("path", &comment.text)
{
let comment_start = comment.range().start + 2;
references.push(TypeScriptReference::Path(SpecifierWithRange {
text: m.as_str().to_string(),
range: comment_source_to_position_range(
comment_start,
&m,
parsed_source.text_info(),
false,
),
text: path_ref.text.to_string(),
range: PositionRange {
start: Position::from_source_pos(
comment_start + path_ref.quote_start,
parsed_source.text_info(),
),
end: Position::from_source_pos(
comment_start + path_ref.quote_end,
parsed_source.text_info(),
),
},
}));
} else if let Some(captures) = TYPES_REFERENCE_RE.captures(&comment.text)
} else if let Ok(path_ref) =
parse_triple_slash_reference("types", &comment.text)
{
let m = captures.get(1).unwrap();
let comment_start = comment.range().start + 2;
references.push(TypeScriptReference::Types(SpecifierWithRange {
text: m.as_str().to_string(),
range: comment_source_to_position_range(
comment_start,
&m,
parsed_source.text_info(),
false,
),
text: path_ref.text.to_string(),
range: PositionRange {
start: Position::from_source_pos(
comment_start + path_ref.quote_start,
parsed_source.text_info(),
),
end: Position::from_source_pos(
comment_start + path_ref.quote_end,
parsed_source.text_info(),
),
},
}));
}
}
Expand Down
32 changes: 10 additions & 22 deletions src/graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -606,26 +606,7 @@ pub struct Module {
}

impl Module {
fn new(
specifier: ModuleSpecifier,
kind: ModuleKind,
source: Arc<str>,
) -> Self {
Self {
dependencies: Default::default(),
kind,
maybe_cache_info: None,
maybe_source: Some(source),
maybe_types_dependency: None,
media_type: MediaType::Unknown,
specifier,
}
}

pub fn new_without_source(
specifier: ModuleSpecifier,
kind: ModuleKind,
) -> Self {
fn new_without_source(specifier: ModuleSpecifier, kind: ModuleKind) -> Self {
Self {
dependencies: Default::default(),
kind,
Expand Down Expand Up @@ -1414,8 +1395,15 @@ pub(crate) fn parse_module_from_module_info(
maybe_resolver: Option<&dyn Resolver>,
) -> Module {
// Init the module and determine its media type
let mut module = Module::new(specifier.clone(), kind, source);
module.media_type = media_type;
let mut module = Module {
dependencies: Default::default(),
kind,
maybe_cache_info: None,
maybe_source: Some(source),
maybe_types_dependency: None,
media_type,
specifier: specifier.clone(),
};

// Analyze the TypeScript triple-slash references
for reference in module_info.ts_references {
Expand Down
3 changes: 2 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ mod analyzer;
mod ast;
mod graph;
mod module_specifier;
mod pragma;
pub mod source;
mod text_encoding;

Expand Down Expand Up @@ -476,7 +477,7 @@ mod tests {
Source::Module {
specifier: "file:///a/test01.ts",
maybe_headers: None,
content: r#"// @deno-types=./test02.d.ts
content: r#"// @deno-types="./test02.d.ts"
import * as a from "./test02.js";

console.log(a);
Expand Down
62 changes: 62 additions & 0 deletions src/pragma.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.

use monch::*;

/// The result of successfully parsing a `@deno-types` pragma.
///
/// The offsets are byte offsets relative to the start of the text that was
/// handed to the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedDenoTypes<'a> {
  /// The specifier found between the quotes (the quotes are not included).
  pub text: &'a str,
  /// Byte offset of the opening quote character.
  pub quote_start: usize,
  /// Byte offset just past the closing quote character.
  pub quote_end: usize,
}

pub fn parse_deno_types(input: &str) -> Result<ParsedDenoTypes, ParseError> {
let original_input = input;
let (input, _) = skip_whitespace(input)?;
let (input, _) = tag("@deno-types")(input)?;
let (input, _) = ch('=')(input)?;
let quote_start_input = input;
let (input, quote_char) = or(ch('"'), ch('\"'))(input)?;
let (input, text) = take_while(|c| c != quote_char)(input)?;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would go across newlines, which is not good.

let (input, _) = ch(quote_char)(input)?;
Ok(ParsedDenoTypes {
text,
quote_start: original_input.len() - quote_start_input.len(),
quote_end: original_input.len() - input.len(),
})
}

/// The result of successfully parsing a `/// <reference ... />` comment.
///
/// The offsets are byte offsets relative to the start of the text that was
/// handed to the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedTripleSlashReference<'a> {
  /// The referenced specifier found between the quotes (the quotes are not
  /// included).
  pub text: &'a str,
  /// Byte offset of the opening quote character.
  pub quote_start: usize,
  /// Byte offset just past the closing quote character.
  pub quote_end: usize,
}

/// Matches a `/// <reference ... />` comment reference based on the kind (ex. path or types).
///
/// `kind` is the attribute name to look for and `input` is the text of a line
/// comment, which starts right after the leading `//` and so is expected to
/// begin with the third slash.
///
/// On success the returned offsets are byte offsets into `input`:
/// `quote_start` points at the opening quote and `quote_end` points just past
/// the closing quote, so the range includes both quote characters.
///
/// # Errors
///
/// Returns a `ParseError` when `input` is not a reference of the given kind,
/// when the attribute value is not wrapped in matching single or double
/// quotes, or when no closing `/>` follows the attribute value.
pub fn parse_triple_slash_reference<'a>(
  kind: &str,
  input: &'a str,
) -> Result<ParsedTripleSlashReference<'a>, ParseError<'a>> {
  // regex in TS codebase: /^(\/\/\/\s*<reference\s+path\s*=\s*)(('[^']*')|("[^"]*")).*?\/>/

  let original_input = input;
  let (input, _) = ch('/')(input)?; // only one, because we're starting from within a comment line
  let (input, _) = skip_whitespace(input)?;
  let (input, _) = tag("<reference")(input)?;
  let (input, _) = skip_whitespace(input)?;
  let (input, _) = tag(kind)(input)?; // "path" or "types"
  let (input, _) = skip_whitespace(input)?;
  let (input, _) = ch('=')(input)?;
  let (input, _) = skip_whitespace(input)?;
  let quote_start_input = input;
  // either quote style may open the value; the same one must close it
  let (input, quote_char) = or(ch('"'), ch('\''))(input)?;
  let (input, text) = take_while(|c| c != quote_char)(input)?;
  let (input, _) = ch(quote_char)(input)?;
  let quote_end_input = input;
  // anything may follow the attribute as long as the tag gets closed
  if !input.contains("/>") {
    return Err(monch::ParseError::Backtrace);
  }
  Ok(ParsedTripleSlashReference {
    text,
    quote_start: original_input.len() - quote_start_input.len(),
    quote_end: original_input.len() - quote_end_input.len(),
  })
}