From d681ba40d212e7de6a5fa43f4bf84273ffe417ec Mon Sep 17 00:00:00 2001 From: Teppei Shintani Date: Tue, 18 Jun 2024 00:27:52 +0900 Subject: [PATCH] Change parsing from regular expression to Prism AST To stabilize the operation, the parsing was changed from regular expression parsing to Prism AST-based parsing. --- .rubocop.yml | 6 ++ lib/rbs_inline_data/cli.rb | 25 ++++--- lib/rbs_inline_data/parser.rb | 67 ++++++++++++------- rbs_inline_data.gemspec | 1 + .../data/lib/rbs_inline_data/parser.rbs | 8 +++ sig/generated/rbs_inline_data/cli.rbs | 6 ++ sig/generated/rbs_inline_data/parser.rbs | 17 ++++- sig/patch.rbs | 9 +++ test/rbs_inline_data/parser_test.rb | 13 ++++ 9 files changed, 118 insertions(+), 34 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index bd6498d..ab7d62e 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -1,5 +1,6 @@ AllCops: TargetRubyVersion: 3.3 + NewCops: enable Style/StringLiterals: EnforcedStyle: double_quotes @@ -17,3 +18,8 @@ Metrics/ClassLength: Metrics/MethodLength: Exclude: - 'test/**/*' + Max: 20 + +Style/Documentation: + Enabled: false + diff --git a/lib/rbs_inline_data/cli.rb b/lib/rbs_inline_data/cli.rb index 6aed382..ec065d4 100644 --- a/lib/rbs_inline_data/cli.rb +++ b/lib/rbs_inline_data/cli.rb @@ -19,22 +19,29 @@ def run(args) end end.parse!(args) - targets = Pathname.glob(args[0]).flat_map do |path| - if path.directory? - Pathname.glob(path.join("**/*.rb").to_s) + get_targets(args[0]).each do |target| + result = Prism.parse_file(target.to_s) + definitions = Parser.parse(result) + Writer.write(definitions, output_path ? (output_path + target).sub_ext(".rbs") : nil) + end + end + + private + + #:: (String) -> Array[Pathname] + def get_targets(path) + targets = Pathname.glob(path).flat_map do |pathname| + if pathname.directory? + Pathname.glob(pathname.join("**/*.rb").to_s) else - path + pathname end end targets.sort! targets.uniq! 
- targets.each do |target| - result = Prism.parse_file(target.to_s) - definitions = Parser.parse(result) - Writer.write(definitions, output_path ? (output_path + target).sub_ext(".rbs") : nil) - end + targets end end end diff --git a/lib/rbs_inline_data/parser.rb b/lib/rbs_inline_data/parser.rb index 2d45a0b..4c5d095 100644 --- a/lib/rbs_inline_data/parser.rb +++ b/lib/rbs_inline_data/parser.rb @@ -14,14 +14,37 @@ class Parser < Prism::Visitor :field_name, #:: String :type #:: String ) + # @rbs skip + Comments = Data.define( + :comment_lines #:: Hash[Integer, String] + ) + class Comments + MARKER = "#::" + + #:: (Array[Prism::Comment]) -> RbsInlineData::Parser::Comments + def self.from_prism_comments(comments) + # @type var comment_lines: Hash[Integer, String] + comment_lines = {} + comments.each do |comment| + sliced = comment.slice + next unless sliced.start_with?(MARKER) + + comment_lines[comment.location.start_line] = sliced.sub(MARKER, "").strip + end + + new(comment_lines:) + end + end # @rbs @definitions: Array[RbsInlineData::Parser::TypedDefinition] # @rbs @surronding_class_or_module: Array[Symbol] + # @rbs @comments: RbsInlineData::Parser::Comments # rubocop:disable Lint/MissingSuper - #:: (Array[RbsInlineData::Parser::TypedDefinition]) -> void - def initialize(definitions) + #:: (Array[RbsInlineData::Parser::TypedDefinition], RbsInlineData::Parser::Comments) -> void + def initialize(definitions, comments) @definitions = definitions + @comments = comments @surronding_class_or_module = [] end # rubocop:enable Lint/MissingSuper @@ -30,7 +53,8 @@ def initialize(definitions) def self.parse(result) # @type var definitions: Array[RbsInlineData::Parser::TypedDefinition] definitions = [] - instance = new(definitions) + comments = Comments.from_prism_comments(result.comments) + instance = new(definitions, comments) result.value.accept(instance) definitions end @@ -80,30 +104,27 @@ def define_data?(node) #:: (Prism::ConstantWriteNode) -> 
RbsInlineData::Parser::TypedDefinition? def extract_definition(node) - source = node.slice - _, class_name, field_text = source.match(/\A([a-zA-Z0-9]+) ?= ?Data\.define\(([\n\s\w\W]+)\)\z/).to_a - return nil if field_text.nil? || class_name.nil? - - class_name = "#{@surronding_class_or_module.join("::")}::#{class_name}" - - fields = field_text.split("\n").map(&:strip).reject(&:empty?).map do |str| - case str - when /:(\w+),? #:: ([\w:\[\], ]+)/ - [::Regexp.last_match(1), ::Regexp.last_match(2)] - when /:(\w+),?/ - [::Regexp.last_match(1), "untyped"] - end - end.compact.map do |field_name, type| - TypedField.new( - field_name:, - type: - ) + arguments_node = node.value.arguments + if arguments_node + typed_fields = arguments_node.arguments.map do |sym_node| + return nil unless sym_node.is_a?(Prism::SymbolNode) + + TypedField.new( + field_name: sym_node.unescaped, + type: type_of(sym_node) + ) + end.compact end TypedDefinition.new( - class_name:, - fields: + class_name: "#{@surronding_class_or_module.join("::")}::#{node.name}", + fields: typed_fields || [] ) end + + #:: (Prism::SymbolNode) -> String + def type_of(node) + @comments.comment_lines[node.location.start_line] || "untyped" + end end end diff --git a/rbs_inline_data.gemspec b/rbs_inline_data.gemspec index 95db1ac..2e93f2a 100644 --- a/rbs_inline_data.gemspec +++ b/rbs_inline_data.gemspec @@ -30,4 +30,5 @@ Gem::Specification.new do |spec| spec.add_dependency "prism" spec.add_dependency "rbs" + spec.metadata["rubygems_mfa_required"] = "true" end diff --git a/sig/generated/data/lib/rbs_inline_data/parser.rbs b/sig/generated/data/lib/rbs_inline_data/parser.rbs index da161b6..29e8e33 100644 --- a/sig/generated/data/lib/rbs_inline_data/parser.rbs +++ b/sig/generated/data/lib/rbs_inline_data/parser.rbs @@ -15,3 +15,11 @@ class RbsInlineData::Parser::TypedField | (**untyped) -> ::RbsInlineData::Parser::TypedField | ... 
end + +class RbsInlineData::Parser::Comments + extend Data::_DataClass + attr_reader comment_lines: Hash[Integer, String] + def self.new: (*untyped) -> ::RbsInlineData::Parser::Comments + | (**untyped) -> ::RbsInlineData::Parser::Comments + | ... +end diff --git a/sig/generated/rbs_inline_data/cli.rbs b/sig/generated/rbs_inline_data/cli.rbs index 470b24d..1b01775 100644 --- a/sig/generated/rbs_inline_data/cli.rbs +++ b/sig/generated/rbs_inline_data/cli.rbs @@ -1,8 +1,14 @@ # Generated from lib/rbs_inline_data/cli.rb with RBS::Inline module RbsInlineData + # Process executed when running the rbs-inline-data command. class CLI # :: (Array[String]) -> void def run: (Array[String]) -> void + + private + + # :: (String) -> Array[Pathname] + def get_targets: (String) -> Array[Pathname] end end diff --git a/sig/generated/rbs_inline_data/parser.rbs b/sig/generated/rbs_inline_data/parser.rbs index 5e5b5cf..6627347 100644 --- a/sig/generated/rbs_inline_data/parser.rbs +++ b/sig/generated/rbs_inline_data/parser.rbs @@ -2,12 +2,22 @@ module RbsInlineData class Parser < Prism::Visitor + class Comments + MARKER: ::String + + # :: (Array[Prism::Comment]) -> RbsInlineData::Parser::Comments + def self.from_prism_comments: (Array[Prism::Comment]) -> RbsInlineData::Parser::Comments + end + @definitions: Array[RbsInlineData::Parser::TypedDefinition] @surronding_class_or_module: Array[Symbol] - # :: (Array[RbsInlineData::Parser::TypedDefinition]) -> void - def initialize: (Array[RbsInlineData::Parser::TypedDefinition]) -> void + @comments: RbsInlineData::Parser::Comments + + # rubocop:disable Lint/MissingSuper + # :: (Array[RbsInlineData::Parser::TypedDefinition], RbsInlineData::Parser::Comments) -> void + def initialize: (Array[RbsInlineData::Parser::TypedDefinition], RbsInlineData::Parser::Comments) -> void # :: (Prism::ParseResult) -> Array[RbsInlineData::Parser::TypedDefinition] def self.parse: (Prism::ParseResult) -> Array[RbsInlineData::Parser::TypedDefinition] @@ -31,5 +41,8 @@ 
module RbsInlineData # :: (Prism::ConstantWriteNode) -> RbsInlineData::Parser::TypedDefinition? def extract_definition: (Prism::ConstantWriteNode) -> RbsInlineData::Parser::TypedDefinition? + + # :: (Prism::SymbolNode) -> String + def type_of: (Prism::SymbolNode) -> String end end diff --git a/sig/patch.rbs b/sig/patch.rbs index 6a37ef4..8d70d6d 100644 --- a/sig/patch.rbs +++ b/sig/patch.rbs @@ -8,4 +8,13 @@ module Prism def constant_path: () -> Prism::ConstantReadNode | ... end + + class Comment + def slice: () -> String + end + + class ConstantWriteNode + def value: () -> CallNode + | ... + end end diff --git a/test/rbs_inline_data/parser_test.rb b/test/rbs_inline_data/parser_test.rb index 2bdf689..2577090 100644 --- a/test/rbs_inline_data/parser_test.rb +++ b/test/rbs_inline_data/parser_test.rb @@ -132,5 +132,18 @@ class A ] ) end + + def test_no_args + definitions = Parser.parse(parse_ruby(<<~RUBY)) + class A + B = Data.define + end + RUBY + + assert_equal definitions[0], Parser::TypedDefinition.new( + class_name: "A::B", + fields: [] + ) + end end end