Skip to content

Commit

Permalink
Retain rule and alternation comments.
Browse files Browse the repository at this point in the history
See test/instaparse/retain_comments.cljc for examples.
  • Loading branch information
kanaka committed Apr 25, 2018
1 parent dcfffad commit 2215334
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 32 deletions.
80 changes: 48 additions & 32 deletions src/instaparse/cfg.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,17 @@
:hiccup ; use the hiccup output format
{:rules (hide-tag (cat opt-whitespace
(plus (nt :rule))))
:comment (cat (string "(*") (nt :inside-comment) (string "*)"))
:inside-comment (cat (regexp inside-comment)
(star (cat (nt :comment)
(regexp inside-comment))))
:opt-whitespace (cat (regexp ws)
(star (cat (nt :comment)
(regexp ws))))
:comment (cat (hide (string "(*")) (regexp inside-comment) (hide (string "*)")))
:opt-whitespace (hide (regexp ws))
:opt-comment (cat (hide (regexp ws))
(star (cat (nt :comment)
(hide (regexp ws)))))
:rule-separator (alt (string ":")
(string ":=")
(string "::=")
(string "="))
:rule (cat (alt (nt :nt)
:rule (cat (nt :opt-comment)
(alt (nt :nt)
(nt :hide-nt))
opt-whitespace
(hide (nt :rule-separator))
Expand Down Expand Up @@ -109,9 +108,9 @@
opt-whitespace
(hide (string ">")))
:cat (plus (cat
opt-whitespace
(nt :opt-comment)
(alt (nt :factor) (nt :look) (nt :neg))
opt-whitespace))
(nt :opt-comment)))
:string (alt
(regexp single-quoted-string)
(regexp double-quoted-string))
Expand Down Expand Up @@ -247,34 +246,50 @@
(defn build-rule
"Convert one parsed rule from the grammar into combinators"
[tree]
(case (tag tree)
:rule (let [[nt alt-or-ord] (contents tree)]
(if (= (tag nt) :hide-nt)
[(keyword (content (content nt)))
(hide-tag (build-rule alt-or-ord))]
[(keyword (content nt))
(build-rule alt-or-ord)]))
:nt (nt (keyword (content tree)))
:alt (apply alt (map build-rule (contents tree)))
:ord (apply ord (map build-rule (contents tree)))
:paren (recur (content tree))
:hide (hide (build-rule (content tree)))
:cat (apply cat (map build-rule (contents tree)))
:string (string+ (process-string (content tree)) false)
:regexp (regexp (process-regexp (content tree)))
:opt (opt (build-rule (content tree)))
:star (star (build-rule (content tree)))
:plus (plus (build-rule (content tree)))
:look (look (build-rule (content tree)))
:neg (neg (build-rule (content tree)))
:epsilon Epsilon))
;(prn :build-rule :tag (tag tree) :content (content tree) :tree tree)
(let [cs (contents tree)
comments (seq (map #(second %)
(mapcat #(contents %)
(filter #(= :opt-comment (first %)) cs))))
cs (remove #(= :opt-comment (first %)) cs)]
(if (= :rule (tag tree))
(let [[nt alt-or-ord] cs]
(if (= (tag nt) :hide-nt)
[(keyword (content (content nt)))
(merge
(hide-tag (build-rule alt-or-ord))
(when comments {:comments comments}))]
[(keyword (content nt))
(merge
(build-rule alt-or-ord)
(when comments {:comments comments}))]))
(merge
(case (tag tree)
:nt (nt (keyword (content tree)))
:alt (apply alt (map build-rule (contents tree)))
:ord (apply ord (map build-rule (contents tree)))
:paren (build-rule (content tree))
:hide (hide (build-rule (content tree)))
:cat (apply cat (map build-rule cs))
:string (string+ (process-string (content tree)) false)
;:opt-comment (opt-comment cs)
:regexp (regexp (process-regexp (content tree)))
:opt (opt (build-rule (content tree)))
:star (star (build-rule (content tree)))
:plus (plus (build-rule (content tree)))
:look (look (build-rule (content tree)))
:neg (neg (build-rule (content tree)))
:epsilon Epsilon)
(when comments
{:comments comments})))))

(defn seq-nt
"Returns a sequence of all non-terminals in a parser built from combinators."
[parser]
;(prn :seq-nt :tag (:tag parser))
(case (:tag parser)
:nt [(:keyword parser)]
(:string :string-ci :char :regexp :epsilon) []
(:string :string-ci :char :regexp :comment :epsilon) []
(:opt :plus :star :look :neg :rep) (recur (:parser parser))
(:alt :cat) (mapcat seq-nt (:parsers parser))
:ord (mapcat seq-nt
Expand All @@ -293,6 +308,7 @@

(defn build-parser [spec output-format]
(let [rules (parse cfg :rules spec false)]
;(clojure.pprint/pprint {:rules rules})
(if (instance? instaparse.gll.Failure rules)
(throw-runtime-exception
"Error parsing grammar specification:\n"
Expand Down
5 changes: 5 additions & 0 deletions src/instaparse/combinators_source.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@
:regexp (-> (re-pattern r)
#?(:cljs add-beginning-constraint))}))

(defn opt-comment "Create a comment terminal out of s"
[s]
(if (= s "") Epsilon
{:tag :comment :comment s}))

(defn nt "Refers to a non-terminal defined by the grammar map"
[s]
{:tag :nt :keyword s})
Expand Down
77 changes: 77 additions & 0 deletions test/instaparse/retain_comments.cljc
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
(ns instaparse.retain-comments
(:require
#?(:clj [clojure.test :refer [deftest is]]
:cljs [cljs.test :as t])
#?(:clj [instaparse.core :as insta]
:cljs [instaparse.core :as insta]))
#?(:cljs (:require-macros
[cljs.test :refer [is deftest]])))

(def retain1
(insta/parser
"foo = 'bar' (* a comment for 'bar' *)
| 'baz'"))

(def retain2
(insta/parser
"(* this is a top-level comment *)
foo = 'bar' (* a comment for 'bar' *)
| 'baz' (* a comment for 'baz' *)"))

(def retain3
(insta/parser
"(* foo comment 1 *)
(* foo comment 2 *)
foo = 'bar' (* 'bar' comment 1 *) (* 'bar' comment 2 *)
| 'baz' (* 'baz' comment 1 *) (* 'baz' comment 2 *) ;
(* qux comment 1 *)
(* qux comment 2 *)
qux = 'quux' (* 'quux' comment 1 *) (* 'quux' comment 2 *)
| 'quuux' (* 'quuux' comment 1 *) (* 'quuux' comment 2 *) ;
(* blah comment 1 *)
(* blah comment 2 *)
blah = 'blahblah' (* 'blahblah' comment 1 *) (* 'blahblah' comment 2 *)"))

(def retain4
(insta/parser
"(* foo comment *)
foo = 'bar' (* 'bar' comment *)
| 'baz' (* 'baz' comment 1 *)
(* 'baz' comment 2 *)
qux = 'quux' (* 'quux' comment *)
| 'quuux' (* 'quuux' comment *)"))


(deftest retain-tests
(is (= (-> retain1 :grammar :foo :parsers first :comments)
'(" a comment for 'bar' ")))

(is (= (-> retain2 :grammar :foo :parsers first :comments)
'(" a comment for 'bar' ")))
(is (= (-> retain2 :grammar :foo :parsers second :comments)
'(" a comment for 'baz' ")))

(is (= (-> retain3 :grammar :foo :comments)
'(" foo comment 1 " " foo comment 2 ")))
(is (= (-> retain3 :grammar :foo :parsers first :comments)
'(" 'bar' comment 1 " " 'bar' comment 2 ")))
(is (= (-> retain3 :grammar :foo :parsers second :comments)
'(" 'baz' comment 1 " " 'baz' comment 2 ")))
(is (= (-> retain3 :grammar :qux :comments)
'(" qux comment 1 " " qux comment 2 ")))
(is (= (-> retain3 :grammar :qux :parsers first :comments)
'(" 'quux' comment 1 " " 'quux' comment 2 ")))
(is (= (-> retain3 :grammar :qux :parsers second :comments)
'(" 'quuux' comment 1 " " 'quuux' comment 2 ")))

(is (= (-> retain4 :grammar :foo :comments)
'(" foo comment ")))
(is (= (-> retain4 :grammar :foo :parsers first :comments)
'(" 'bar' comment ")))
(is (= (-> retain4 :grammar :foo :parsers second :comments)
'(" 'baz' comment 1 " " 'baz' comment 2 ")))
(is (= (-> retain4 :grammar :qux :parsers first :comments)
'(" 'quux' comment ")))
(is (= (-> retain4 :grammar :qux :parsers second :comments)
'(" 'quuux' comment "))))

0 comments on commit 2215334

Please sign in to comment.