diff --git a/src/instaparse/cfg.cljc b/src/instaparse/cfg.cljc index 36aabd8..fe5e222 100644 --- a/src/instaparse/cfg.cljc +++ b/src/instaparse/cfg.cljc @@ -55,18 +55,17 @@ :hiccup ; use the hiccup output format {:rules (hide-tag (cat opt-whitespace (plus (nt :rule)))) - :comment (cat (string "(*") (nt :inside-comment) (string "*)")) - :inside-comment (cat (regexp inside-comment) - (star (cat (nt :comment) - (regexp inside-comment)))) - :opt-whitespace (cat (regexp ws) - (star (cat (nt :comment) - (regexp ws)))) + :comment (cat (hide (string "(*")) (regexp inside-comment) (hide (string "*)"))) + :opt-whitespace (hide (regexp ws)) + :opt-comment (cat (hide (regexp ws)) + (star (cat (nt :comment) + (hide (regexp ws))))) :rule-separator (alt (string ":") (string ":=") (string "::=") (string "=")) - :rule (cat (alt (nt :nt) + :rule (cat (nt :opt-comment) + (alt (nt :nt) (nt :hide-nt)) opt-whitespace (hide (nt :rule-separator)) @@ -109,9 +108,9 @@ opt-whitespace (hide (string ">"))) :cat (plus (cat - opt-whitespace + (nt :opt-comment) (alt (nt :factor) (nt :look) (nt :neg)) - opt-whitespace)) + (nt :opt-comment))) :string (alt (regexp single-quoted-string) (regexp double-quoted-string)) @@ -247,34 +246,50 @@ (defn build-rule "Convert one parsed rule from the grammar into combinators" [tree] - (case (tag tree) - :rule (let [[nt alt-or-ord] (contents tree)] - (if (= (tag nt) :hide-nt) - [(keyword (content (content nt))) - (hide-tag (build-rule alt-or-ord))] - [(keyword (content nt)) - (build-rule alt-or-ord)])) - :nt (nt (keyword (content tree))) - :alt (apply alt (map build-rule (contents tree))) - :ord (apply ord (map build-rule (contents tree))) - :paren (recur (content tree)) - :hide (hide (build-rule (content tree))) - :cat (apply cat (map build-rule (contents tree))) - :string (string+ (process-string (content tree)) false) - :regexp (regexp (process-regexp (content tree))) - :opt (opt (build-rule (content tree))) - :star (star (build-rule (content tree))) - :plus (plus (build-rule (content tree))) - :look (look (build-rule (content tree))) - :neg (neg (build-rule (content tree))) - :epsilon Epsilon)) + ;(prn :build-rule :tag (tag tree) :content (content tree) :tree tree) + (let [cs (contents tree) + comments (seq (map #(second %) + (mapcat #(contents %) + (filter #(= :opt-comment (first %)) cs)))) + cs (remove #(= :opt-comment (first %)) cs)] + (if (= :rule (tag tree)) + (let [[nt alt-or-ord] cs] + (if (= (tag nt) :hide-nt) + [(keyword (content (content nt))) + (merge + (hide-tag (build-rule alt-or-ord)) + (when comments {:comments comments}))] + [(keyword (content nt)) + (merge + (build-rule alt-or-ord) + (when comments {:comments comments}))])) + (merge + (case (tag tree) + :nt (nt (keyword (content tree))) + :alt (apply alt (map build-rule (contents tree))) + :ord (apply ord (map build-rule (contents tree))) + :paren (build-rule (content tree)) + :hide (hide (build-rule (content tree))) + :cat (apply cat (map build-rule cs)) + :string (string+ (process-string (content tree)) false) + ;:opt-comment (opt-comment cs) + :regexp (regexp (process-regexp (content tree))) + :opt (opt (build-rule (content tree))) + :star (star (build-rule (content tree))) + :plus (plus (build-rule (content tree))) + :look (look (build-rule (content tree))) + :neg (neg (build-rule (content tree))) + :epsilon Epsilon) + (when comments + {:comments comments}))))) (defn seq-nt "Returns a sequence of all non-terminals in a parser built from combinators." [parser] + ;(prn :seq-nt :tag (:tag parser)) (case (:tag parser) :nt [(:keyword parser)] - (:string :string-ci :char :regexp :epsilon) [] + (:string :string-ci :char :regexp :comment :epsilon) [] (:opt :plus :star :look :neg :rep) (recur (:parser parser)) (:alt :cat) (mapcat seq-nt (:parsers parser)) :ord (mapcat seq-nt @@ -293,6 +308,7 @@ (defn build-parser [spec output-format] (let [rules (parse cfg :rules spec false)] + ;(clojure.pprint/pprint {:rules rules}) (if (instance? instaparse.gll.Failure rules) (throw-runtime-exception "Error parsing grammar specification:\n" diff --git a/src/instaparse/combinators_source.cljc b/src/instaparse/combinators_source.cljc index 974f50a..7cf0f6a 100644 --- a/src/instaparse/combinators_source.cljc +++ b/src/instaparse/combinators_source.cljc @@ -92,6 +92,11 @@ :regexp (-> (re-pattern r) #?(:cljs add-beginning-constraint))})) +(defn opt-comment "Create a comment terminal out of s" + [s] + (if (= s "") Epsilon + {:tag :comment :comment s})) + (defn nt "Refers to a non-terminal defined by the grammar map" [s] {:tag :nt :keyword s}) diff --git a/test/instaparse/retain_comments.cljc b/test/instaparse/retain_comments.cljc new file mode 100644 index 0000000..bbfdde1 --- /dev/null +++ b/test/instaparse/retain_comments.cljc @@ -0,0 +1,77 @@ +(ns instaparse.retain-comments + (:require + #?(:clj [clojure.test :refer [deftest is]] + :cljs [cljs.test :as t]) + #?(:clj [instaparse.core :as insta] + :cljs [instaparse.core :as insta])) + #?(:cljs (:require-macros + [cljs.test :refer [is deftest]]))) + +(def retain1 + (insta/parser + "foo = 'bar' (* a comment for 'bar' *) + | 'baz'")) + +(def retain2 + (insta/parser + "(* this is a top-level comment *) + foo = 'bar' (* a comment for 'bar' *) + | 'baz' (* a comment for 'baz' *)")) + +(def retain3 + (insta/parser + "(* foo comment 1 *) + (* foo comment 2 *) + foo = 'bar' (* 'bar' comment 1 *) (* 'bar' comment 2 *) + | 'baz' (* 'baz' comment 1 *) (* 'baz' comment 2 *) ; + (* qux comment 1 *) + (* qux comment 2 *) + qux = 'quux' (* 'quux' comment 1 *) (* 'quux' comment 2 *) + | 'quuux' (* 'quuux' comment 1 *) (* 'quuux' comment 2 *) ; + (* blah comment 1 *) + (* blah comment 2 *) + blah = 'blahblah' (* 'blahblah' comment 1 *) (* 'blahblah' comment 2 *)")) + +(def retain4 + (insta/parser + "(* foo comment *) + foo = 'bar' (* 'bar' comment *) + | 'baz' (* 'baz' comment 1 *) + (* 'baz' comment 2 *) + qux = 'quux' (* 'quux' comment *) + | 'quuux' (* 'quuux' comment *)")) + + +(deftest retain-tests + (is (= (-> retain1 :grammar :foo :parsers first :comments) + '(" a comment for 'bar' "))) + + (is (= (-> retain2 :grammar :foo :parsers first :comments) + '(" a comment for 'bar' "))) + (is (= (-> retain2 :grammar :foo :parsers second :comments) + '(" a comment for 'baz' "))) + + (is (= (-> retain3 :grammar :foo :comments) + '(" foo comment 1 " " foo comment 2 "))) + (is (= (-> retain3 :grammar :foo :parsers first :comments) + '(" 'bar' comment 1 " " 'bar' comment 2 "))) + (is (= (-> retain3 :grammar :foo :parsers second :comments) + '(" 'baz' comment 1 " " 'baz' comment 2 "))) + (is (= (-> retain3 :grammar :qux :comments) + '(" qux comment 1 " " qux comment 2 "))) + (is (= (-> retain3 :grammar :qux :parsers first :comments) + '(" 'quux' comment 1 " " 'quux' comment 2 "))) + (is (= (-> retain3 :grammar :qux :parsers second :comments) + '(" 'quuux' comment 1 " " 'quuux' comment 2 "))) + + (is (= (-> retain4 :grammar :foo :comments) + '(" foo comment "))) + (is (= (-> retain4 :grammar :foo :parsers first :comments) + '(" 'bar' comment "))) + (is (= (-> retain4 :grammar :foo :parsers second :comments) + '(" 'baz' comment 1 " " 'baz' comment 2 "))) + (is (= (-> retain4 :grammar :qux :parsers first :comments) + '(" 'quux' comment "))) + (is (= (-> retain4 :grammar :qux :parsers second :comments) + '(" 'quuux' comment ")))) +