Skip to content

Commit

Permalink
adding grammars
Browse files Browse the repository at this point in the history
  • Loading branch information
mike dupont committed Feb 17, 2024
1 parent ea7f6c9 commit 2350d7d
Show file tree
Hide file tree
Showing 12 changed files with 191 additions and 25 deletions.
20 changes: 19 additions & 1 deletion README.org
Original file line number Diff line number Diff line change
Expand Up @@ -279,4 +279,22 @@ Error at `Assoc ([("content",
("tokens_predicted", `Int (128)); ("truncated", `Bool (false))])


dune exec ./bin/simple_grammar.exe -- --llamacpp -s test4 -u "http://localhost:8080" -p "consider a consecutive series of types to describe the universe and universe of universes, what is your ordering?" -n 4 -g "root ::= (expr \"=\" ws term \"\n\")+\nexpr ::= term ([-+*/] term)*\nterm ::= ident | num | \"(\" ws expr \")\" ws\nident ::= [a-z] [a-z0-9_]* ws\nnum ::= [0-9]+ ws\nws ::= [ \t\n]*"
dune exec ./bin/simple_grammar.exe -- --llamacpp -s test4 -u "http://localhost:8080" -p "consider a consecutive series of types to describe the universe and universe of universes, what is your ordering?" -n 4 -g "root ::= (expr \"=\" ws term \"\n\")+\nexpr ::= term ([-+*/] term)*\nterm ::= ident | num | \"(\" ws expr \")\" ws\nident ::= [a-z] [a-z0-9_]* ws\nnum ::= [0-9]+ ws\nws ::= [ \t\n]*" s


* grammar
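The -g (grammar) and -p (prompt) options now take file names instead of inline text, because of shell quoting issues; the program reads the contents of both files itself.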

GRAMMAR=~/experiments/gbnf_parser/grammars/ebnf.ebnf
DS=$(date -Iseconds)
PROMPT_NAME=prompt_grammar2c.txt

dune exec bin/simple_grammar.exe -- \
--llamacpp \
-u "http://localhost:8080" \
-s "grammar_1_${DS}" \
-g $GRAMMAR \
-p $PROMPT_NAME \
-x ".txt" \
-n 6

28 changes: 12 additions & 16 deletions bin/dune
Original file line number Diff line number Diff line change
Expand Up @@ -6,64 +6,60 @@
-dtypedtree -dparsetree -dump-into-file -dlambda -annot -dlocations -dsource -drawlambda ))))

(executable
(public_name lang_agent)
(public_name la-chunker)
(name chunker )
(modules chunker )
(libraries lang_agent lwt))

(executable
(public_name argiope)
(public_name la-argiope)
(name argiope )
(modules argiope )
(libraries lang_agent uri lambdasoup lwt ezcurl-lwt ezcurl containers))
(executable
(public_name worker)
(public_name la-worker)
(name worker )
(modules worker )
(libraries lang_agent uri lambdasoup lwt ezcurl-lwt ezcurl containers))

(executable
(public_name scanner)
(public_name la-scanner)
(name scanner )
(modules scanner )
(libraries lang_agent str uri lambdasoup lwt ezcurl-lwt ezcurl containers))

(executable
(public_name simple)
(public_name la-simple)
(name simple )
(modules simple )
(libraries lang_agent str uri lambdasoup lwt ezcurl-lwt ezcurl containers))

(executable
(public_name test1)
(name test1 )
(modules test1 )
(libraries lang_agent str uri lambdasoup lwt ezcurl-lwt ezcurl containers))

(executable
(public_name execute_self)
(public_name la-execute-self)
(name execute_self )
(modules execute_self )
(libraries lang_agent str uri lambdasoup lwt ezcurl-lwt ezcurl containers))
(executable
(public_name generate_dungeon)
(public_name la-generate-dungeon)
(name generate_dungeon )
(modules generate_dungeon )
(libraries lang_agent str uri lambdasoup lwt ezcurl-lwt ezcurl containers))

(executable
(public_name generate_generic)
(public_name la-generate-generic)
(name generate_generic )
(modules generate_generic )
(libraries lang_agent str uri lambdasoup lwt ezcurl-lwt ezcurl containers))

(executable
(public_name recurse)
(public_name la-recurse)
(name recurse )
(modules recurse )
(libraries lang_agent str uri lambdasoup lwt ezcurl-lwt ezcurl containers))

(executable
(public_name simple_grammar)
(public_name la-simple-grammar)
(name simple_grammar )
(modules simple_grammar )
(libraries lang_agent str uri lambdasoup lwt ezcurl-lwt ezcurl containers))
(install (files (glob_files *)) (section bin) (package lang_agent))
9 changes: 6 additions & 3 deletions bin/generate_dungeon.ml
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,13 @@ let run_cmd args =
let cmd = String.concat " " args in
Printf.printf "Cmd: %s\n" cmd;
let inp = Unix.open_process_in cmd in
let r = In_channel.input_lines inp in
let r = In_channel.input_line inp in
In_channel.close inp;
let out = String.concat " " r in
Printf.printf "Output: %s\n" out
match r with
|Some out ->
Printf.printf "Output: %s\n" out
|None ->
Printf.printf "Output: None\n"

let () =do_apply_list run_cmd make_args type_terms

14 changes: 10 additions & 4 deletions bin/simple_grammar.ml
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,16 @@ let lc_init lang_client aurl amodel agrammar=
let a = m#lang_init() in
let c1 = m#lang_open a aurl in
let c2 = m#lang_set_model c1 amodel in
(print_endline ("DEBUG9 GRAMMAR :" ^ agrammar) );
let c3 = m#lang_set_grammar c2 agrammar in
B2LlamaCpp c3


(** [read_whole_file filename] returns the entire contents of [filename] as a
    string. The file is opened in binary mode, so bytes are returned exactly
    as stored (no newline translation).

    The channel is closed on every path, including when reading raises, and
    the contents are read until end of file rather than sized up front, so
    inputs whose length cannot be queried (pipes, FIFOs) also work.

    @raise Sys_error if the file cannot be opened or read. *)
let read_whole_file filename =
  In_channel.with_open_bin filename In_channel.input_all

let () =
let start = ref "" in
Expand All @@ -84,8 +90,8 @@ let () =
let opts = [
"-s", Arg.Set_string start, "startdir";
"-n", Arg.Set_int item_count, "generate count items";
"-p", Arg.Set_string prompt, "prompt";
"-g", Arg.Set_string grammar, "grammar";
"-p", Arg.Set_string prompt, "prompt filename";
"-g", Arg.Set_string grammar, "grammar filename";
"-x", Arg.Set_string suffix, "suffix";
"-m", Arg.Set_string model, "model";
"--llamacpp", Arg.Unit (fun () ->
Expand All @@ -96,7 +102,7 @@ let () =
Arg.parse opts anon_fun help_str;
Printf.printf "DEBUG3 path %s\n" !start;
(print_endline ("DEBUG4 MODEL :" ^ ! model) );
grammar := read_whole_file !grammar;
prompt := read_whole_file !prompt;
let client_param_record = lc_init !lang_client !url !model !grammar in
process_prompt !lang_client client_param_record !start !model !prompt !suffix !item_count


2 changes: 1 addition & 1 deletion build2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@ eval $(opam env --switch=5.1.2+trunk)


opam install --yes ppx_yojson lwt_ppx containers lambdasoup uri ezcurl ppx_yojson_conv ppx_yojson_conv_lib ezcurl-lwt yojson
opam install --yes --deps-only .
opam install -w --yes --deps-only .
opam pin add --yes openai https://github.com/meta-introspector/openai-ocaml.git#main --update-invariant
dune build .
20 changes: 20 additions & 0 deletions grammar2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Build a prompt that asks the model to rewrite the grammar in $GRAMMAR2 into
# the format of the grammar in $GRAMMAR, then run simple_grammar with the
# --llamacpp backend at http://localhost:8080.
GRAMMAR=~/experiments/gbnf_parser/grammars/ebnf.ebnf
GRAMMAR_C=$(cat "$GRAMMAR")

GRAMMAR2=~/experiments/gbnf_parser/lib/sentenceParser.mly
GRAMMAR2_C=$(cat "$GRAMMAR2")

# NOTE(review): DATA is not used anywhere below — confirm whether it can go.
DATA=$(cat notes.org)
# Timestamp keeps each run's prompt file and start directory distinct.
DS=$(date -Iseconds)
PROMPT_NAME="prompt_grammar2_${DS}.txt"

# printf '%s\n' writes the text verbatim. Some `echo` implementations expand
# backslash escapes such as "\n" and "\t", which occur inside the grammars.
printf '%s\n' "Consider the following grammar between BEGINSRC and ENDSRC. BEGINSRC ${GRAMMAR2_C} ENDSRC . Please rewrite it to be more beautiful. We are going to use the following TARGET: BEGINTARGET ${GRAMMAR_C} ENDTARGET as our target grammar format. Please rewrite SRC into TARGET. " > "$PROMPT_NAME"

# -g and -p are file names; quoted so paths with spaces stay a single argument.
dune exec bin/simple_grammar.exe -- \
--llamacpp \
-u "http://localhost:8080" \
-s "grammar_1_${DS}" \
-g "$GRAMMAR" \
-p "$PROMPT_NAME" \
-x ".txt" \
-n 6
12 changes: 12 additions & 0 deletions grammar2b.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Run simple_grammar with the --llamacpp backend at http://localhost:8080,
# using the prompt stored in prompt_grammar2.txt and the grammar in $GRAMMAR.
GRAMMAR=~/experiments/gbnf_parser/grammars/ebnf.ebnf
# Timestamp keeps each run's start directory (-s) distinct.
DS=$(date -Iseconds)
PROMPT_NAME=prompt_grammar2.txt

# -g and -p are file names; quoted so paths with spaces stay a single argument.
dune exec bin/simple_grammar.exe -- \
--llamacpp \
-u "http://localhost:8080" \
-s "grammar_1_${DS}" \
-g "$GRAMMAR" \
-p "$PROMPT_NAME" \
-x ".txt" \
-n 6
12 changes: 12 additions & 0 deletions grammar2c.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Run simple_grammar with the --llamacpp backend at http://localhost:8080,
# using the prompt stored in prompt_grammar2c.txt and the grammar in $GRAMMAR.
GRAMMAR=~/experiments/gbnf_parser/grammars/ebnf.ebnf
# Timestamp keeps each run's start directory (-s) distinct.
DS=$(date -Iseconds)
PROMPT_NAME=prompt_grammar2c.txt

# -g and -p are file names; quoted so paths with spaces stay a single argument.
dune exec bin/simple_grammar.exe -- \
--llamacpp \
-u "http://localhost:8080" \
-s "grammar_1_${DS}" \
-g "$GRAMMAR" \
-p "$PROMPT_NAME" \
-x ".txt" \
-n 6
10 changes: 10 additions & 0 deletions grammar2cs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Run bin/simple.exe with the --llamacpp backend at http://localhost:8080,
# passing the text of prompt_grammar2c.txt inline as the prompt. No grammar
# file or start directory is passed to this command, so the GRAMMAR and DS
# variables the script used to set were unused and have been dropped.
PROMPT_NAME=prompt_grammar2c.txt
# Quoted so a prompt path containing spaces is read as one file.
PROMPT_C="$(cat "$PROMPT_NAME")"
dune exec bin/simple.exe -- \
--llamacpp \
-u "http://localhost:8080" \
-p "${PROMPT_C}" \
-x ".txt" \
-n 6
70 changes: 70 additions & 0 deletions prompt_grammar2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
Consider the following grammar between BEGINSRC and ENDSRC. BEGINSRC
%{
open Syntax
%}
%token <int> Tchar
%token DASH "-"
%token CARET "^"
%token
BAR "|"
EOF ""
LPAREN "("
RPAREN ")"
QUESTION "?"
STAR "*"
PLUS "+"
NEWLINE

%token <string Positions.located>
LID "lident"
REGEX "regex"
QID "\"alias\""

%token
COLONCOLONEQUAL "::="

%start <Syntax.partial_grammar> grammar
%type <Syntax.myfactor> factor
%type <Syntax.myfactor> alternation
%type <Syntax.myfactor> modifier
%type <Syntax.myfactor> complexterms
%type <Syntax.myfactor> term
%type <Syntax.myfactor> fstar
%type <Syntax.myfactor> sterm
%type <Syntax.myfactor> char_class
%type <string Positions.located> lid
%type <string Positions.located> qid
%type <Syntax.myfactor> termfactor

TRUNCATEDPART1
ENDSRC .

Please rewrite it to be more beautiful. We are going to use the following TARGET: BEGINTARGET letter ::= "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" | "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"

digit ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
# removed " | "\f" | "\b"
symbol ::= "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">" | "'" | "=" | "|" | "." | "," | ";" | "-" | "+" | "*" | "?" | "\n" | "\t" | "\r"

character ::= letter | digit | symbol | "_" | " "
identifier ::= letter ( letter | digit | "_" )

#| "\f" | "\b"
S ::= ( " " | "\n" | "\t" | "\r" )

terminal ::= "'" character "'" ( character "'" ) "'"

terminator ::= (";" | ".")

term ::= "(" S rhs S ")" | "[" S rhs S "]" | "{" S rhs S "}" | terminal | identifier

factor ::= term S "?" | term S "*" | term S "+" | term S "-" S term | term S

concatenation ::= ( S factor S "," ? ) +
alternation ::= ( S concatenation S "|" ? ) +

rhs ::= alternation
lhs ::= identifier

rule ::= lhs S "=" S rhs S terminator

root ::= ( S rule S ) * ENDTARGET as our target grammar format. Please rewrite SRC into TARGET.
15 changes: 15 additions & 0 deletions prompt_grammar2c.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Please rewrite the following to be more beautiful.
please answer only in the following grammar.

BEGIN
terminal ::= "'" character "'" ( character "'" ) "'"
terminator ::= (";" | ".")
term ::= "(" S rhs S ")" | "[" S rhs S "]" | "{" S rhs S "}" | terminal | identifier
factor ::= term S "?" | term S "*" | term S "+" | term S "-" S term | term S
concatenation ::= ( S factor S "," ? ) +
alternation ::= ( S concatenation S "|" ? ) +
rhs ::= alternation
lhs ::= identifier
rule ::= lhs S "=" S rhs S terminator
root ::= ( S rule S ) *
END
4 changes: 4 additions & 0 deletions simple_grammar.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# simple_grammar's -g and -p options are file names whose contents the program
# reads itself, so pass the grammar's path rather than its text.
GRAMMAR=~/experiments/gbnf_parser/grammars/ebnf.ebnf
echo "consider a consecutive series of types to describe the universe and universe of universes, what is your ordering?" > prompt.txt

dune exec ./bin/simple_grammar.exe -- --llamacpp -s test4 -u "http://localhost:8080" -n 4 -g "$GRAMMAR" -p prompt.txt

0 comments on commit 2350d7d

Please sign in to comment.