Skip to content

Commit

Permalink
data dungeon
Browse files Browse the repository at this point in the history
  • Loading branch information
mike dupont committed Feb 18, 2024
1 parent f6e5ddb commit 7349d88
Show file tree
Hide file tree
Showing 79 changed files with 11,283 additions and 3 deletions.
14 changes: 13 additions & 1 deletion bin/simple.ml
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,18 @@ let lc_init lang_client aurl amodel =
B2LlamaCpp c2



(** [read_whole_file filename] returns the entire contents of [filename] as a
    string. The file is opened in binary mode, so no newline translation is
    performed.

    A [DEBUG1] trace line naming the file is printed to stdout before the file
    is opened.

    @raise Sys_error if the file cannot be opened.
    @raise End_of_file if fewer bytes are available than the channel length
    reported when the read started. *)
let read_whole_file filename =
  print_endline ("DEBUG1 read :" ^ filename);
  let ch = open_in_bin filename in
  (* Close the channel on every exit path: previously an exception raised
     while measuring or reading the file left the descriptor open. *)
  Fun.protect
    ~finally:(fun () -> close_in_noerr ch)
    (fun () -> really_input_string ch (in_channel_length ch))

let () =
let start = ref "" in
let item_count = ref 1 in
let prompt = ref "" in
let prompt_file = ref "" in
let model = ref "mistral" in
let suffix = ref ".out" in

Expand All @@ -105,6 +112,7 @@ let () =
"-s", Arg.Set_string start, "startdir";
"-n", Arg.Set_int item_count, "generate count items";
"-p", Arg.Set_string prompt, "prompt";
"-f", Arg.Set_string prompt_file, "prompt file";
"-x", Arg.Set_string suffix, "suffix";
"-m", Arg.Set_string model, "model";
"--openai", Arg.Unit (fun () ->
Expand All @@ -119,8 +127,12 @@ let () =
"-u", Arg.Set_string url, "url";
] |> Arg.align in
Arg.parse opts anon_fun help_str;
(* Load the prompt from a file only when -f was actually given. Use structural
   inequality (<>): physical inequality (!=) compares addresses, not contents,
   so it does not reliably detect an empty string. *)
if !prompt_file <> "" then
prompt := read_whole_file !prompt_file;

Printf.printf "DEBUG3 path %s\n" !start;
(print_endline ("DEBUG4 MODEL :" ^ ! model) );

let client_param_record = lc_init !lang_client !url !model in
process_prompt !lang_client client_param_record !start !model !prompt !suffix !item_count

Expand Down
2 changes: 1 addition & 1 deletion bin/simple_grammar.ml
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ let aux dir suffix prompt1 client1 param_record =
)
else
(
print_endline ("going to create" ^ full_out_path);
print_endline ("Calling server. waiting for response.");
do_one prompt1 client1 param_record full_out_path
)

Expand Down
71 changes: 71 additions & 0 deletions data/dungeon/dungeon_1_2024-02-18T11:25:29-05:00_1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@

#+begin_src input
ENDSRC . Please rewrite it to be more beautiful and in EBNF form. We are going to use the following TARGET: BEGINTARGET comment ::= "#" [a-zA-Z0-9 \t]*
letter ::= [a-zA-Z]
digit ::= [0-9]
S ::= ( " " | "\n" | "\t" | "\r" )
symbol ::= "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">" | "'" | "=" | "|" | "." | "," | ";" | "-" | "+" | "*" | "?"
character ::= letter | digit | symbol | "_" | " "
identifier ::= letter ( letter | digit | "_" )*
terminal ::= "'" character "'" ( character "'" ) "'"
terminator ::= (";" | ".")
term ::= "(" S rhs S ")" | "[" S rhs S "]" | "{" S rhs S "}" | terminal | identifier
factor ::= term S "?" | term S "*" | term S "+" | term S "-" S term | term S
concatenation ::= ( S factor S "," ? ) +
alternation ::= ( S concatenation S "|" ? ) +
rhs ::= alternation
lhs ::= identifier
rule ::= lhs S "=" S rhs S terminator comment*
root ::= comment* ( S rule S ) * ENDTARGET as our target grammar format. Please rewrite SRC into TARGET. You are to only respond using the target custom GBNF grammar and put descriptions or comments at the end of the rule with a #, see the grammar for rules. Please start with a comment using '#' to start comments on a new line.

#+end_src
#+begin_src output mixtral
Result: # This is a rewritten version of SRC in the TARGET format

comment ::= "#" [a-zA-Z0-9 \t]*

letter ::= [a-zA-Z]
digit ::= [0-9]

S ::= ( " " | "\n" | "\t" | "\r" )

symbol ::= "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">" | "'" | "=" | "|" | "." | "," | ";" | "-" | "+" | "*" | "?"

character ::= letter | digit | symbol | "_" | " "

identifier ::= letter ( letter | digit | "_" )*

terminal ::= "'" character "'" ( character "'" ) "'"

terminator ::= (";" | ".")

term ::= "(" S rhs S ")"
| "[" S rhs S "]"
| "{" S rhs S "}"
| terminal
| identifier

factor ::= term S "?"
| term S "*"
| term S "+"
| term S "-" S term
| term S

concatenation ::= (S factor S "," ? )+

alternation ::= (S concatenation S "|" ? )+

rhs ::= alternation

lhs ::= identifier

rule ::= lhs S "=" S rhs S terminator comment*

root ::= comment* (S rule S)* ENDTARGET

; This grammar defines a simple expression language using GBNF
; It includes rules for comments, identifiers, terminals, terms, factors, concatenation and alternation
; Each rule has a description or comment at the end of it to explain its purpose
; The grammar can be used to parse expressions written in this language
; It is designed to be easily extended with new rules and terminal symbols
#+end_src
53 changes: 53 additions & 0 deletions data/dungeon/dungeon_1_2024-02-18T11:25:29-05:00_2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@

#+begin_src input
ENDSRC . Please rewrite it to be more beautiful and in EBNF form. We are going to use the following TARGET: BEGINTARGET comment ::= "#" [a-zA-Z0-9 \t]*
letter ::= [a-zA-Z]
digit ::= [0-9]
S ::= ( " " | "\n" | "\t" | "\r" )
symbol ::= "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">" | "'" | "=" | "|" | "." | "," | ";" | "-" | "+" | "*" | "?"
character ::= letter | digit | symbol | "_" | " "
identifier ::= letter ( letter | digit | "_" )*
terminal ::= "'" character "'" ( character "'" ) "'"
terminator ::= (";" | ".")
term ::= "(" S rhs S ")" | "[" S rhs S "]" | "{" S rhs S "}" | terminal | identifier
factor ::= term S "?" | term S "*" | term S "+" | term S "-" S term | term S
concatenation ::= ( S factor S "," ? ) +
alternation ::= ( S concatenation S "|" ? ) +
rhs ::= alternation
lhs ::= identifier
rule ::= lhs S "=" S rhs S terminator comment*
root ::= comment* ( S rule S ) * ENDTARGET as our target grammar format. Please rewrite SRC into TARGET. You are to only respond using the target custom GBNF grammar and put descriptions or comments at the end of the rule with a #, see the grammar for rules. Please start with a comment using '#' to start comments on a new line.

#+end_src
#+begin_src output mixtral
Result: BEGINTARGET

# Split the production rule into two lines for readability
production_rule ::= lhs S "=" S rhs S terminator (comment | empty)
lhs ::= identifier
rhs ::= alternation

# Combine multiple consecutive whitespace characters into a single S
S ::= ( " " | "\n" | "\t" | "\r" )+

# Add rule for empty string
empty ::=

# Allow optional comments after any grammar symbol
(comment | empty) ::= comment | empty

# Update the definition of alternation to allow for empty alternatives
alternation ::= ( S concatenation S "|" ? )* ( S concatenation S | empty )
concatenation ::= ( S factor S "," ? )+
factor ::= term S "?" | term S "*" | term S "+" | term S "-" S term | term S
term ::= "(" S rhs S ")" | "[" S rhs S "]" | "{" S rhs S "}" | terminal | identifier
terminal ::= "'" character "'" (character "'")*
character ::= letter | digit | symbol | "_" | " "
identifier ::= letter (letter | digit | "_")*

# Add rule for comment leader
comment_leader ::= "#"
comment ::= comment\_leader [a-zA-Z0-9 \t]* "\n"

ENDTARGET
#+end_src
67 changes: 67 additions & 0 deletions data/dungeon/dungeon_1_2024-02-18T11:25:29-05:00_3.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@

#+begin_src input
ENDSRC . Please rewrite it to be more beautiful and in EBNF form. We are going to use the following TARGET: BEGINTARGET comment ::= "#" [a-zA-Z0-9 \t]*
letter ::= [a-zA-Z]
digit ::= [0-9]
S ::= ( " " | "\n" | "\t" | "\r" )
symbol ::= "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">" | "'" | "=" | "|" | "." | "," | ";" | "-" | "+" | "*" | "?"
character ::= letter | digit | symbol | "_" | " "
identifier ::= letter ( letter | digit | "_" )*
terminal ::= "'" character "'" ( character "'" ) "'"
terminator ::= (";" | ".")
term ::= "(" S rhs S ")" | "[" S rhs S "]" | "{" S rhs S "}" | terminal | identifier
factor ::= term S "?" | term S "*" | term S "+" | term S "-" S term | term S
concatenation ::= ( S factor S "," ? ) +
alternation ::= ( S concatenation S "|" ? ) +
rhs ::= alternation
lhs ::= identifier
rule ::= lhs S "=" S rhs S terminator comment*
root ::= comment* ( S rule S ) * ENDTARGET as our target grammar format. Please rewrite SRC into TARGET. You are to only respond using the target custom GBNF grammar and put descriptions or comments at the end of the rule with a #, see the grammar for rules. Please start with a comment using '#' to start comments on a new line.

#+end_src
#+begin_src output mixtral
Result: # This is a rewritten version of SRC in TARGET format

comment ::= "#" [a-zA-Z0-9 \t]*

letter ::= [a-zA-Z]
digit ::= [0-9]
S ::= ( " " | "\n" | "\t" | "\r" )

symbol ::= "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">" | "'" | "=" | "|" | "." | "," | ";" | "-" | "+" | "*" | "?"
character ::= letter | digit | symbol | "_" | " "
identifier ::= letter ( letter | digit | "_" )*

terminal ::= "'" character "'" ( character "'" ) "'"
terminator ::= (";" | ".")

term ::= "(" S rhs S ")"
| "[" S rhs S "]"
| "{" S rhs S "}"
| terminal
| identifier

factor ::= term S "?"
| term S "*"
| term S "+"
| term S "-" S term
| term S

concatenation ::= ( S factor S "," ? ) +

alternation ::= ( S concatenation S "|" ? ) +

rhs ::= alternation
lhs ::= identifier

rule ::= lhs S "=" S rhs S terminator comment*

root ::= comment* ( S rule S ) * ENDTARGET

; Comments:
; - indentation is used to visually group elements of a rule
; - vertical bars are separated by spaces for readability
; - concatenations and alternations only have a single space after 'S'
; - no need to repeat 'S' when it appears multiple times in a row (e.g. S rhs S can be shortened to S rhs S)
; - comments are added at the end of rules, starting with '#'
#+end_src
75 changes: 75 additions & 0 deletions data/dungeon/dungeon_1_2024-02-18T11:25:29-05:00_4.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@

#+begin_src input
ENDSRC . Please rewrite it to be more beautiful and in EBNF form. We are going to use the following TARGET: BEGINTARGET comment ::= "#" [a-zA-Z0-9 \t]*
letter ::= [a-zA-Z]
digit ::= [0-9]
S ::= ( " " | "\n" | "\t" | "\r" )
symbol ::= "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">" | "'" | "=" | "|" | "." | "," | ";" | "-" | "+" | "*" | "?"
character ::= letter | digit | symbol | "_" | " "
identifier ::= letter ( letter | digit | "_" )*
terminal ::= "'" character "'" ( character "'" ) "'"
terminator ::= (";" | ".")
term ::= "(" S rhs S ")" | "[" S rhs S "]" | "{" S rhs S "}" | terminal | identifier
factor ::= term S "?" | term S "*" | term S "+" | term S "-" S term | term S
concatenation ::= ( S factor S "," ? ) +
alternation ::= ( S concatenation S "|" ? ) +
rhs ::= alternation
lhs ::= identifier
rule ::= lhs S "=" S rhs S terminator comment*
root ::= comment* ( S rule S ) * ENDTARGET as our target grammar format. Please rewrite SRC into TARGET. You are to only respond using the target custom GBNF grammar and put descriptions or comments at the end of the rule with a #, see the grammar for rules. Please start with a comment using '#' to start comments on a new line.

#+end_src
#+begin_src output mixtral
Result: BEGINTARGET

# Rule for matching comments in the source code
comment ::= "#" [a-zA-Z0-9 \t]*

# Individual letters and digits
letter ::= [a-zA-Z]
digit ::= [0-9]

# Whitespace characters
S ::= ( " " | "\n" | "\t" | "\r" )

# Various symbols used in the grammar
symbol ::= "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">" | "'" | "=" | "|" | "." | "," | ";" | "-" | "+" | "*" | "?"

# A single character that can be a letter, digit, symbol, underscore or space
character ::= letter | digit | symbol | "_" | " "

# An identifier is a letter followed by any number of letters, digits or underscores
identifier ::= letter ( letter | digit | "_" )*

# A terminal symbol consists of an apostrophe, a character, and then another optional apostrophe-character pair
terminal ::= "'" character "'" ( character "'" ) "'"

# Terminator symbols are semicolons or periods
terminator ::= (";" | ".")

# A term can be a parenthesized expression, bracketed expression, braced expression, terminal symbol, or an identifier
term ::= "(" S rhs S ")" | "[" S rhs S "]" | "{" S rhs S "}" | terminal | identifier

# Factors are terms followed by an optional '?', '*', or '+' symbol, or another term with an optional '-' sign in between
factor ::= term S "?" | term S "*" | term S "+" | term S "-" S term | term S

# Concatenations are sequences of one or more whitespace-separated factors, each followed by an optional comma
concatenation ::= ( S factor S "," ? ) +

# Alternations are concatenations preceded by zero or more whitespace and followed by zero or one '|' symbols
alternation ::= ( S concatenation S "|" ? ) +

# The right-hand side of a rule is a sequence of one or more alternations, separated by whitespace
rhs ::= alternation

# The left-hand side of a rule is an identifier
lhs ::= identifier

# A rule consists of a left-hand side, followed by zero or more whitespace, an equals sign, zero or more whitespace, a right-hand side, zero or more whitespace, and a terminator symbol, followed by zero or more comment symbols
rule ::= lhs S "=" S rhs S terminator comment*

# The root of the grammar consists of zero or more comments, followed by zero or more rules separated by whitespace, and ending with an 'ENDTARGET' symbol
root ::= comment* ( S rule S ) * ENDTARGET

ENDTARGET
#+end_src
54 changes: 54 additions & 0 deletions data/dungeon/dungeon_1_2024-02-18T11:25:29-05:00_5.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@

#+begin_src input
ENDSRC . Please rewrite it to be more beautiful and in EBNF form. We are going to use the following TARGET: BEGINTARGET comment ::= "#" [a-zA-Z0-9 \t]*
letter ::= [a-zA-Z]
digit ::= [0-9]
S ::= ( " " | "\n" | "\t" | "\r" )
symbol ::= "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">" | "'" | "=" | "|" | "." | "," | ";" | "-" | "+" | "*" | "?"
character ::= letter | digit | symbol | "_" | " "
identifier ::= letter ( letter | digit | "_" )*
terminal ::= "'" character "'" ( character "'" ) "'"
terminator ::= (";" | ".")
term ::= "(" S rhs S ")" | "[" S rhs S "]" | "{" S rhs S "}" | terminal | identifier
factor ::= term S "?" | term S "*" | term S "+" | term S "-" S term | term S
concatenation ::= ( S factor S "," ? ) +
alternation ::= ( S concatenation S "|" ? ) +
rhs ::= alternation
lhs ::= identifier
rule ::= lhs S "=" S rhs S terminator comment*
root ::= comment* ( S rule S ) * ENDTARGET as our target grammar format. Please rewrite SRC into TARGET. You are to only respond using the target custom GBNF grammar and put descriptions or comments at the end of the rule with a #, see the grammar for rules. Please start with a comment using '#' to start comments on a new line.

#+end_src
#+begin_src output mixtral
Result: BEGINTARGET

# A production is defined as a non-terminal symbol followed by an '=' sign,
# then one or more alternatives separated by '|', and ending in a terminator.
# There can be zero or more comments following the terminator.
production ::= lhs "=" rhs terminator (comment)*

# A non-terminal symbol is defined as an identifier followed by optional whitespace.
lhs ::= identifier S

# The right hand side of a production consists of one or more alternatives,
# separated by '|'. Each alternative is made up of one or more concatenations,
# separated by whitespace.
rhs ::= alternation
alternation ::= (concatenation S)+ ("|" S)?
concatenation ::= (factor S ","?)+
factor ::= term S [ "?" | "*" | "+" | (“-” S term) ]

# A terminal symbol is defined as a single quote, followed by a character,
# possibly followed by more characters enclosed in quotes, and ending with a single quote.
terminal ::= "'" character (character "'")* "'"

# A rule consists of a production, optionally preceded and/or followed by whitespace,
# and zero or more comments following the production on the same line.
rule ::= S production S

# The root symbol is defined as any number of comments, followed by one or more rules,
# separated and/or preceded by whitespace, and ending in ENDTARGET.
root ::= comment* (S rule S)* ENDTARGET

ENDTARGET
#+end_src
Loading

0 comments on commit 7349d88

Please sign in to comment.