Skip to content

Commit

Permalink
Version 0.10
Browse files Browse the repository at this point in the history
  • Loading branch information
patrickfrey committed Apr 6, 2022
1 parent d4d06b2 commit e4108ae
Show file tree
Hide file tree
Showing 30 changed files with 1,263 additions and 332 deletions.
3 changes: 2 additions & 1 deletion GNUmakefile
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ INCFLAGS := -I$(SRCDIR) -I$(LUAINC) -I$(INCDIR)
LDFLAGS := -g -pthread
LDLIBS := -lm -lstdc++
LIBOBJS := $(BUILDDIR)/lexer.o \
$(BUILDDIR)/automaton.o $(BUILDDIR)/automaton_tostring.o $(BUILDDIR)/automaton_parser.o \
$(BUILDDIR)/automaton.o $(BUILDDIR)/automaton_tostring.o $(BUILDDIR)/languagedef_tostring.o \
$(BUILDDIR)/automaton_structs.o $(BUILDDIR)/automaton_parser.o \
$(BUILDDIR)/typedb.o \
$(BUILDDIR)/fileio.o $(BUILDDIR)/strings.o $(BUILDDIR)/error.o
MODOBJS := $(BUILDDIR)/lualib_mewa.o \
Expand Down
7 changes: 3 additions & 4 deletions INSTALL.Ubuntu.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@
Use Cmake with gcc or clang with C++17 support.

### LLVM Version
The examples of this version of _Mewa_ are based on LLVM version 10 and run also with version 11.
The examples pass with with LLVM versions 12 and 13 too, but the output of the IR differs slightly.
Some tests that compare the IR output will therefore fail with llvm-12 or llvm-13.
The examples of this version of _Mewa_ are based on LLVM version > 12 and run also with version 10 or 11.
The examples pass with LLVM versions 10 and 11 too, but the output of the IR differs slightly.

### Prerequisites
Install packages with 'apt-get'/aptitude.
Expand Down Expand Up @@ -41,7 +40,7 @@ make PREFIX=/usr/local install
```Bash
git clone https://github.com/patrickfrey/mewa
cd mewa
git checkout -b 0.9
git checkout -b 0.10

```

Expand Down
5 changes: 2 additions & 3 deletions INSTALL.Ubuntu.md.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@
Use Cmake with gcc or clang with C++17 support.

### LLVM Version
The examples of this version of _Mewa_ are based on LLVM version 10 and run also with version 11.
The examples pass with with LLVM versions 12 and 13 too, but the output of the IR differs slightly.
Some tests that compare the IR output will therefore fail with llvm-12 or llvm-13.
The examples of this version of _Mewa_ are based on LLVM version > 12 and run also with version 10 or 11.
The examples pass with LLVM versions 10 and 11 too, but the output of the IR differs slightly.

### Prerequisites
Install packages with 'apt-get'/aptitude.
Expand Down
13 changes: 10 additions & 3 deletions MANPAGE
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,28 @@ Print the current version of
.B mewa
.TP
\fB\-V\fR, \fB\--verbose\fR
Do verbose debug output to stderr.
Verbose debug output to stderr.
.TP
\fB\-g\fR, \fB\--generate-compiler\fR
Do generate a compiler as a
Generate a compiler as a
.BR Lua
module described by the grammar in the input file.
.TP
\fB\-s\fR, \fB\--generate-template\fR
Do generate a template for your
Generate a template for your
.BR Lua
module implementing the type system.
Extract all
.BR Lua
function calls from the grammar and print their empty implementation stubs to the output. No debug output is provided. Be careful using this option. It is only useful after the initial definition of the grammar of your language. It overwrites previous definitions without merging with your edits in a previously generated version.
.TP
\fB\-l\fR, \fB\--generate-language\fR
Generate a
.BR Lua
table with the language description parsed for
.BR Lua
scripts generating descriptions for interfacing with other tools.
.TP
\fB\-o\fR, \fB\--output=\fR \fIfile\fR
Write the output (parsing tables) to \fIfile\fR instead of stdout.
.TP
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.9
0.10
15 changes: 8 additions & 7 deletions doc/grammar.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ The following commands are known:

An example of a command-line parser can be found [here](../examples/cmdlinearg.lua).

* **%** **COMMENT** _start_ _end_ **;** Defines the content between the patterns _start_ and _end_ defined as regular expression quoted in single or double quotes as a comment. Comments are ignored by the lexer and thus by the compiler.
* **%** **COMMENT** _start_ **;** Defines the content starting with the pattern _start_ defined as regular expression quoted in single or double quotes until the next end of a line as a comment. Comments are ignored by the lexer and thus by the compiler.
* **%** **COMMENT** _open_ _close_ **;** Defines the content between the patterns _open_ and _close_ defined as regular expression quoted in single or double quotes as a comment. Comments are ignored by the lexer and thus by the compiler.
* **%** **COMMENT** _open_ **;** Defines the content starting with the pattern _open_ defined as regular expression quoted in single or double quotes until the next end of a line as a comment. Comments are ignored by the lexer and thus by the compiler.
* **%** **IGNORE** _pattern_ **;** Defines a token matching this pattern defined as regular expression quoted in single or double quotes as invisible. It is ignored by the lexer and thus by the compiler. Hence it does not need a name.
* **%** **BAD** _name_ **;** Defines the name of the error token of the lexer. Has no implications but for the debugging output.
* **%** **INDENTL** _open_ _close_ _nl_ _tabsize_ **;** Defines lexemes issued for indentation. Used to implement **off-side rule** languages. The tokens issued can be referenced in the grammar by the names specified as command arguments.
Expand All @@ -50,15 +50,16 @@ The following commands are known:
### Lexeme/Token Declarations
Lexeme declarations start with an identifier followed by a colon '**:**', a pattern matching the lexeme, and an optional selection index:

* _lexemename_ **:** _pattern_ **;**
* _lexemename_ **:** _pattern_ _select_ **;**
* _name_ **:** _pattern_ **;**
* _name_ **:** _pattern_ _select_ **;**

The name _lexemename_ defines the identifier this lexeme can be referred to as a token in the production declarations of the grammar.
_name_ defines the identifier this lexeme can be referred to as a token in the production declarations of the grammar.
The regular expression string _pattern_ quoted in single or double quotes defines the pattern that matches the _lexeme_.
You can have multiple declarations with the same name.
You can have multiple declarations with the same identifier _name_.
The optional integer number _select_ defines the index of the subexpression of the regular expression match to select as the value of the _lexeme_ recognized.
If _0_ or nothing is specified the whole expression match is chosen as the _token_ value emitted.
If multiple patterns match at the same source position then the longest match is emitted as the _token_ value. If two matches have the same length, the first declaration is chosen. If one of the matches is a keyword or an operator, it is always the first choice.
If multiple patterns match at the same source position then the longest match is emitted as the _token_ value. If two matches have the same length, the first declaration is chosen.
If one of the matches is a keyword or an operator, it is always the first choice.
Keywords and operators of the grammar are not declared in the lexer section but are referred to as strings in the production declarations of the grammar.

### Production Declarations
Expand Down
2 changes: 1 addition & 1 deletion doc/intro.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ make install
```Bash
git clone https://github.com/patrickfrey/mewa
cd mewa
git checkout -b 0.9
git checkout -b 0.10

```
#### Configure to find Lua includes and to write the file Lua.inc included by make
Expand Down
Binary file modified doc/program_mewa.pdf
Binary file not shown.
32 changes: 26 additions & 6 deletions examples/language1/grammar.g
Original file line number Diff line number Diff line change
@@ -1,20 +1,40 @@
% LANGUAGE language1;
% TYPESYSTEM "language1/typesystem";
% CMDLINE "cmdlinearg";
# @description C style comments
# @rule COMMENT ::= "/*" ... "*/"
% COMMENT "/*" "*/";
# @description C++ style end of line comments
# @rule COMMENT ::= "//" ... "\\n"
% COMMENT "//";

# @rule BOOLEAN ::= "true" | "false"
BOOLEAN : '((true)|(false))';
# @rule LOALPHA ::= "a"|"b"|"c"|"d"|"e"|"f"|"g"|"h"|"i"|"j"|"k"|"l"|"m"|"n"|"o"|"p"|"q"|"r"|"s"|"t"|"u"|"v"|"w"|"x"|"y"|"z"
# HIALPHA ::= "A"|"B"|"C"|"D"|"E"|"F"|"G"|"H"|"I"|"J"|"K"|"L"|"M"|"N"|"O"|"P"|"Q"|"R"|"S"|"T"|"U"|"V"|"W"|"X"|"Y"|"Z"
# USCORE ::= "_"
# IDENT ::= (LOALPHA | HIALPHA | USCORE) (LOALPHA | HIALPHA | USCORE | DIGIT)*
IDENT : '[a-zA-Z_]+[a-zA-Z_0-9]*';
# @rule DQSTRING ::= [double quoted string]
# @description Double quoted string; backslashes are used for escaping double quotes and backslashes in the string
DQSTRING: '["]((([^\\"\n]+)|([\\][^"\n]))*)["]' 1;
# @rule SQSTRING ::= [single quoted string]
# @description Single quoted string; backslashes are used for escaping single quotes and backslashes in the string
SQSTRING: "[']((([^\\'\n]+)|([\\][^'\n]))*)[']" 1;
# @rule DIGIT ::= ("0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9")
# UINTEGER ::= DIGIT+
UINTEGER: '[0123456789]+';
# @rule FLOAT ::= DIGIT* "." DIGIT+
FLOAT : '[0123456789]*[.][0123456789]+';
# @rule EXPONENT ::= ("E"|"e") (("-"|"+") DIGIT+ | DIGIT+)
# FLOAT ::= DIGIT* "." DIGIT+ EXPONENT
FLOAT : '[0123456789]*[.][0123456789]+[Ee][+-]{0,1}[0123456789]+';
# @description Numbers must not be followed immediately by an identifier
ILLEGAL : '[0123456789]+[A-Za-z_]';
ILLEGAL : '[0123456789]*[.][0123456789]+[A-Za-z_]';
ILLEGAL : '[0123456789]*[.][0123456789]+[Ee][+-]{0,1}[0123456789]+[A-Za-z_]';

# @startsymbol program
program = extern_definitionlist free_definitionlist main_procedure (program)
;
extern_definitionlist = extern_definition extern_definitionlist
Expand Down Expand Up @@ -71,7 +91,7 @@ inclass_definition = typedefinition ";" (definition 1)
| operatordefinition (definition_2pass 4)
| constructordefinition (definition_2pass 4)
;
free_definition = namespacedefinition
free_definition = namespacedefinition
| typedefinition ";" (definition 1)
| variabledefinition ";" (definition 1)
| structdefinition (definition 1)
Expand Down Expand Up @@ -125,7 +145,7 @@ classdefinition = "class" IDENT "{" inclass_definitionlist "}" (classdef)
| "class" IDENT ":" inheritlist "{" inclass_definitionlist "}" (classdef)
| "generic" "class" IDENT "[" generic_header "]"
"{" inclass_definitionlist "}" (generic_classdef)
| "generic" "class" IDENT "[" generic_header "]"
| "generic" "class" IDENT "[" generic_header "]"
":" inheritlist "{" inclass_definitionlist "}" (generic_classdef)
;
linkage = "private" (linkage {private=true, linkage="internal", explicit=true})
Expand Down Expand Up @@ -280,10 +300,10 @@ expression/L4 = expression "||" expression (>>binop "||")
expression/L5 = expression "&&" expression (>>binop "&&")
;
expression/L6 = expression "|" expression (>>binop "|")
;
;
expression/L7 = expression "^" expression (>>binop "^")
| expression "&" expression (>>binop "&")
;
;
expression/L8 = expression "==" expression (>>binop "==")
| expression "!=" expression (>>binop "!=")
| expression "<=" expression (>>binop "<=")
Expand All @@ -295,9 +315,9 @@ expression/L9 = expression "+" expression (>>binop "+")
| expression "-" expression (>>binop "-")
| "&" expression (operator_address "&")
| "-" expression (>>unop "-")
| "+" expression (>>unop "+")
| "+" expression (>>unop "+")
| "~" expression (>>unop "~")
| "!" expression (>>unop "!")
| "!" expression (>>unop "!")
;
expression/L10 = expression "*" expression (>>binop "*")
| expression "/" expression (>>binop "/")
Expand Down
108 changes: 108 additions & 0 deletions examples/printGrammarFromImage.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
-- This module exports the function "printLanguageDef" that builds a mewa language description file (equivalent to the original ".g" file)
-- from a Lua table generated with the mewa option "--generate-language" or "-l"
--
-- This reverse process is in itself not very useful but for testing and as an example.
--
require "io"
require "string"
require "math"

local reserved = {"op","name","pattern","open","close","tabsize","nl","select","priority","left","right","scope","call","line"}
local image = {}

-- Tell whether the array part of a table holds a given value.
-- @param tb   table whose elements 1..#tb are scanned in order
-- @param val  value to look for (compared with ==)
-- @return true if some element equals val, false otherwise
local function contains( tb, val)
	local count = #tb
	local pos = 1
	while pos <= count do
		if tb[ pos] == val then
			return true
		end
		pos = pos + 1
	end
	return false
end

-- Print every attribute of a rule whose key is not in the list "reserved"
-- as a comment block in front of the rule:
--	# @<key> <first entry>
--	#	<second entry>
--	...
-- Attributes with a length of 0 are skipped.
-- NOTE(review): assumes every non-reserved attribute is an array of strings -- confirm
--	against the table emitted by mewa. Keys are visited with pairs(), so the order in
--	which different attributes are printed is not defined.
-- @param rule  one entry of the RULES array of the language description
local function printDecoratorsAsComments( rule)
	for attr, lines in pairs( rule) do
		if not contains( reserved, attr) then
			local count = #lines
			if count > 0 then
				print( "# @" .. attr .. " " .. lines[ 1])
				for li = 2, count do
					print( "#\t" .. lines[ li])
				end
			end
		end
	end
end

-- Wrap a string into quotes.
-- Single quotes are chosen if the string contains a double quote,
-- double quotes in any other case. The content itself is not escaped.
-- @param str  string to quote
-- @return the quoted string
local function quoteString( str)
	local delim = string.find( str, "\"") and "\'" or "\""
	return delim .. str .. delim
end

-- Build the right-hand side of a production as a string.
-- Elements of type "name" are emitted as they are, elements of type "symbol"
-- are emitted quoted (see quoteString). The elements are separated by one space.
-- @param right  array of production elements, each a table with the fields "type" and "value"
-- @return the elements joined to a string, or nil if the array is empty
-- @raise error if an element has a type other than "name" or "symbol"
local function productionElementListToString( right)
	local rt = nil
	for _,elem in ipairs( right) do
		-- Declared local: the element value must not leak into the global environment
		local value
		if elem.type == "name" then
			value = elem.value
		elseif elem.type == "symbol" then
			value = quoteString( elem.value)
		else
			-- tostring keeps the message intact even if the type field is missing
			error( "unknown production element type '" .. tostring( elem.type) .. "'")
		end
		rt = not rt and value or rt .. " " .. value
	end
	return rt
end

-- Print a mewa language description (the equivalent of a ".g" file) to stdout
-- from a language description table.
-- The header fields LANGUAGE, TYPESYSTEM and CMDLINE are printed first, then the
-- entries of the array RULES in their order. Each rule is preceded by its
-- decorators printed as comments. Alternatives of the same production (same left
-- side and priority) that follow each other are merged into one declaration
-- separated by "|".
-- @param def  language description table with the fields LANGUAGE, TYPESYSTEM,
--	CMDLINE and the array RULES; each rule is dispatched on its field "op"
--	("COMMENT", "INDENTL", "BAD", "IGNORE", "TOKEN" or "PROD"), rules with
--	another "op" print their decorators only
function image.printLanguageDef( def)
	print( "% LANGUAGE " .. def.LANGUAGE .. ";")
	print( "% TYPESYSTEM \"" .. def.TYPESYSTEM .. "\";")
	print( "% CMDLINE \"" .. def.CMDLINE .. "\";")
	-- State of the production currently accumulated. Kept local so that nothing
	-- leaks into the global environment or survives between two calls.
	local prev_prodname = nil
	local rulestr = nil
	for _,rule in ipairs( def.RULES ) do
		printDecoratorsAsComments( rule)
		if rule.op == "COMMENT" then
			if rule.close then
				print( "% COMMENT " .. quoteString( rule.open) .. " " .. quoteString( rule.close) .. ";")
			else
				print( "% COMMENT " .. quoteString( rule.open) .. ";")
			end
		elseif rule.op == "INDENTL" then
			print( "% INDENTL " .. quoteString( rule.open) .. " " .. quoteString( rule.close) .. " " .. quoteString( rule.nl) .. " " .. quoteString( rule.tabsize) .. ";")
		elseif rule.op == "BAD" then
			print( "% BAD " .. quoteString( rule.name) .. ";")
		elseif rule.op == "IGNORE" then
			print( "% IGNORE " .. quoteString( rule.pattern) .. ";")
		elseif rule.op == "TOKEN" then
			if rule.select then
				print( rule.name .. ": " .. quoteString( rule.pattern) .. " " .. rule.select .. ";")
			else
				print( rule.name .. ": " .. quoteString( rule.pattern) .. ";")
			end
		elseif rule.op == "PROD" then
			local left = rule.priority and rule.left .. "/" .. rule.priority or rule.left
			local indent
			if prev_prodname == left then
				-- Another alternative of the production being accumulated
				indent = string.rep( "\t", 3)
				rulestr = rulestr .. "\n" .. indent .. "| "
			else
				-- A new production starts: one tab less per 8 characters of the left side
				-- (presumably to align the "=" for a tab width of 8 -- confirm)
				indent = string.rep( "\t", math.max( 3 - math.floor(string.len( left) / 8), 0))
				if rulestr then
					-- Flush the previous production, it is complete now
					print( rulestr .. ";")
				end
				rulestr = left .. indent .. "= "
				prev_prodname = left
			end
			if #rule.right > 0 then
				rulestr = rulestr .. productionElementListToString( rule.right)
			end
			if rule.call or rule.scope then
				rulestr = rulestr .. " (" .. (rule.scope or "") .. (rule.call or "") .. ")"
			end
		end
	end
	-- Flush the last production, no following rule triggers its output
	if rulestr then
		print( rulestr .. ";")
	end
end

return image
14 changes: 7 additions & 7 deletions src/automaton.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
Copyright (c) 2020 Patrick P. Frey
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
Expand Down Expand Up @@ -108,7 +108,7 @@ class Automaton
}
static ActionKey unpack( int pkg) noexcept
{
static_assert (ShiftState + ShiftTerminal <= 31, "sizeof packed action key structure");
static_assert (ShiftState + ShiftTerminal <= 31, "sizeof packed action key structure");

return ActionKey( pkg >> ShiftTerminal /*state*/, pkg & MaskTerminal /*terminal*/);
}
Expand Down Expand Up @@ -156,12 +156,12 @@ class Automaton
}
static Action unpack( int pkg) noexcept
{
static_assert (ShiftProductionLength + ShiftCall + ShiftState + ShiftScopeFlag + ShiftActionType <= 31, "sizeof packed action structure");
static_assert (ShiftProductionLength + ShiftCall + ShiftState + ShiftScopeFlag + ShiftActionType <= 31, "sizeof packed action structure");

return Action( (Type)((pkg >> (ShiftProductionLength + ShiftCall + ShiftState + ShiftScopeFlag)) & MaskActionType)/*type*/,
(ScopeFlag)((pkg >> (ShiftProductionLength + ShiftCall + ShiftState)) & MaskScopeFlag)/*scopeflag*/,
(pkg >> (ShiftProductionLength + ShiftCall)) & MaskState/*value*/,
(pkg >> ShiftProductionLength) & MaskCall/*call*/,
(pkg >> ShiftProductionLength) & MaskCall/*call*/,
(pkg) & MaskProductionLength/*count*/);
}

Expand Down Expand Up @@ -202,7 +202,7 @@ class Automaton
}
static GotoKey unpack( int pkg) noexcept
{
static_assert (ShiftState + ShiftTerminal <= 31, "sizeof packed goto-key structure");
static_assert (ShiftState + ShiftTerminal <= 31, "sizeof packed goto-key structure");

return GotoKey( pkg >> ShiftTerminal /*state*/, pkg & MaskTerminal /*nonterminal*/);
}
Expand Down Expand Up @@ -234,7 +234,7 @@ class Automaton
}
static Goto unpack( int pkg) noexcept
{
static_assert (ShiftState <= 31, "sizeof packed goto structure");
static_assert (ShiftState <= 31, "sizeof packed goto structure");

return Goto( pkg);
}
Expand All @@ -243,7 +243,7 @@ class Automaton
short m_state;
};

/// \brief Encoded node call reference attached to a production to be performed after its reduction
/// \brief Encoded node call reference attached to a production to be performed after its reduction
class Call
{
public:
Expand Down
Loading

0 comments on commit e4108ae

Please sign in to comment.