Skip to content

Commit

Permalink
Finalized building the AST
Browse files Browse the repository at this point in the history
  • Loading branch information
datawater committed Jun 24, 2024
1 parent 0b53a25 commit 82d26c5
Show file tree
Hide file tree
Showing 11 changed files with 233 additions and 67 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ data/*.zip
data/twic*.pgn
mkramdisk
*.data
*.perf
*.perf
log.log
15 changes: 11 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ license = "GPL-3.0"
# clap = { version = "4.5.7", features = ["derive"] }
clap = { version = "0.1.0", path = "clap-dynamic", package = "clap-dynamic" }
clap_complete = "4.5.5"
litemap = "0.7.3"
memmap2 = "0.9.4"
pgn-lexer = { git = "https://github.com/datawater/pgn-lexer" }

Expand Down
52 changes: 50 additions & 2 deletions NOTICE.html
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ <h2>Overview of licenses:</h2>
<li><a href="#Apache-2.0">Apache License 2.0</a> (32)</li>
<li><a href="#MIT">MIT License</a> (4)</li>
<li><a href="#GPL-3.0">GNU General Public License v3.0 only</a> (2)</li>
<li><a href="#Unicode-3.0">Unicode License v3</a> (1)</li>
<li><a href="#Unicode-DFS-2016">Unicode License Agreement - Data Files and Software (2016)</a> (1)</li>
<li><a href="#Unlicense">The Unlicense</a> (1)</li>
</ul>
Expand Down Expand Up @@ -915,7 +916,7 @@ <h4>Used by:</h4>
<li><a href=" https://github.com/rust-lang/libc ">libc 0.2.155</a></li>
<li><a href=" https://github.com/dtolnay/proc-macro2 ">proc-macro2 1.0.86</a></li>
<li><a href=" https://github.com/dtolnay/quote ">quote 1.0.36</a></li>
<li><a href=" https://github.com/dtolnay/syn ">syn 2.0.67</a></li>
<li><a href=" https://github.com/dtolnay/syn ">syn 2.0.68</a></li>
<li><a href=" https://github.com/dtolnay/unicode-ident ">unicode-ident 1.0.12</a></li>
<li><a href=" https://github.com/alacritty/vte ">utf8parse 0.2.2</a></li>
</ul>
Expand Down Expand Up @@ -1773,7 +1774,7 @@ <h4>Used by:</h4>
<h3 id="GPL-3.0">GNU General Public License v3.0 only</h3>
<h4>Used by:</h4>
<ul class="license-used-by">
<li><a href=" https://github.com/datawater/pgn-lexer ">pgn-lexer 0.2.1-alpha</a></li>
<li><a href=" https://github.com/datawater/pgn-lexer ">pgn-lexer 0.2.2-alpha</a></li>
</ul>
<pre class="license-text"> GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Expand Down Expand Up @@ -2789,6 +2790,53 @@ <h4>Used by:</h4>
<pre class="license-text">This project is dual-licensed under the Unlicense and MIT licenses.

You may use this code under the terms of either license.
</pre>
</li>
<li class="license">
<h3 id="Unicode-3.0">Unicode License v3</h3>
<h4>Used by:</h4>
<ul class="license-used-by">
<li><a href=" https://github.com/unicode-org/icu4x ">litemap 0.7.3</a></li>
</ul>
<pre class="license-text">UNICODE LICENSE V3

COPYRIGHT AND PERMISSION NOTICE

Copyright © 1991-2023 Unicode, Inc.

NOTICE TO USER: Carefully read the following legal agreement. BY
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.

Permission is hereby granted, free of charge, to any person obtaining a
copy of data files and any associated documentation (the &quot;Data Files&quot;) or
software and any associated documentation (the &quot;Software&quot;) to deal in the
Data Files or Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, and/or sell
copies of the Data Files or Software, and to permit persons to whom the
Data Files or Software are furnished to do so, provided that either (a)
this copyright and permission notice appear with all copies of the Data
Files or Software, or (b) this copyright and permission notice appear in
associated Documentation.

THE DATA FILES AND SOFTWARE ARE PROVIDED &quot;AS IS&quot;, WITHOUT WARRANTY OF ANY
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
THIRD PARTY RIGHTS.

IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
FILES OR SOFTWARE.

Except as contained in this notice, the name of a copyright holder shall
not be used in advertising or otherwise to promote the sale, use or other
dealings in these Data Files or Software without prior written
authorization of the copyright holder.
</pre>
</li>
<li class="license">
Expand Down
3 changes: 2 additions & 1 deletion about.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ accepted = [
"Unicode-DFS-2016",
"GPL-3.0",
"MPL-2.0",
"Unlicense"
"Unlicense",
"Unicode-3.0"
]
21 changes: 8 additions & 13 deletions data/with_varation_and_comments.pgn
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
[Event "prep for tournament: Grunfeld as black"]
[Site "https://lichess.org/study/Hf31t9sp/GZVgLZYn"]
[Result "*"]
[Variant "Standard"]
[ECO "D86"]
[Opening "Grünfeld Defense: Exchange Variation, Classical Variation"]
[Annotator "https://lichess.org/@/peterisacutekitten"]
[UTCDate "2022.09.12"]
[UTCTime "08:36:57"]
[E "E"]
[S "S"]

1. d4 { [%cal Gg8f6] } 1... Nf6 2. c4 g6 3. Nc3 d5 4. cxd5 (4. Nf3 Bg7 { [%cal Gc1g5] } 5. Qb3 (5. cxd5 Nxd5 6. e4 Nxc3 7. bxc3) (5. Bg5 { [%cal Gf6e4] } 5... Ne4 6. cxd5 (6. Nxe4 dxe4 7. Nd2 Qxd4) 6... Nxg5 7. Nxg5 e6 8. Qd2 exd5 9. Qe3+ Kf8 10. Qf4 { [%csl Gg4] } 10... Bf6 11. h4 h6 12. Nf3 Kg7 13. O-O-O Be6 14. e3 Nd7 15. g4 c6 16. g5 (16. h5 g5) (16. Bd3 Qb8 (16... b5) (16... a5 17. g5 hxg5 18. hxg5 Be7) 17. Qxb8 Raxb8) 16... hxg5 17. hxg5) 5... dxc4 6. Qxc4 O-O { [%cal Gd4d5] } 7. e4 Nc6 (7... a6 8. Be2 b5 9. Qb3 c5 10. dxc5 Be6 11. Qc2 Nbd7 12. Be3 Rc8 13. b4 Nxe4 14. Nxe4 (14. Bd2 Ndxc5 15. bxc5 Nxc3 16. Bxc3 Rxc5) 14... Bxa1) 8. Be2 Bg4 9. d5 Bxf3 10. Bxf3 Ne5 11. Qe2) 4... Nxd5 5. e4 Nxc3 6. bxc3 Bg7 { There are a few main lines after this move, Bc4, Nf3 and Be3. First
We'll cover Bc4 } { [%cal Gf1c4] } 7. Bc4 (7. Nf3 c5 8. Be3 (8. Rb1 O-O 9. Be2 (9. Bd2) 9... cxd4 10. cxd4 Qa5+ 11. Bd2 Qxa2 12. O-O Bg4 { [%cal Gb1b7] } 13. Rxb7 { Aronian, L. - Svidler, P., 1/2-1/2, 2nd Norway Chess 2014, https://lichess.org/MEvCmKtn } 13... Bxf3 14. Bxf3 Bxd4 15. Bb4 Rd8 16. Qc1 Na6 17. Bxe7 Rac8 18. Qf4 Nc5 19. Bxd8 Nxb7 20. Bf6 Bxf6 21. Qxf6 Nc5) 8... Qa5 { [%cal Gd1d2] } (8... Nc6) (8... O-O 9. Qd2 Qa5 10. Rc1 Rd8 11. d5 e6 12. Bg5 f6 13. Be3 Nc6) 9. Qd2 Nc6 { [%csl Gb1,Gb7][%cal Gb1b7] } 10. Rb1 a6 { [%cal Gb1c1] } 11. Rc1 cxd4 12. cxd4 Qxd2+ 13. Kxd2 (13. Bxd2) 13... e6 { Again, The engine thinks that this position is a complete
draw } (13... f5 14. d5 (14. e5 Be6 15. Bc4 Bxc4 16. Rxc4 O-O 17. Ke2 Rad8 18. Rb1 Rd7 19. g3 e6 20. Ng5 Re8 21. h4 Na5 22. Ra4 Nc6) 14... Nb4 15. Bc5 (15. Bc4 fxe4 16. Ng5 b5 17. Bb3 h6 18. Nxe4 Bf5 19. Ng3 Nxd5 20. Nxf5 gxf5 21. Bxd5 Rd8 22. Rc5 e6 23. Ke2 exd5)) 14. Bd3 O-O 15. h4 h6 16. Rc5 Bd7 17. Rb1 b5 18. e5 Rfc8 19. Be4 Bf8 20. Rcc1 Rab8) (7. Be3 c5 { [%csl Ge2][%cal Gf1e2,Gd1d2] } 8. Qd2 Qa5 9. Rc1 cxd4 10. cxd4 Qxd2+ 11. Bxd2 O-O 12. Nf3 Bg4 { [%cal Gh2h3] } 13. d5 (13. Be3 Nc6 14. d5 Bxf3 15. gxf3 Nd4 16. Bh3 f5 17. exf5 Nxf5) 13... Nd7 { Again, this is a total draw } 14. Rc7 { [%csl Gc6][%cal Gc7b7,Gd7c6] } 14... Rfc8 { [%csl Gb7,Gc5][%cal Gc7b7,Gd7c5] } 15. Rxb7 (15. Rxc8+ Rxc8 16. Be3 Rc2) 15... Nc5 { [%cal Gb7e7] } 16. Rxe7 (16. Rb1 Nxe4) (16. Rb4 a5 17. Rc4 Rab8 18. Be3 Nd3+ 19. Bxd3 Rxc4 20. Bxc4 Rb1+ 21. Ke2 Rxh1) 16... Bf6 { If they try to come into the 7th rank, you win
an exchange } 17. Ne5 Bxe7 18. Nxg4) (7. Bg5 c5 { [%cal Ga1c1] } 8. Rc1 O-O 9. Nf3 Bg4 { [%csl Gd5][%cal Gd4d5] } 10. d5 (10. Be2 cxd4 11. cxd4) 10... Qd6 11. Be2 Nd7 12. O-O (12. c4 Bxf3 (12... f5 13. exf5 Bxf5 14. Qd2 Nf6 15. Bf4 Qb6 16. O-O e6 17. Be5 exd5 18. cxd5) 13. Bxf3 b5 14. cxb5 a6 { [%csl Ga6][%cal Gb5a6] } 15. bxa6 { [%csl Ga6][%cal Gd6a6] } 15... Rxa6 { [%csl Ga8][%cal Gc1c2,Gf8a8] } 16. Rc2 Rfa8 { [%csl Ga1][%cal Ga8a1] } 17. Qe2 Bf6 { [%cal Gg5c1] } 18. Bxf6 { [%cal Gd6g6] } 18... Qxf6 { [%cal Ge1g1] } 19. O-O Ne5 20. Rfc1 c4 { [%csl Gc4][%cal Gh2h3,Ge5f3,Ge2f3,Ga6a2,Gc2c4] } 21. h3 { [%csl Ga6] } 21... Ra3) 12... Bxf3 13. Bxf3 b5 14. Qd2 c4 15. Qe3 a5 (15... Rfe8 16. Rfd1 Nc5) 16. Be2 Rfc8 17. Rfd1 Nc5 { Arun Prasad, Subramanian - Gupta, Abhijeet, 1/2-1/2, Kolkata op 5th, 2012, https://lichess.org/ytOnUHOq } 18. h3 Na4 19. Bf4 Qc5 20. Qxc5 Nxc5 21. e5 e6 22. g3 Rab8 23. h4 Bf8 24. Be3 exd5 25. Rxd5 Ne6) 7... c5 8. Ne2 Nc6 (8... O-O) 9. Be3 O-O 10. O-O Qc7 { [%cal Ge3f4] } (10... Bg4 11. f3 Bd7 { [%csl Gd8] } (11... Na5 12. Bxf7+ Rxf7 13. fxg4 Rxf1+ 14. Kxf1 { [%csl Gd6][%cal Gd8d6] } (14. Qxf1 Nc4 { [%csl Ge3][%cal Gc4e3] } 15. Qc1 { [%csl Gd5] } 15... cxd4 16. cxd4 Rc8 17. Bg5 (17. Qc3 Qd7 18. Rc1 (18. h3 { [%csl Gd7] } 18... Qe6 19. Qd3 Nxe3 20. Qxe3 Rc2) 18... Qd6)) 14... cxd4 15. cxd4 e5 16. Kg1 Rc8 17. d5 Nc4 18. Qb3 Nxe3 19. Qxe3 Rc2 (19... Qb6 20. Qc5) 20. a3 (20. h3 Qb6 21. Qxb6 axb6 22. Kf2 Bf8 23. Ke3 Bh6+ 24. Kf3 (24. Kf2 Bf8 25. Ke3 Bh6+ 26. Kf2 Bf8 27. Ke3 Bh6+ 28. Kf3 b5 29. h4 b4 30. g5 Bf8 31. Ke3 b5 32. Kd3 Rb2 33. Ke3 Bc5+ 34. Kd3) (24. Kd3 Rd2+) 24... Bf8 25. a4 (25. a3 Rd2 (25... Rb2)) 25... Rd2) (20. h4)) 12. Rb1 { [%cal Gb1b8] } 12... cxd4 (12... Qc7 13. Qd2 Rad8 14. Rfc1 Bc8 15. Qb2 Na5 { [%cal Gc4b3] } (15... e5) 16. Bd3 (16. Bb3) 16... e5 17. d5 c4 18. Bc2 b6 19. Rd1 Nb7 20. Ng3 Bd7 21. a4 Rfe8 22. Nf1 Bf8 23. Kh1 Rc8) 13. cxd4 a6 14. Rxb7 (14. Qd2 b5 15. Bd3 (15. Bb3 Qa5 16. Qd3 Nb4 17. Bd2 Nxd3 18. Bxa5 Bh6) 15... Qa5 16. 
Qxa5 Nxa5 17. Rfc1 Rfc8) (14. d5 Ne5 15. Bb3 Rc8) (14. f4 Na5 15. Bd3 e6 16. e5 (16. d5 exd5 17. exd5 b5) 16... Rc8) (14. a4 Na5 15. Bd3 Rc8) 14... Na5 15. Rb4 Nxc4 16. Rxc4 Bb5 17. Rb4 Qa5 18. Rb2 { [%csl Gf2][%cal Gb5e2] } 18... Rac8 $140 { The plan of Rfd8, maybe exchanging on e2, pushing e6. }) (10... cxd4 11. cxd4 Bg4 12. f3 Na5 { [%csl Gb3,Gd5][%cal Gc4b3,Gf5d5] } 13. Bxf7+ Rxf7 14. fxg4 Rxf1+ 15. Kxf1 { [%csl Ga5] } 15... Nc4 16. Qb3 Rc8) 11. Rc1 (11. Bf4 e5 12. dxe5 Nxe5 13. Rb1 Rb8) 11... Rd8 12. Bf4 Qd7 13. d5 Na5 14. Bd3 b5 15. Rb1 a6 16. Qc1 e6 17. Qa3 c4 18. Bc2 (18. Qxa5 cxd3 19. Nd4 Bb7) 18... Nb7 19. dxe6 Qxe6 20. Nd4 Qg4 (20... Qb6 { Dosi, Alberto - Nimtz, Dr. Manfred, 1/2-1/2, MT-Lannaioli (ITA), 2007, https://lichess.org/383JGVYV }) *
1. e4 (1. d4) (1. c4) 1... e5 2. Nf3 (2. Nc3 d5 (2... d6)) (2. d3) { Comment } *

[E "E"]
[S "S"]

1. e4 (1. d4) (1. c4) 1... e5 2. Nf3 (2. Nc3 d5 (2... d6)) (2. d3) { Comment } *
3 changes: 2 additions & 1 deletion deny.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ allow = [
"MIT",
"Apache-2.0",
"Unicode-DFS-2016",
"Unlicense"
"Unlicense",
"Unicode-3.0"
]
confidence-threshold = 0.8
exceptions = [
Expand Down
16 changes: 12 additions & 4 deletions src/eval_args.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
use crate::pgn::pgn_tokens_to_ast;

use super::pgn;
use super::Cli;

use memmap2::Mmap;
use std::fs::File;
use std::io::BufWriter;
use std::io::Write;

pub fn eval_args(cli: &Cli) {
use std::process::exit;
Expand Down Expand Up @@ -31,11 +35,15 @@ pub fn eval_args(cli: &Cli) {
}

let mut mmap = unsafe { mmap.unwrap_unchecked() };
let tokens = pgn::parse_pgn(&mut mmap);
let mut tokens = pgn::parse_pgn(&mut mmap);

for token in tokens {
println!("{token}");
}
std::hint::black_box(pgn_tokens_to_ast(&mut tokens));

// let ast = pgn_tokens_to_ast(&mut tokens);

// let file = File::create(&args.output).unwrap();
// let mut bufwriter = BufWriter::new(file);
// writeln!(bufwriter, "{:#?}", ast).unwrap();
}

crate::CommandE::License => {
Expand Down
161 changes: 134 additions & 27 deletions src/pgn/ast.rs
Original file line number Diff line number Diff line change
@@ -1,46 +1,153 @@
use super::{tokens::PgnToken, tree::Tree};
use std::{collections::VecDeque, error::Error};
// use super::tree::Tree;
use litemap::LiteMap;
use pgn_lexer::parser::Token;
use std::collections::VecDeque;

pub fn pgn_tokens_to_ast(
tokens: &mut VecDeque<PgnToken>,
) -> Result<Tree<PgnToken>, Box<dyn Error>> {
let mut tree = Tree::new();
/// One element of a variation's token stream in the PGN AST.
///
/// An element is either a token taken from the lexer or a pointer to a nested
/// variation that is stored separately in the owning game's variation map.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum PgnToken<'a> {
/// A token produced by `pgn_lexer`, stored unchanged.
Token(Token<'a>),
/// Key of a nested variation. `next_token` pushes this when it meets a
/// `Token::StartVariation` and inserts an empty variation under the same
/// key in the game's variation map.
VariationPointer(u16),
/// Placeholder variant; this is the value returned by `Default`.
#[default]
None,
}

/// A single variation: the ordered list of `PgnToken`s pushed for it.
///
/// The default value is an empty list.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct PgnVariation<'a>(Vec<PgnToken<'a>>);

/// One game of the AST, wrapping a `(tokens, variations)` pair.
///
/// * `.0.0` holds tokens stored directly on the game; `next_token` pushes
///   `TagSymbol`, `TagString` and `Result` tokens here.
/// * `.0.1` holds the game's variations keyed by their `u16` variation
///   number. `pgn_tokens_to_ast` drives the top level with key `0`; nested
///   variations are referenced from their parent through
///   `PgnToken::VariationPointer`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct PgnGame<'a>((Vec<Token<'a>>, LiteMap<u16, PgnVariation<'a>>));

pub fn pgn_tokens_to_ast<'a>(tokens: &mut VecDeque<Token<'a>>) -> Vec<PgnGame<'a>> {
let mut tree: Vec<PgnGame<'a>> = Vec::new();
let mut game_number = 0;
let mut amount_of_encountered_variations = 1;

tree.push(PgnGame::default());
unsafe {
let value = &mut tree.get_unchecked_mut(0).0;

value.0 = Vec::new();
value.1.insert(0, PgnVariation::default());
}

while tokens.len() != 0 {
next_token(tokens, &mut tree, u16::MAX)
next_token(
tokens,
&mut tree,
&mut game_number,
0,
&mut amount_of_encountered_variations,
);
}

Ok(tree)
tree.pop();

tree
}

fn next_token(
tokens: &mut VecDeque<PgnToken>,
tree: &mut Tree<PgnToken>,
mut half_move_number: u16,
) {
use PgnToken::*;
/// Appends a clone of `$token` to variation `$variation_number` of game
/// `$game_number` inside `$tree`.
///
/// `$game_number` is cast to `usize` to index the list of games, while
/// `$variation_number` is handed as-is to the variation map's `get_mut`
/// (the call sites in this file pass a `&u16`).
///
/// # Panics
///
/// Panics through `unwrap` if the game index is out of bounds or if no
/// variation is registered under the given key.
macro_rules! push_token {
($tree:expr, $game_number:expr, $variation_number:expr, $token:expr) => {
$tree
.get_mut($game_number as usize)
.unwrap()
// `.0` unwraps the `PgnGame` newtype, `.1` selects the variation map.
.0
.1
.get_mut($variation_number)
.unwrap()
// `.0` unwraps the `PgnVariation` newtype to reach its token list.
.0
.push($token.clone())
};
}

let token = tokens.pop_front().unwrap();
// TODO: AST building is currently done in plain strings only. Decide whether or not to convert it to binary format straight away, or do it with multi-threading after the fact.
fn next_token<'a>(
tokens: &mut VecDeque<Token<'a>>,
tree: &mut Vec<PgnGame<'a>>,
game_number: &mut u32,
variation_number: u16,
amount_of_encountered_variations: &mut u16,
) {
// NOTE: I don't know if this is slow. (Like this whole approach) I'm just gonna pretend it isn't until it causes problems
let token = unsafe { tokens.pop_front().unwrap_unchecked() };

match token {
Header(_, _) | Comment(_) | Result(_) | VariationEnd => tree.insert(token),

HalfMoveNumber(v) => {
half_move_number = v;
Token::Move(_)
| Token::Commentary(_)
| Token::NAG(_)
| Token::MoveAnnotation(_)
| Token::MoveNumber(_, _) => {
push_token!(
tree,
*game_number,
&variation_number,
PgnToken::Token(token)
)
}
Token::TagSymbol(_) | Token::TagString(_) => tree
.get_mut(*game_number as usize)
.unwrap()
.0
.0
.push(token),
Token::NullMove(_) => {}
Token::EscapeComment(_) => { /* NOTE: IDK what to do with this */ }
Token::Result(_) => {
tree.get_mut(*game_number as usize)
.unwrap()
.0
.0
.push(token);

PgnMove(_) => {

}
*game_number += 1;
*amount_of_encountered_variations = 1;

VariationStart => next_token(tokens, tree, half_move_number),
tree.push(PgnGame::default());
unsafe {
let value = &mut tree.get_unchecked_mut(*game_number as usize).0;

NGA(_) => {
unreachable!()
value.0 = Vec::new();
value.1.insert(variation_number, PgnVariation::default());
}
}
Token::StartVariation(_) => {
let new_variation_number = *amount_of_encountered_variations * (variation_number + 1);

*amount_of_encountered_variations += 1;

push_token!(
tree,
*game_number,
&variation_number,
PgnToken::VariationPointer(new_variation_number)
);

None => {
unreachable!()
unsafe {
let value = &mut tree.get_unchecked_mut(*game_number as usize).0;
value
.1
.insert(new_variation_number, PgnVariation::default());
}

next_token(
tokens,
tree,
game_number,
new_variation_number,
amount_of_encountered_variations,
);
}
Token::EndVariation(_) => {
return;
}
}

if tokens.len() != 0 {
next_token(
tokens,
tree,
game_number,
variation_number,
amount_of_encountered_variations,
);
}
}
Loading

0 comments on commit 82d26c5

Please sign in to comment.