Skip to content

Commit

Permalink
code to make a dataset of proofgames, running with timeout=180s
Browse files Browse the repository at this point in the history
  • Loading branch information
dpaleka committed Oct 23, 2023
1 parent 0fd23a2 commit 21df1fd
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 56 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,8 @@ This program is free software: you can redistribute it and/or modify it under th

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.
You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.


## Daniel's instructions
Copy the `texelutil` binary (obtained by any means; only the `texelutil` executable itself is needed) into this folder.
48 changes: 48 additions & 0 deletions pgn_to_fen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""
There is a file made using this code:
with open(os.path.join(DATA_DIR, "FILE.csv"), "w") as f:
writer = csv.writer(f)
for uid, rating, board, solution in extracted_puzzles:
writer.writerow((uid, rating,
str(chess.pgn.Game().from_board(board)).split("\n")[-1][:-2],
" ".join(solution)))
Make a file with the following format: uid, rating, fen, solution. Label the columns "uid", "rating", "FEN", "solution".
Use argparse to input the file name and the output file name.
"""

import csv
import os
import io
import argparse
from tqdm import tqdm
import chess.pgn

def pgn_to_fen(input_file, output_file, num_entries=None):
    """Convert a puzzle CSV holding PGN movetext into one holding FEN positions.

    Every input row must have exactly four fields: ``uid, rating, pgn,
    solution`` (no header row). The PGN is replayed and the FEN of the
    resulting position is written in its place. The output file starts with
    the header ``uid, rating, FEN, solution``.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path of the CSV file to write (overwritten if present).
        num_entries: If not None, stop after this many input rows.

    Raises:
        ValueError: If a row's PGN field contains no parseable game, or a row
            does not have exactly four fields (raised by tuple unpacking).
    """
    # The csv module requires its file objects to be opened with newline="":
    # otherwise quoted fields with embedded newlines are misread and, on
    # platforms that translate "\n", every written row gains an extra "\r".
    with open(input_file, "r", newline="") as f_in, \
            open(output_file, "w", newline="") as f_out:
        reader = csv.reader(f_in)
        writer = csv.writer(f_out)

        # write header
        writer.writerow(("uid", "rating", "FEN", "solution"))
        for i, row in tqdm(enumerate(reader)):
            if num_entries is not None and i >= num_entries:
                break
            uid, rating, pgn, solution = row
            game = chess.pgn.read_game(io.StringIO(pgn))
            if game is None:
                # read_game() returns None when there is nothing to parse;
                # fail with context instead of an opaque AttributeError.
                raise ValueError(
                    f"Row {i} (uid={uid}): PGN field contains no game"
                )
            board = game.board()
            for move in game.mainline_moves():
                board.push(move)

            writer.writerow((uid, rating, board.fen(), solution))

if __name__ == "__main__":
    # Command-line entry point: collect the input/output paths and the
    # optional row limit, then run the conversion.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--input_file", "-i",
        default="/data/chess-data/lichess_puzzles/pgn_puzzles.csv",
        help="Name of the input file",
    )
    cli.add_argument(
        "--output_file", "-o",
        default="/data/chess-data/lichess_puzzles/fen_puzzles.csv",
        help="Name of the output file",
    )
    cli.add_argument(
        "--num_entries", "-n",
        type=int,
        default=None,
        help="Number of entries to process",
    )
    options = cli.parse_args()

    pgn_to_fen(options.input_file, options.output_file, options.num_entries)
132 changes: 77 additions & 55 deletions proofgame.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,55 +22,67 @@
We also need to make sure that the PGN is valid. We can do this by playing the game out on a chessboard, using python-chess,
and then checking that the final position is the same as the one we started with.
"""
#%%

import os
import io
import argparse
import subprocess
from multiprocessing import Pool
import re
import chess
import chess.pgn
from pathlib import Path
import pandas as pd
import pebble
from tqdm import tqdm
from concurrent.futures import TimeoutError

# Make a texelutil binary placed in the current working directory discoverable:
# the resolved "." is printed and appended to PATH, so commands that invoke
# `texelutil` by its bare name can find it there.
TEXELUTIL_PATH = Path(".").resolve()
print(TEXELUTIL_PATH)
os.environ["PATH"] += os.pathsep + str(TEXELUTIL_PATH)

SEED = 42

# List of FENs
fens = ["r6r/pp3pk1/5Rp1/n2pP1Q1/2pPp3/2P1P2q/PP1B3P/R5K1 w - - 0 1",
"r6r/pp3pk1/5Rp1/n2pP1Q1/2pPp3/2P1P2q/PP1B3P/R5K1 w - - 0 1",
"r1b3k1/pp3Rpp/3p1b2/2pN4/2P5/5Q1P/PPP3P1/4qNK1 w - - 0 1"] # TODO get this from a FENS_FILE csv file instead. it will be a csv with a 'FEN' column
# Base directory of the puzzle data; run_command() sends texelutil's stderr
# to log files under DATA_DIR/logs.
DATA_DIR = "/data/chess-data/lichess_puzzles"
# Value passed to texelutil's -rnd option.
SEED = 42
# Directory that receives texelutil's result files; created at import time
# if it does not exist yet.
TEXELUTIL_RES_DIR = "./texel_temp_results"
os.makedirs(TEXELUTIL_RES_DIR, exist_ok=True)
# Worker-pool size, and also the number of FENs handled per batch in main().
MAX_THREADS = 64
TIMEOUT = 180 # Time limit, in seconds, for a single texelutil run

TEXELUTIL_RES_DIR = "" # TODO add this to the code below, where needed
# List of FENs for testing
fens_test = ["r6r/pp3pk1/5Rp1/n2pP1Q1/2pPp3/2P1P2q/PP1B3P/R5K1 w - - 0 1",
"r6r/pp3pk1/5Rp1/n2pP1Q1/2pPp3/2P1P2q/PP1B3P/R5K1 w - - 0 1",
"r1b3k1/pp3Rpp/3p1b2/2pN4/2P5/5Q1P/PPP3P1/4qNK1 w - - 0 1"]

def check_contains_fen(fen, file):
    """Return True if ``file`` exists and its contents start with ``fen``.

    Args:
        fen: The FEN string expected at the very beginning of the file.
        file: Path of the file to inspect.

    Returns:
        False when the file does not exist or starts with anything else.
    """
    # Open directly instead of checking os.path.exists() first: the file may
    # disappear between the check and the open.
    try:
        with open(file, "r") as f:
            # Only the first len(fen) characters can decide the answer, so
            # there is no need to load the whole result file.
            return f.read(len(fen)) == fen
    except FileNotFoundError:
        return False



def run_command(fen, thread_id, TIMEOUT=0.5*60, force=False):
FIRST_FILE = f"result_t_{thread_id}_00"
def move_01(fen : str):
    """
    Return ``fen`` with its halfmove clock and fullmove number reset to "0" and "1".

    The FEN must consist of six whitespace-separated fields whose last two
    are numeric; fields are re-joined with single spaces.
    """
    fields = fen.split()
    well_formed = len(fields) == 6 and fields[4].isdigit() and fields[5].isdigit()
    assert well_formed
    fields[4], fields[5] = "0", "1"
    return " ".join(fields)

def run_command(fen, thread_id, force=False):
    """Run ``texelutil proofgame`` for one FEN, bounded by ``TIMEOUT`` seconds.

    The FEN is fed to texelutil on stdin. Result files use the prefix
    ``{TEXELUTIL_RES_DIR}/result_t_{thread_id}_`` and stderr goes to
    ``{DATA_DIR}/logs/debug_t_{thread_id}_.log``.

    Args:
        fen: Target position in FEN notation.
        thread_id: Index used to name this job's result and log files.
        force: If False, skip the run when the first result file already
            starts with ``fen``.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    result_prefix = f"{TEXELUTIL_RES_DIR}/result_t_{thread_id}_"
    if not force and check_contains_fen(fen, f"{result_prefix}00"):
        print(f"Thread {thread_id}: Already solved")
        return

    # The log directory is not created anywhere else; without it the log file
    # cannot be opened and texelutil would never start.
    log_dir = f"{DATA_DIR}/logs"
    os.makedirs(log_dir, exist_ok=True)

    # Invoke texelutil directly (argument list, no shell): the FEN is never
    # interpreted by a shell, and on timeout subprocess.run() kills texelutil
    # itself rather than an intermediate shell that would leave it running.
    command = [
        "texelutil", "proofgame", "-f",
        "-o", result_prefix,
        "-rnd", str(SEED),
    ]
    with open(f"{log_dir}/debug_t_{thread_id}_.log", "w") as log_file:
        try:
            subprocess.run(
                command,
                input=f"{fen}\n",
                text=True,
                stderr=log_file,
                timeout=TIMEOUT,
            )
        except subprocess.TimeoutExpired:
            print(f"Thread {thread_id}: Timeout expired")
        except FileNotFoundError:
            # Keep the run best-effort, as the shell pipeline was, when the
            # binary is missing.
            print(f"Thread {thread_id}: texelutil not found on PATH")


def convert_to_pgn(moves):
moves = moves.split()
pgn = ""
Expand All @@ -81,69 +93,79 @@ def convert_to_pgn(moves):
pgn += f"{moves[i + 1]} "
return pgn.strip()


def validate_pgn(pgn : str, fen : str, ignore_move_number=True):
    """
    Check that playing ``pgn`` from the standard starting position reaches ``fen``.

    pgn: string of the form "1. g4 d5 2. f4 h5 3. gxh5 e5 4. Nh3 Bxh3 5. Bxh3 e4 6. Bd7+
    fen: string of the form "r6r/pp3pk1/5Rp1/n2pP1Q1/2pPp3/2P1P2q/PP1B3P/R5K1 w - - 0 1"
    ignore_move_number: if True, the last two FEN fields (halfmove clock and
        fullmove number) are dropped from ``fen`` before comparing.

    Returns True if the final position matches, False otherwise.
    """
    print(f"pgn: {pgn}")
    print(f"fen: {fen}")
    # Start from the standard initial position
    board = chess.Board()

    # Parse the PGN game
    game = chess.pgn.read_game(io.StringIO(pgn))

    # read_game() returns None when the text holds no game (e.g. an empty
    # proof game); treat that as zero moves instead of crashing.
    if game is not None:
        for move in game.mainline_moves():
            board.push(move)

    # Compare the final position with the provided FEN
    print(f"board fen: {board.fen()}")
    if ignore_move_number:
        # Ditch the last two fields of the FEN
        fen = " ".join(fen.split()[:-2])
        return board.fen().startswith(fen)
    return board.fen() == fen


MAX_THREADS = 32
with Pool(MAX_THREADS) as p:
p.starmap(run_command, [(fen, i) for i, fen in enumerate(fens)])


#%%
SAVE_FILENAME = "proofgame_pgns.csv" # TODO incorporate this

def process_output(thread_id) -> "str | None":
    """Extract and validate the proof game found for job ``thread_id``.

    Reads the highest-numbered ``result_t_{thread_id}_NN`` file in
    ``TEXELUTIL_RES_DIR``, looks for a ``legal: proof: ...`` line, converts
    the moves to PGN and validates them against ``fens[thread_id]``.

    Args:
        thread_id: Index of the job; also the index into the global ``fens``.

    Returns:
        The PGN string if a valid proof game was found, otherwise None.
    """
    # Result files are numbered 00, 01, ...; count them to find the last one.
    count = 0
    while os.path.exists(f"{TEXELUTIL_RES_DIR}/result_t_{thread_id}_{count:02d}"):
        count += 1
    if count == 0:
        # Nothing was written (e.g. the run timed out or failed to start).
        # Without this guard the code would try to open "..._-1".
        print(f"Thread {thread_id}: No proof game found")
        return None

    last_file = f"{TEXELUTIL_RES_DIR}/result_t_{thread_id}_{count - 1:02d}"
    with open(last_file, "r") as f:
        content = f.read()

    # Check if the proof game was found
    match = re.search(r"legal: proof: (.*)", content)
    if not match:
        print(f"Thread {thread_id}: No proof game found")
        return None

    pgn = convert_to_pgn(match.group(1))
    if validate_pgn(pgn, fens[thread_id]):
        print(f"Thread {thread_id}: Proof game is valid")
        return pgn

    print(f"Thread {thread_id}: Proof game is invalid")
    return None

def main(args):
    """Compute proof games for a list of FENs and optionally save them.

    FENs come from the ``FEN`` column of ``args.fens_file`` when it is given,
    otherwise from the built-in ``fens_test`` list. They are processed in
    batches of ``MAX_THREADS`` with one ``run_command`` call per FEN. When a
    FENs file was given, the results are stored in a ``proofgame`` column and
    the table is written to ``args.save_filename``.

    Args:
        args: Parsed command-line namespace with ``fens_file`` and
            ``save_filename`` attributes.
    """
    import time  # only needed for the pause between batches

    # process_output() looks up the target FEN in this module-level list.
    global fens
    if args.fens_file:
        df = pd.read_csv(args.fens_file)
        fens = df['FEN'].tolist()
    else:
        fens = fens_test

    print(f"Computing {len(fens)} proof games")
    for start in range(0, len(fens), MAX_THREADS):
        print(f"Processing {start} to {start + MAX_THREADS}")
        batch = fens[start:start + MAX_THREADS]
        # starmap() blocks until the batch is done; leaving the with-block
        # then terminates the workers, so no extra close()/join() is needed.
        with Pool(MAX_THREADS) as pool:
            pool.starmap(
                run_command,
                [(fen, start + offset) for offset, fen in enumerate(batch)],
            )
        # kill texelutil bc it's not closing properly
        subprocess.run("killall texelutil", shell=True)
        time.sleep(max(5, TIMEOUT/10))

    print("Processing output")
    # `or None` keeps the original normalisation of falsy results to None.
    pgns = [process_output(thread_id) or None for thread_id in tqdm(range(len(fens)))]
    if args.fens_file:
        # Assign the whole column at once instead of enlarging row by row.
        df['proofgame'] = pgns
        df.to_csv(args.save_filename, index=False)


# Process the output files
for i in range(len(fens)):
process_output(i)
if __name__ == "__main__":
    # Command-line entry point: read the optional FEN source and the output
    # location, then hand the parsed options to main().
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--fens_file",
        default=None,
        help="CSV file with a 'FEN' column",
    )
    cli.add_argument(
        "--save_filename",
        default="/data/chess-data/lichess_puzzles/proofgame_pgns.csv",
        help="File to save the results",
    )
    main(cli.parse_args())

#%%

# TODO make a function that calls all of the below
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
python-chess
requests
zstandard
pebble

0 comments on commit 21df1fd

Please sign in to comment.