diff --git a/example/rlhf/demo_rl.py b/example/rlhf/demo_rl.py
index 79e8d2f..3e80f18 100644
--- a/example/rlhf/demo_rl.py
+++ b/example/rlhf/demo_rl.py
@@ -1,7 +1,7 @@
 """
 accelerate config
 
-LOCAL_DIR=/home/ubuntu/pykoi/pykoi # change this to your local path
+LOCAL_DIR=/home/ubuntu/pykoi # change this to your local path
 
 export PYTHONPATH=$PYTHONPATH:${LOCAL_DIR}
 
@@ -17,7 +17,7 @@ config = RLHFConfig(
     base_model_path="models/rlhf_step1_sft", #"elinas/llama-7b-hf-transformers-4.29",
     dataset_type="huggingface",
-    dataset_name="goldmermaid/stack_exchange_rank_10k_dataset",
+    dataset_name="cambioml/stack_exchange_rank_10k_dataset",
     dataset_subset_rl="data",
     reward_model_path="models/rlhf_step2_rw/", #"cambioml/rlhf_reward_model",
     save_freq=1,
diff --git a/pyproject.toml b/pyproject.toml
index f473e1d..f1f66a1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@
 python-multipart = { version = "0.0.6", optional = true }
 tiktoken = { version = "0.4.0", optional = true }
 sentence-transformers = { version = "2.2.2", optional = true }
-datasets = { version = "2.13.1", optional = true }
+datasets = { version = "2.14.5", optional = true }
 evaluate = { version = "0.4.0", optional = true }
 peft = { version = "0.5.0", optional = true }
 trl = { version = "0.4.7", optional = true }