data folders should be together
Signed-off-by: Dushyant Behl <[email protected]>
dushyantbehl committed Nov 28, 2024
1 parent 70252af commit 82b548f
Showing 44 changed files with 16 additions and 17 deletions.
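The diffs below move every import off tests.testdata and tests.predefined_data_configs and onto tests.artifacts.testdata and tests.artifacts.predefined_data_configs, so all test fixtures live under a single tests/artifacts/ package. As a rough illustration (not part of this commit view), the relocated tests/artifacts/testdata/__init__.py could export path constants along these lines; only the constant names are confirmed by the imports in the hunks below, while the concrete file and model names here are placeholders:

# Hypothetical sketch of tests/artifacts/testdata/__init__.py after the move.
# Only the constant names are confirmed by the imports in the diffs below;
# the file and model names used here are placeholders.
import os

DATA_DIR = os.path.dirname(__file__)

MODEL_NAME = "Maykeye/TinyLLama-v0"  # placeholder tiny model id
EMPTY_DATA = os.path.join(DATA_DIR, "empty_data.json")  # placeholder
MALFORMATTED_DATA = os.path.join(DATA_DIR, "malformatted_data.json")  # placeholder
TWITTER_COMPLAINTS_DATA_JSONL = os.path.join(
    DATA_DIR, "twitter_complaints_small.jsonl"  # placeholder
)
TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSON = os.path.join(
    DATA_DIR, "twitter_complaints_input_output.json"  # placeholder
)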
6 changes: 3 additions & 3 deletions tests/acceleration/test_acceleration_framework.py
@@ -54,13 +54,13 @@
 from tuning.utils.import_utils import is_fms_accelerate_available
 
 # for some reason the CI will raise an import error if we try to import
-# these from tests.testdata
+# these from tests.artifacts.testdata
 TWITTER_COMPLAINTS_JSON_FORMAT = os.path.join(
-    os.path.dirname(__file__), "../data/twitter_complaints_json.json"
+    os.path.dirname(__file__), "../artifacts/testdata/twitter_complaints_json.json"
 )
 TWITTER_COMPLAINTS_TOKENIZED = os.path.join(
     os.path.dirname(__file__),
-    "../data/twitter_complaints_tokenized_with_maykeye_tinyllama_v0.json",
+    "../artifacts/testdata/twitter_complaints_tokenized_with_maykeye_tinyllama_v0.json",
 )
 
 # pylint: disable=import-error
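The acceleration tests keep building these fixture paths relative to the test file (per the comment about CI import errors) rather than importing them from the relocated package. A minimal sketch, assuming the checking module lives in tests/acceleration/ and the fixture really sits at the new ../artifacts/testdata/ location, of how that relative path can be verified:

import os

# Illustrative only: resolve the relocated fixture the same way the test
# module above does -- relative to the test file -- and confirm the move
# left it in place. Assumes this file lives in tests/acceleration/.
TWITTER_COMPLAINTS_JSON_FORMAT = os.path.join(
    os.path.dirname(__file__), "../artifacts/testdata/twitter_complaints_json.json"
)

def test_relocated_fixture_exists():
    # Normalize the "../" component before checking the file is present.
    assert os.path.isfile(os.path.normpath(TWITTER_COMPLAINTS_JSON_FORMAT))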
@@ -25,6 +25,6 @@
 PRETOKENIZE_JSON_DATA_YAML = os.path.join(
     PREDEFINED_DATA_CONFIGS, "pretokenized_json_data.yaml"
 )
-TOKENIZE_AND_INSTRUCTION_MASKING_YAML = os.path.join(
-    PREDEFINED_DATA_CONFIGS, "tokenize_and_instruction_masking.yaml"
+TOKENIZE_AND_INPUT_MASKING_YAML = os.path.join(
+    PREDEFINED_DATA_CONFIGS, "tokenize_and_input_masking.yaml"
 )
File renamed without changes.
File renamed without changes.
File renamed without changes.
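The renamed constant points at the renamed YAML data config. A small sketch, assuming PREDEFINED_DATA_CONFIGS resolves to the directory holding the YAML files (as in the hunk above) and that the renamed file exists there, of loading it with PyYAML, which the data preprocessing tests below already import:

import os
import yaml  # PyYAML, already used in tests/data/test_data_preprocessing_utils.py

# Placeholder: in the real package this is defined once and imported.
PREDEFINED_DATA_CONFIGS = os.path.dirname(__file__)
TOKENIZE_AND_INPUT_MASKING_YAML = os.path.join(
    PREDEFINED_DATA_CONFIGS, "tokenize_and_input_masking.yaml"
)

with open(TOKENIZE_AND_INPUT_MASKING_YAML, encoding="utf-8") as f:
    data_config = yaml.safe_load(f)  # parsed dict describing the preprocessing steps
print(data_config)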
2 changes: 1 addition & 1 deletion tests/build/test_launch_script.py
@@ -26,7 +26,7 @@
 # First Party
 from build.accelerate_launch import main
 from build.utils import serialize_args, get_highest_checkpoint
-from tests.testdata import TWITTER_COMPLAINTS_DATA_JSONL
+from tests.artifacts.testdata import TWITTER_COMPLAINTS_DATA_JSONL
 from tuning.utils.error_logging import (
     USER_ERROR_EXIT_CODE,
     INTERNAL_ERROR_EXIT_CODE,
2 changes: 1 addition & 1 deletion tests/data/test_data_handlers.py
@@ -21,7 +21,7 @@
 import pytest
 
 # First Party
-from tests.testdata import MODEL_NAME, TWITTER_COMPLAINTS_DATA_JSONL
+from tests.artifacts.testdata import MODEL_NAME, TWITTER_COMPLAINTS_DATA_JSONL
 
 # Local
 from tuning.data.data_handlers import apply_custom_data_formatting_template
10 changes: 5 additions & 5 deletions tests/data/test_data_preprocessing_utils.py
@@ -25,12 +25,12 @@
 import yaml
 
 # First Party
-from tests.predefined_data_configs import (
+from tests.artifacts.predefined_data_configs import (
     APPLY_CUSTOM_TEMPLATE_YAML,
     PRETOKENIZE_JSON_DATA_YAML,
-    TOKENIZE_AND_INSTRUCTION_MASKING_YAML,
+    TOKENIZE_AND_INPUT_MASKING_YAML,
 )
-from tests.testdata import (
+from tests.artifacts.testdata import (
     MODEL_NAME,
     TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSON,
     TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSONL,
@@ -408,11 +408,11 @@ def test_validate_args_pretokenized(data_args, packing):
         (PRETOKENIZE_JSON_DATA_YAML, TWITTER_COMPLAINTS_TOKENIZED_JSON),
         (PRETOKENIZE_JSON_DATA_YAML, TWITTER_COMPLAINTS_TOKENIZED_JSONL),
         (
-            TOKENIZE_AND_INSTRUCTION_MASKING_YAML,
+            TOKENIZE_AND_INPUT_MASKING_YAML,
             TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSON,
         ),
         (
-            TOKENIZE_AND_INSTRUCTION_MASKING_YAML,
+            TOKENIZE_AND_INPUT_MASKING_YAML,
             TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSONL,
         ),
     ],
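The parametrized cases above pair each data config YAML with a dataset file in the matching format. A sketch of that pairing pattern, with placeholder paths and hypothetical parameter names standing in for the constants imported from tests.artifacts.*:

import pytest

# Placeholders for the constants imported from tests.artifacts.* above.
TOKENIZE_AND_INPUT_MASKING_YAML = "tests/artifacts/predefined_data_configs/tokenize_and_input_masking.yaml"
TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSON = "tests/artifacts/testdata/twitter_complaints_input_output.json"
TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSONL = "tests/artifacts/testdata/twitter_complaints_input_output.jsonl"

@pytest.mark.parametrize(
    "data_config_path,data_path",  # hypothetical parameter names
    [
        (TOKENIZE_AND_INPUT_MASKING_YAML, TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSON),
        (TOKENIZE_AND_INPUT_MASKING_YAML, TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSONL),
    ],
)
def test_config_and_dataset_pairing(data_config_path, data_path):
    # Placeholder body; the real test feeds the pair through the preprocessing code.
    assert data_config_path.endswith(".yaml")
    assert data_path.endswith((".json", ".jsonl"))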
2 changes: 1 addition & 1 deletion tests/test_sft_trainer.py
@@ -31,7 +31,7 @@
 # First Party
 from build.utils import serialize_args
 from scripts.run_inference import TunedCausalLM
-from tests.testdata import (
+from tests.artifacts.testdata import (
     EMPTY_DATA,
     MALFORMATTED_DATA,
     MODEL_NAME,
2 changes: 1 addition & 1 deletion tests/trainercontroller/test_tuning_trainercontroller.py
@@ -30,7 +30,7 @@
 from tests.trainercontroller.custom_operation_invalid_action import (
     CustomOperationInvalidAction,
 )
-import tests.testdata.trainercontroller as td
+import tests.artifacts.testdata.trainercontroller as td
 
 # Local
 import tuning.config.configs as config
2 changes: 1 addition & 1 deletion tests/utils/test_tokenizer_data_utils.py
@@ -3,7 +3,7 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 # First Party
-from tests.testdata import MODEL_NAME
+from tests.artifacts.testdata import MODEL_NAME
 
 # Local
 # First party
3 changes: 1 addition & 2 deletions tuning/config/configs.py
@@ -99,8 +99,7 @@ class DataArguments:
         default=None,
         metadata={
             "help": "data config file which specifies the data preprocessing logic to apply.\
-            Supports both JSON and YAML based config files.\
-            for examples see examples/predefined_data_configs"
+            Supports both JSON and YAML based config files."
         },
     )
 
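The shortened help text belongs to a field on the DataArguments dataclass (the class name is visible in the hunk header). A hedged sketch of how such a field is typically declared with HF-style dataclass metadata; the field name data_config_path is a guess, since it is not shown in the hunk:

from dataclasses import dataclass, field
from typing import Optional

@dataclass
class DataArguments:
    # Hypothetical field name; only the default and metadata lines are visible
    # in the hunk above, and the rest of the class is omitted here.
    data_config_path: Optional[str] = field(
        default=None,
        metadata={
            "help": "data config file which specifies the data preprocessing logic to apply. "
            "Supports both JSON and YAML based config files."
        },
    )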
