diff --git a/unsloth-cli-2.py b/unsloth-cli-2.py
new file mode 100644
index 00000000..fe6acbdc
--- /dev/null
+++ b/unsloth-cli-2.py
@@ -0,0 +1,688 @@
+#!/usr/bin/env python3
+
+"""
+🦥 Enhanced Script for Fine-Tuning, Merging, and Managing Language Models with Unsloth
+
+This script significantly extends the original unsloth-cli.py with a wide range of advanced features:
+
+- Comprehensive training pipeline with validation and testing support
+- Advanced error handling and fallback options for robust model loading
+- Merging functionality for LoRA adapters, with dequantization options
+- Flexible quantization and precision control (4-bit, 16-bit, 32-bit)
+- Support for custom datasets and data formats (JSON, Parquet)
+- Integration with Hugging Face models and push-to-hub functionality
+- GGUF conversion for optimized model deployment
+- Enhanced logging and progress tracking
+
+Key Features:
+
+1. Flexible Data Handling:
+   - Support for Parquet, JSON, and other data formats
+   - Custom data parsing and processing pipelines
+
+2. Advanced Model Management:
+   - Load and save models in various formats (Hugging Face, GGUF)
+   - Quantization options for memory-efficient training and inference
+   - Dequantization capabilities for precision-sensitive operations
+
+3. Comprehensive Training Pipeline:
+   - Support for train, validation, and test datasets
+   - Customizable training parameters (batch size, learning rate, etc.)
+   - Integration with popular optimization techniques (LoRA, gradient checkpointing)
+
+4. Merging and Adaptation:
+   - Merge LoRA adapters with base models
+   - Dequantization options for merging quantized models
+   # TODO @9/17/2024: Add LoRA+LoRA and Model+Model merging methods (lazy Merge kit) for full merges
+   # TODO @9/17/2024: Add a direct quantizer for safetensors, supporting both GGUF and safetensors outputs
+
+5. Deployment and Sharing:
+   - GGUF conversion for optimized model deployment
+   - Direct integration with the Hugging Face Hub for easy model sharing
+
+6. Robust Error Handling and Logging:
+   - Detailed error messages and logging for easier debugging
+   - Fallback options for model loading and processing
+
+Usage example for training:
+    python unsloth-cli-2.py train --model_name "your_model_path" --train_dataset "train.parquet" \
+        --validation_dataset "val.parquet" --test_dataset "test.parquet" \
+        --max_seq_length 2048 --load_in_4bit \
+        --per_device_train_batch_size 4 --gradient_accumulation_steps 8 \
+        --max_steps 1000 --learning_rate 2e-5 --output_dir "outputs" \
+        --save_model --save_path "model" --quantization "q4_k_m" \
+        --push_to_hub --hub_path "your_hub/model" --hub_token "your_token"
+
+Usage example for merging:
+    python unsloth-cli-2.py merge --base_model_path "path/to/base/model" \
+        --adapter_path "path/to/adapter" --output_path "path/to/output" \
+        --dequantize f16
+
+Usage example with dequantization:
+    python unsloth-cli-2.py train --model_name "your_model_path" --train_dataset "train.parquet" \
+        --load_in_4bit --dequantize
+
+Dequantization Feature:
+The --dequantize option converts quantized weights back to full precision
+after the model is loaded. This is useful when you want to fine-tune or use a
+previously quantized model in full precision. However, please note:
+
+1. Dequantization does not recover information lost during the initial quantization,
+   so model quality may still be lower than if the model had been trained in
+   full precision from the start.
+
+2. Dequantizing increases memory usage significantly, as it converts weights to
+   full precision (typically float32).
+
+3. This option is most useful when you need to perform operations that require
+   full-precision weights but want to start from a quantized model.
+
+To see the full list of configurable options, use:
+    python unsloth-cli-2.py train --help
+    python unsloth-cli-2.py merge --help
+
+Happy fine-tuning, merging, and deploying with Unsloth! 🦥🚀
+"""
+
+import argparse
+import logging
+import os
+import torch
+import json
+import struct
+from unsloth import FastLanguageModel
+from unsloth import is_bfloat16_supported
+from datasets import load_dataset, DatasetDict
+from peft import PeftModel
+from trl import SFTTrainer
+from transformers import TrainingArguments, AutoModelForCausalLM, AutoTokenizer
+from safetensors import safe_open
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
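+# A minimal sketch of the dequantization idea described in the docstring above,
+# assuming the 4-bit weights were produced by bitsandbytes (the backend behind
+# load_in_4bit). The function name and module-swapping approach are illustrative
+# only, not necessarily how --dequantize is implemented elsewhere in this script.
+def _dequantize_4bit_sketch(model):
+    import bitsandbytes as bnb
+    from bitsandbytes.functional import dequantize_4bit
+    # Collect targets first so the module tree is not mutated mid-iteration.
+    targets = [(name, module) for name, module in model.named_modules()
+               if isinstance(module, bnb.nn.Linear4bit)]
+    for name, module in targets:
+        # Unpack the packed 4-bit weight via its stored quantization state, then
+        # promote to float32. As noted above, this does not recover precision
+        # lost at quantization time, and it greatly increases memory use.
+        weight = dequantize_4bit(module.weight.data,
+                                 module.weight.quant_state).to(torch.float32)
+        replacement = torch.nn.Linear(module.in_features, module.out_features,
+                                      bias=module.bias is not None,
+                                      device=weight.device, dtype=torch.float32)
+        replacement.weight = torch.nn.Parameter(weight)
+        if module.bias is not None:
+            replacement.bias = torch.nn.Parameter(module.bias.data.to(torch.float32))
+        # Swap the full-precision Linear into the parent module.
+        parent_name, _, child_name = name.rpartition(".")
+        parent = model.get_submodule(parent_name) if parent_name else model
+        setattr(parent, child_name, replacement)
+    return model
+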
+def analyze_safetensors_header(file_path):
+    try:
+        with open(file_path, 'rb') as f:
+            # The first 8 bytes of a safetensors file are a little-endian
+            # uint64 holding the byte length of the JSON header that follows.
+            header_size_bytes = f.read(8)
+            header_size = struct.unpack('<Q', header_size_bytes)[0]
+            header = json.loads(f.read(header_size).decode('utf-8'))
+        return header
+    except Exception as e:
+        logger.error(f"Failed to read safetensors header from {file_path}: {e}")
+        return None
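+
+# Example usage (hypothetical file name): inspect which tensors a checkpoint
+# contains without loading any weights. Each header entry maps a tensor name
+# to its dtype, shape, and byte offsets; a "__metadata__" key may also appear.
+#
+#     header = analyze_safetensors_header("model.safetensors")
+#     for tensor_name, info in (header or {}).items():
+#         if tensor_name != "__metadata__":
+#             print(tensor_name, info["dtype"], info["shape"])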