Source code for oumi.core.configs.params.peft_params

# Copyright 2025 - Oumi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from dataclasses import dataclass, field
from enum import Enum
from typing import Literal, Optional

from peft import LoraConfig
from peft.utils.peft_types import TaskType
from transformers import BitsAndBytesConfig

from oumi.core.configs.params.base_params import BaseParams


class PeftSaveMode(Enum):
    """Enum representing how to save the final model during PEFT training.

    While models saved with any of these options can be loaded by Oumi, those saved
    with `ADAPTER_ONLY` are not self-contained; the base model will be loaded
    separately from the local HF cache, or downloaded from HF Hub if not in the cache.
    """

    ADAPTER_ONLY = "adapter_only"
    """Only save the model adapter.

    Note that when loading this saved model, the base model will be loaded separately
    from the local HF cache or downloaded from HF Hub.
    """

    ADAPTER_AND_BASE_MODEL = "adapter_and_base_model"
    """Save the base model in addition to the adapter.

    This is similar to `ADAPTER_ONLY`, but the base model's weights are also saved in
    the same directory as the adapter weights, making the output dir self-contained.
    """

    MERGED = "merged"
    """Merge the adapter and base model's weights and save as a single model.

    Note that the resulting model is a standard HF Transformers model, and is no
    longer a PEFT model. A copy of the adapter before merging is saved in the
    "adapter/" subdirectory.
    """
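
# Illustrative sketch (not part of the original module): how a model saved with
# each `PeftSaveMode` might be loaded back. The output path and base model name
# below are hypothetical placeholders.
#
#   from peft import PeftModel
#   from transformers import AutoModelForCausalLM
#
#   # ADAPTER_ONLY: the directory holds only adapter weights; the base model is
#   # fetched from the HF cache/Hub and the adapter is attached on top.
#   base = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")
#   model = PeftModel.from_pretrained(base, "output/my_run")
#
#   # MERGED: the directory is a standard HF Transformers model; no PEFT needed.
#   model = AutoModelForCausalLM.from_pretrained("output/my_run")
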
class LoraWeightInitialization(str, Enum):
    """Enum representing the supported weight initializations for LoRA adapters."""

    DEFAULT = "default"  # Use the reference initialization from Microsoft.
    RANDOM = "random"  # Fully random initialization; discouraged.
    GAUSSIAN = "gaussian"
    EVA = "eva"
    PISSA = "pissa"
    PISSA_NITER = "pissa_niter_[number of iters]"
    LOFTQ = "loftq"
    OLORA = "olora"
    def get_literal_value(
        self,
    ) -> Literal[
        "default",
        "random",
        "gaussian",
        "eva",
        "pissa",
        "pissa_niter_[number of iters]",
        "loftq",
        "olora",
    ]:
        """Returns a literal value of the enum."""
        if self.value not in {
            "default",
            "random",
            "gaussian",
            "eva",
            "pissa",
            "pissa_niter_[number of iters]",
            "loftq",
            "olora",
        }:
            raise ValueError(f"Invalid enum value: {self.value}")
        return self.value
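
# Illustrative sketch (not part of the original module): `get_literal_value` narrows
# an enum member to the literal string accepted by peft's LoraConfig:
#
#   init = LoraWeightInitialization.GAUSSIAN
#   assert init.get_literal_value() == "gaussian"
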
@dataclass
class PeftParams(BaseParams):
    # LoRA Params
    lora_r: int = field(
        default=8,
        metadata={"help": "LoRA R value."},
    )
    """The rank of the update matrices in LoRA.

    A higher value allows for more expressive adaptations but increases
    the number of trainable parameters.
    """

    lora_alpha: int = field(
        default=8,
        metadata={"help": "LoRA alpha."},
    )
    """The scaling factor for the LoRA update.

    This value is typically set equal to `lora_r` or `2*lora_r` for stable training.
    """

    lora_dropout: float = field(
        default=0.0,
        metadata={"help": "LoRA dropout."},
    )
    """The dropout probability applied to LoRA layers.

    This helps prevent overfitting in the adaptation layers.
    """

    lora_target_modules: Optional[list[str]] = field(
        default=None,
        metadata={"help": "LoRA target modules."},
    )
    """List of module names/regexes to apply LoRA to.

    If None, the modules to LoRA-train are chosen based on the model's architecture.
    Specify ["all-linear"] to apply LoRA to all linear/Conv1D layers in the model.
    Specify a list of module names to only apply LoRA to those modules in the model.
    Finally, specify [] to avoid targeting any modules (e.g. if you want to set
    `lora_target_parameters` instead).
    """

    lora_target_parameters: Optional[list[str]] = field(
        default=None,
        metadata={"help": "LoRA target parameters."},
    )
    """List of parameter names/regexes to apply LoRA to.

    This is similar to `lora_target_modules` (which you should prefer using if
    possible), but for parameters instead of modules. This is generally only useful
    for models like MoEs that sometimes use nn.Parameter instead of nn.Module.
    """

    lora_modules_to_save: Optional[list[str]] = field(
        default=None,
        metadata={"help": "Model layers to unfreeze and train."},
    )
    """List of module names to unfreeze and train alongside LoRA parameters.

    These modules will be fully fine-tuned, not adapted using LoRA.
    Use this to selectively train certain parts of the model in full precision.
    """

    lora_bias: str = field(
        default="none",
        metadata={
            "help": (
                "Bias type for LoRA. Can be 'none', 'all' or 'lora_only'. "
                "If 'all' or 'lora_only', the corresponding biases will "
                "be updated during training. Be aware that this means that, "
                "even when disabling the adapters, the model will not "
                "produce the same output as the base model would have "
                "without adaptation. "
                "NOTE: see "
                "https://github.com/huggingface/peft/blob/main/src/peft/tuners/lora/config.py"
                " for more details."
            )
        },
    )
    """Bias type for LoRA. Can be 'none', 'all' or 'lora_only':

    - 'none': No biases are trained.
    - 'all': All biases in the model are trained.
    - 'lora_only': Only biases in LoRA layers are trained.

    If 'all' or 'lora_only', the corresponding biases will be updated during
    training. Note that this means even when disabling the adapters, the model
    will not produce the same output as the base model would have without
    adaptation.

    For more details, see:
    https://github.com/huggingface/peft/blob/main/src/peft/tuners/lora/config.py
    """

    lora_init_weights: LoraWeightInitialization = field(
        default=LoraWeightInitialization.DEFAULT,
        metadata={
            "help": "Weight initialization for LoRA adapters.",
        },
    )
    """Weight initialization for LoRA adapters.

    Passing `LoraWeightInitialization.DEFAULT` will use the underlying reference
    implementation of the corresponding model from Microsoft.

    Other valid (LoraWeightInitialization) options include:

    - "random", which uses fully random initialization and is discouraged.
    - "gaussian" for Gaussian initialization.
    - "eva" for Explained Variance Adaptation (EVA)
      (https://arxiv.org/abs/2410.07170).
    - "loftq" for improved performance when LoRA is combined with quantization
      (https://arxiv.org/abs/2310.08659).
    - "olora" for Orthonormal Low-Rank Adaptation of Large Language Models (OLoRA)
      (https://arxiv.org/html/2406.01775v1).
    - "pissa" for Principal Singular values and Singular vectors Adaptation (PiSSA)
      (https://arxiv.org/abs/2404.02948).

    For more information, see HF:
    https://github.com/huggingface/peft/blob/main/src/peft/tuners/lora/config.py
    """

    lora_task_type: TaskType = TaskType.CAUSAL_LM
    """The task type for LoRA adaptation.

    Defaults to CAUSAL_LM (Causal Language Modeling).
    """

    # Q-LoRA Params
    q_lora: bool = field(default=False, metadata={"help": "Use model quantization."})
    """Whether to use quantization for LoRA (Q-LoRA).

    If True, enables quantization for more memory-efficient fine-tuning.
    """

    q_lora_bits: int = field(
        default=4, metadata={"help": "Quantization (precision) bits."}
    )
    """The number of bits to use for quantization in Q-LoRA.

    This is only used if `q_lora` is True. Defaults to 4-bit quantization.
    """

    # FIXME: The names below use the "bnb" abbreviation for bitsandbytes.
    # If we consider wrapping more quantization libraries, a better
    # naming convention should be applied.
    bnb_4bit_quant_type: str = field(
        default="fp4", metadata={"help": "4-bit quantization type (fp4 or nf4)."}
    )
    """The type of 4-bit quantization to use.

    Can be 'fp4' (4-bit float) or 'nf4' (4-bit NormalFloat).
    """

    llm_int8_skip_modules: Optional[list[str]] = field(
        default=None,
        metadata={"help": "Modules that we do not want to convert to 8-bit."},
    )
    """An explicit list of the modules that we do not want to convert to 8-bit."""

    use_bnb_nested_quant: bool = field(
        default=False, metadata={"help": "Use nested quantization."}
    )
    """Whether to use nested quantization.

    Nested quantization can provide additional memory savings.
    """

    bnb_4bit_quant_storage: str = field(
        default="uint8",
        metadata={"help": "Storage type to pack the quantized 4-bit params."},
    )
    """The storage type for packing quantized 4-bit parameters.

    Defaults to 'uint8' for efficient storage.
    """

    bnb_4bit_compute_dtype: str = field(
        default="float32",
        metadata={"help": "The compute type of the quantized parameters."},
    )
    """Compute type of the quantized parameters.

    It can be different than the input type, e.g., it can be set to a lower
    precision for improved speed. The string will be converted to the
    corresponding torch.dtype.

    Valid string options are:

    - "float32" for 32-bit floating point
    - "float16" for 16-bit floating point
    - "bfloat16" for brain floating point
    - "float64" for 64-bit floating point

    Defaults to "float32".
    """

    peft_save_mode: PeftSaveMode = PeftSaveMode.ADAPTER_ONLY
    """How to save the final model during PEFT training.

    This option is only used if `TrainingParams.save_final_model` is True.
    By default, only the model adapter is saved to reduce disk usage.
    Options are defined in the `PeftSaveMode` enum and include:

    - ADAPTER_ONLY: Only save the model adapter.
    - ADAPTER_AND_BASE_MODEL: Save the base model in addition to the adapter.
    - MERGED: Merge the adapter and base model's weights and save as a single model.
    """
    def to_lora(self) -> LoraConfig:
        """Creates a configuration for LoRA via HF's peft library."""
        if self.lora_init_weights == LoraWeightInitialization.RANDOM:
            init_lora_weights = False
        elif self.lora_init_weights == LoraWeightInitialization.DEFAULT:
            init_lora_weights = True
        else:
            init_lora_weights = self.lora_init_weights.value

        # LoraConfig's target_modules is type Optional[Union[list[str], str]], but
        # since OmegaConf doesn't support a union between a list and a primitive type,
        # our field's type is Optional[list[str]].
        #
        # This is special handling for the "all-linear" special case.
        # See: https://huggingface.co/docs/peft/en/package_reference/lora#peft.LoraConfig.target_modules
        target_modules = self.lora_target_modules
        if target_modules == ["all-linear"]:
            target_modules = "all-linear"

        return LoraConfig(
            r=self.lora_r,
            lora_alpha=self.lora_alpha,
            lora_dropout=self.lora_dropout,
            target_modules=target_modules,
            target_parameters=self.lora_target_parameters,
            modules_to_save=self.lora_modules_to_save,
            bias=self.lora_bias,  # type: ignore
            task_type=self.lora_task_type,
            init_lora_weights=init_lora_weights,
        )
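
    # Illustrative sketch (not part of the original module): the LoraConfig returned
    # by `to_lora()` can be applied to a base model with peft's `get_peft_model`.
    # The model name below is a hypothetical placeholder.
    #
    #   from peft import get_peft_model
    #   from transformers import AutoModelForCausalLM
    #
    #   params = PeftParams(lora_r=16, lora_alpha=32,
    #                       lora_target_modules=["all-linear"])
    #   base = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")
    #   model = get_peft_model(base, params.to_lora())
    #   model.print_trainable_parameters()
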
    def to_bits_and_bytes(self) -> BitsAndBytesConfig:
        """Creates a configuration for quantized models via BitsAndBytes.

        The resulting configuration uses the instantiated peft parameters.
        """
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=self.q_lora_bits == 4,
            load_in_8bit=self.q_lora_bits == 8,
            llm_int8_skip_modules=self.llm_int8_skip_modules,
            bnb_4bit_compute_dtype=self.bnb_4bit_compute_dtype,
            bnb_4bit_quant_type=self.bnb_4bit_quant_type,
            bnb_4bit_use_double_quant=self.use_bnb_nested_quant,
            bnb_4bit_quant_storage=self.bnb_4bit_quant_storage,
        )
        return quantization_config
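
# Illustrative sketch (not part of the original module): a Q-LoRA style setup where
# `to_bits_and_bytes()` supplies the quantization config used to load the base model
# and `to_lora()` supplies the adapter config. The model name below is a
# hypothetical placeholder.
#
#   from peft import get_peft_model
#   from transformers import AutoModelForCausalLM
#
#   params = PeftParams(
#       q_lora=True,
#       bnb_4bit_quant_type="nf4",
#       bnb_4bit_compute_dtype="bfloat16",
#   )
#   base = AutoModelForCausalLM.from_pretrained(
#       "meta-llama/Llama-3.1-8B",
#       quantization_config=params.to_bits_and_bytes(),
#   )
#   model = get_peft_model(base, params.to_lora())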