Source code for modalities.models.huggingface.huggingface_model

from pathlib import Path
from typing import Any, Optional

import torch
from pydantic import BaseModel, ConfigDict
from transformers import AutoModelForCausalLM, AutoModelForMaskedLM, AutoTokenizer

from modalities.config.lookup_enum import LookupEnum
from modalities.models.model import NNModel

# Huggingface Model dependencies
#
# ModuleUtilsMixin
# GenerationMixin
# PushToHubMixin
# PeftAdapterMixin
#   <- PreTrainedModel
#       <- LlamaPreTrainedModel    The bare LLaMA Model outputting raw hidden-states without any specific head on top.
#           <- LlamaModel      The bare LLaMA Model outputting raw hidden-states without any specific head on top.
#           <- LlamaForCausalLM
#           <- LlamaForSequenceClassification    The LLaMa transformer with a sequence classif. head on top (lin. layer)



[docs]
class HuggingFaceModelTypes(LookupEnum):
    """
    HuggingFaceModelTypes enumeration class representing different types of HuggingFace models.

    Attributes:
        AutoModelForCausalLM: Represents the AutoModelForCausalLM class.
        AutoModelForMaskedLM: Represents the AutoModelForMaskedLM class.
    """

    AutoModelForCausalLM = AutoModelForCausalLM
    AutoModelForMaskedLM = AutoModelForMaskedLM




[docs]
class HuggingFacePretrainedModelConfig(BaseModel):
    """
    Configuration class for HuggingFacePretrainedModel.

    Attributes:
        model_type (HuggingFaceModelTypes): The type of the HuggingFace model.
        model_name (Path): The path to the HuggingFace model.
        prediction_key (str): The key for accessing the prediction.
        huggingface_prediction_subscription_key (str): The subscription key for HuggingFace prediction.
        sample_key (str): The key for accessing the sample.
        model_args (Any, optional): Optional additional arguments for the model.
        kwargs (Any, optional): Optional additional keyword arguments.
    """

    model_type: HuggingFaceModelTypes
    model_name: Path
    prediction_key: str
    huggingface_prediction_subscription_key: str
    sample_key: str
    model_args: Optional[Any] = None
    kwargs: Optional[Any] = None

    # avoid warning about protected namespace 'model_', see
    # https://docs.pydantic.dev/2.7/api/config/#pydantic.config.ConfigDict.protected_namespaces
    model_config = ConfigDict(protected_namespaces=())




[docs]
class HuggingFacePretrainedModel(NNModel):
    """HuggingFacePretrainedModel class for HuggingFace models."""

    def __init__(
        self,
        model_type: HuggingFaceModelTypes,
        model_name: str,
        prediction_key: str,
        huggingface_prediction_subscription_key: str,
        sample_key: str,
        model_args: Optional[Any] = None,
        kwargs: Optional[Any] = None,
    ):
        """
        Initializes a HuggingFaceModel object.

        Args:
            model_type (HuggingFaceModelTypes): The type of Hugging Face model.
            model_name (str): The name of the Hugging Face model.
            prediction_key (str): The key for accessing predictions.
            huggingface_prediction_subscription_key (str): The subscription key for Hugging Face predictions.
            sample_key (str): The key for accessing samples.
            model_args (Any, optional): Additional arguments for the Hugging Face model. Defaults to None.
            kwargs (Any, optional): Additional keyword arguments for the Hugging Face model. Defaults to None.
        """
        super().__init__()
        if model_args is None:
            model_args = []
        if kwargs is None:
            kwargs = {}
        self.prediction_key = prediction_key
        self.huggingface_prediction_subscription_key = huggingface_prediction_subscription_key
        self.sample_key = sample_key

        # NOTE: If the model needs to be downloaded, it is NOT necessary to guard the access for rank 0.
        # This is taken care of internally in huggingface hub see:
        # https://github.com/huggingface/huggingface_hub/blob/3788f537b10c7d02149d6bf017d2ce19885f90a2/src/huggingface_hub/file_download.py#L1457
        self.huggingface_model = model_type.value.from_pretrained(
            model_name, local_files_only=False, *model_args, **kwargs
        )


[docs]
    def forward(self, inputs: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
        """
        Forward pass of the model.

        Args:
            inputs (dict[str, torch.Tensor]): A dictionary containing input tensors.

        Returns:
            dict[str, torch.Tensor]: A dictionary containing output tensors.
        """
        output = self.huggingface_model(inputs[self.sample_key])
        return {self.prediction_key: output[self.huggingface_prediction_subscription_key]}


    @property
    def fsdp_block_names(self) -> list[str]:
        """
        Returns a list of FSDP block names.

        Returns:
            list[str]: A list of FSDP block names.
        """
        return self.huggingface_model._no_split_modules



if __name__ == "__main__":
    tokenizer = AutoTokenizer.from_pretrained("epfl-llm/meditron-7b")
    model = HuggingFacePretrainedModel(
        model_type=HuggingFaceModelTypes.AutoModelForCausalLM,
        model_name="epfl-llm/meditron-7b",
        prediction_key="logits",
        huggingface_prediction_subscription_key="logits",
        sample_key="input_ids",
    )
    print(model)