Modalities
Getting Started
Quickstart
Configuration
Model Cards
Benchmarking
Known Issues
Datasets
MemMap Datasets
Entrypoints
Entrypoints
VSCode Setup
VSCode Setup
Future Work
Future Work
API
modalities
Modalities
Index
Index
A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | Z
A
ABSOLUTE (modalities.models.gpt2.gpt2_model.PositionTypes attribute)
activation (modalities.models.coca.coca_model.TextDecoderConfig attribute)
activation_type (modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
ActivationCheckpointingVariants (class in modalities.training.activation_checkpointing.activation_checkpointing_variants)
ActivationType (class in modalities.models.model)
add_cls_token (modalities.models.vision_transformer.vision_transformer_model.VisionTransformerConfig attribute)
add_deprecated_alias() (in module modalities.utils.deprecated_alias)
add_subscriber() (modalities.logging_broker.message_broker.MessageBroker method)
(modalities.logging_broker.message_broker.MessageBrokerIF method)
ALL_CONFIGS (modalities.utils.benchmarking.benchmarking_utils.SweepSets attribute)
apply_rotary_pos_emb() (modalities.models.gpt2.gpt2_model.RotaryTransform method)
AppState (class in modalities.checkpointing.stateful.app_state)
AppStateFactory (class in modalities.checkpointing.stateful.app_state_factory)
attention_config (modalities.models.coca.coca_model.TextDecoderConfig attribute)
(modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
(modalities.models.vision_transformer.vision_transformer_model.VisionTransformerConfig attribute)
attention_engine_type (modalities.nn.attention.AttentionConfig attribute)
attention_implementation (modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
attention_norm_config (modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
AttentionConfig (class in modalities.models.gpt2.gpt2_model)
(class in modalities.nn.attention)
AttentionConfig.QueryKeyValueTransformConfig (class in modalities.models.gpt2.gpt2_model)
AttentionConfig.QueryKeyValueTransformConfig.IdentityTransformConfig (class in modalities.models.gpt2.gpt2_model)
AttentionConfig.QueryKeyValueTransformConfig.RotaryTransformConfig (class in modalities.models.gpt2.gpt2_model)
AttentionEngineType (class in modalities.nn.attention)
AttentionImplementation (class in modalities.models.gpt2.gpt2_model)
AttentionPooling (class in modalities.models.coca.attention_pooling)
AttentionType (class in modalities.nn.attention)
B
base_freq (modalities.models.gpt2.gpt2_model.AttentionConfig.QueryKeyValueTransformConfig.RotaryTransformConfig attribute)
base_lrs (modalities.optimizers.scheduler_list.SchedulerList property)
base_model_prefix (modalities.conversion.gpt2.modeling_gpt2.GPT2ForQuestionAnswering attribute)
(modalities.conversion.gpt2.modeling_gpt2.GPT2PreTrainedModel attribute)
base_model_tp_plan (modalities.conversion.gpt2.configuration_gpt2.GPT2Config attribute)
BaseReader (class in modalities.dataloader.large_file_lines_reader)
Batch (class in modalities.batch)
batch_dim (modalities.batch.DatasetBatch attribute)
(modalities.batch.InferenceResultBatch attribute)
BATCH_PROGRESS_UPDATE (modalities.logging_broker.messages.MessageTypes attribute)
BatchStateError
BF_16 (modalities.running_env.env_utils.MixedPrecisionSettings attribute)
(modalities.running_env.env_utils.PyTorchDtypes attribute)
BF_16_WORKING (modalities.running_env.env_utils.MixedPrecisionSettings attribute)
bfloat16 (modalities.utils.profilers.batch_generator.DataTypeEnum attribute)
bias (modalities.models.coca.coca_model.TextDecoderConfig attribute)
(modalities.models.components.layer_norms.LayerNormConfig attribute)
(modalities.models.components.layer_norms.RMSLayerNormConfig attribute)
(modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
(modalities.models.vision_transformer.vision_transformer_model.VisionTransformerConfig attribute)
bias_attn_pool (modalities.models.coca.coca_model.CoCaConfig attribute)
biases (modalities.nn.model_initialization.parameter_name_filters.RegexFilter attribute)
block_size (modalities.models.coca.coca_model.TextDecoderConfig attribute)
C
calculate_hashed_seed() (in module modalities.utils.seeding)
CAUSAL_SELF_ATTENTION (modalities.nn.attention.AttentionType attribute)
CausalSelfAttention (class in modalities.models.gpt2.gpt2_model)
check_divisibility() (modalities.models.gpt2.gpt2_model.GPT2LLMConfig method)
check_std_and_hidden_dim() (modalities.nn.model_initialization.initialization_routines.PlainInitializationConfig method)
CHECKPOINT_FOLDER_STRUCTURE (modalities.checkpointing.fsdp.fsdp_checkpoint_saving.DCPCheckpointSaving attribute)
checkpoint_path (modalities.utils.number_conversion.NumberConversionFromCheckpointPathConfig attribute)
CHECKPOINT_STRUCTURE (modalities.checkpointing.fsdp.fsdp_checkpoint_saving.FSDP1CheckpointSaving attribute)
CheckpointingEntityType (class in modalities.checkpointing.fsdp.fsdp_checkpoint_saving)
CheckpointingError
CheckpointingInstruction (class in modalities.checkpointing.checkpoint_saving_instruction)
checkpoints_to_delete (modalities.checkpointing.checkpoint_saving_instruction.CheckpointingInstruction attribute)
CheckpointSaving (class in modalities.checkpointing.checkpoint_saving)
CheckpointSavingExecutionABC (class in modalities.checkpointing.checkpoint_saving_execution)
CheckpointSavingStrategyIF (class in modalities.checkpointing.checkpoint_saving_strategies)
Chunking (class in modalities.preprocessing.create_chunks)
clip_gradients() (modalities.training.gradient_clipping.fsdp_gradient_clipper.FSDP1GradientClipper method)
(modalities.training.gradient_clipping.fsdp_gradient_clipper.FSDP1LoggingOnlyGradientClipper method)
(modalities.training.gradient_clipping.fsdp_gradient_clipper.FSDP2GradientClipper method)
(modalities.training.gradient_clipping.fsdp_gradient_clipper.FSDP2LoggingOnlyGradientClipper method)
(modalities.training.gradient_clipping.gradient_clipper.GradientClipperIF method)
CLMCrossEntropyLoss (class in modalities.loss_functions)
close() (modalities.dataloader.large_file_lines_reader.LargeFileLinesReader method)
CoCa (class in modalities.models.coca.coca_model)
COCA (modalities.nn.model_initialization.parameter_name_filters.SupportWeightInitModels attribute)
CoCaCollateFnConfig (class in modalities.models.coca.collator)
CoCaCollatorFn (class in modalities.models.coca.collator)
CoCaConfig (class in modalities.models.coca.coca_model)
CombinedDataset (class in modalities.dataloader.dataset)
compute() (modalities.utils.mfu.GPT2MFUCalculator method)
(modalities.utils.mfu.MFUCalculatorABC method)
config (modalities.conversion.gpt2.modeling_gpt2.GPT2PreTrainedModel attribute)
(modalities.models.gpt2.gpt2_model.AttentionConfig.QueryKeyValueTransformConfig attribute)
(modalities.models.gpt2.gpt2_model.LayerNormWrapperConfig attribute)
config_class (modalities.conversion.gpt2.modeling_gpt2.GPT2ForCausalLM attribute)
(modalities.conversion.gpt2.modeling_gpt2.GPT2ForQuestionAnswering attribute)
(modalities.conversion.gpt2.modeling_gpt2.GPT2ForSequenceClassification attribute)
(modalities.conversion.gpt2.modeling_gpt2.GPT2ForTokenClassification attribute)
(modalities.conversion.gpt2.modeling_gpt2.GPT2Model attribute)
(modalities.conversion.gpt2.modeling_gpt2.GPT2PreTrainedModel attribute)
ConfigError
consume_dict() (modalities.logging_broker.subscriber.MessageSubscriberIF method)
(modalities.logging_broker.subscriber_impl.progress_subscriber.DummyProgressSubscriber method)
(modalities.logging_broker.subscriber_impl.progress_subscriber.RichProgressSubscriber method)
consume_message() (modalities.logging_broker.subscriber.MessageSubscriberIF method)
(modalities.logging_broker.subscriber_impl.progress_subscriber.DummyProgressSubscriber method)
(modalities.logging_broker.subscriber_impl.progress_subscriber.RichProgressSubscriber method)
context_parallel_degree (modalities.running_env.fsdp.device_mesh.DeviceMeshConfig attribute)
convert_base_model_config_to_dict() (in module modalities.config.utils)
convert_tokenizer() (in module modalities.conversion.gpt2.conversion_tokenizer)
CP (modalities.running_env.fsdp.device_mesh.ParallelismDegrees attribute)
cpu_scalar_float() (in module modalities.util)
cpu_scalar_int() (in module modalities.util)
create_index() (modalities.dataloader.create_index.IndexGenerator method)
CROSS_ATTENTION (modalities.nn.attention.AttentionType attribute)
D
DAO_FLASH (modalities.models.gpt2.gpt2_model.AttentionImplementation attribute)
data (modalities.dataloader.create_packed_data.EmbeddedStreamData property)
data_parallel_replicate_degree (modalities.running_env.fsdp.device_mesh.DeviceMeshConfig attribute)
data_parallel_shard_degree (modalities.running_env.fsdp.device_mesh.DeviceMeshConfig attribute)
DATA_SECTION_LENGTH_IN_BYTES (modalities.dataloader.create_packed_data.EmbeddedStreamData attribute)
(modalities.dataloader.dataset.PackedMemMapDatasetBase attribute)
data_type (modalities.utils.profilers.batch_generator.RandomDatasetBatchGeneratorConfig attribute)
dataloader_tag (modalities.batch.EvaluationResultBatch attribute)
(modalities.dataloader.dataloader.LLMDataLoader property)
(modalities.logging_broker.messages.ProgressUpdate attribute)
DataloaderFactory (class in modalities.dataloader.dataloader_factory)
Dataset (class in modalities.dataloader.dataset)
dataset_path (modalities.utils.number_conversion.NumTokensFromPackedMemMapDatasetContinuousConfig attribute)
DatasetBatch (class in modalities.batch)
DatasetBatchGeneratorIF (class in modalities.utils.profilers.batch_generator)
DatasetFactory (class in modalities.dataloader.dataset_factory)
DatasetNotFoundError
DataShuffler (class in modalities.preprocessing.shuffle_data)
DataTypeEnum (class in modalities.utils.profilers.batch_generator)
DCPCheckpointLoading (class in modalities.checkpointing.fsdp.fsdp_checkpoint_loading)
DCPCheckpointSaving (class in modalities.checkpointing.fsdp.fsdp_checkpoint_saving)
debug_nan_hook() (in module modalities.utils.debug)
Debugging (class in modalities.utils.debug_components)
decimal_places (modalities.batch.ResultItem attribute)
decode() (modalities.tokenization.tokenizer_wrapper.PreTrainedHFTokenizer method)
(modalities.tokenization.tokenizer_wrapper.PreTrainedSPTokenizer method)
(modalities.tokenization.tokenizer_wrapper.TokenizerWrapper method)
DEFAULT_ATTENTION (modalities.nn.attention.AttentionEngineType attribute)
default_index_path() (modalities.dataloader.large_file_lines_reader.LargeFileLinesReader static method)
depth_init (modalities.models.gpt2.llama3_like_initialization.Llama3InitializerConfig attribute)
detach() (modalities.batch.DatasetBatch method)
(modalities.batch.InferenceResultBatch method)
(modalities.batch.TorchDeviceMixin method)
device (modalities.batch.DatasetBatch property)
(modalities.batch.InferenceResultBatch property)
(modalities.batch.TorchDeviceMixin property)
(modalities.models.coca.collator.CoCaCollatorFn attribute)
device_type (modalities.running_env.fsdp.device_mesh.DeviceMeshConfig attribute)
DeviceMeshConfig (class in modalities.running_env.fsdp.device_mesh)
dims (modalities.utils.profilers.batch_generator.RandomDatasetBatchGeneratorConfig attribute)
distribute_message() (modalities.logging_broker.message_broker.MessageBroker method)
(modalities.logging_broker.message_broker.MessageBrokerIF method)
DistributedCheckpointLoadingIF (class in modalities.checkpointing.checkpoint_loading)
dp_degree (modalities.utils.number_conversion.NumStepsFromNumTokensConfig attribute)
(modalities.utils.number_conversion.NumTokensFromNumStepsConfig attribute)
(modalities.utils.number_conversion.NumTokensFromPackedMemMapDatasetContinuousConfig attribute)
DP_REPLICATE (modalities.running_env.fsdp.device_mesh.ParallelismDegrees attribute)
DP_SHARD (modalities.running_env.fsdp.device_mesh.ParallelismDegrees attribute)
dropout (modalities.models.coca.coca_model.TextDecoderConfig attribute)
(modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
(modalities.models.vision_transformer.vision_transformer_model.VisionTransformerConfig attribute)
DummyDataset (class in modalities.dataloader.dataset)
DummyDatasetConfig (class in modalities.dataloader.dataset)
DummyLRScheduler (class in modalities.optimizers.lr_schedulers)
DummyProgressSubscriber (class in modalities.logging_broker.subscriber_impl.progress_subscriber)
DummySampleConfig (class in modalities.dataloader.dataset)
DummySampleDataType (class in modalities.dataloader.dataset)
E
elementwise_affine (modalities.models.components.layer_norms.LayerNormConfig attribute)
EmbeddedStreamData (class in modalities.dataloader.create_packed_data)
EmptySampleError
enable_deterministic_cuda() (in module modalities.utils.debug)
enable_loss_parallel (modalities.running_env.fsdp.device_mesh.DeviceMeshConfig attribute)
enforce_swiglu_hidden_dim_multiple_of (modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
eps (modalities.models.components.layer_norms.LayerNormConfig attribute)
(modalities.models.components.layer_norms.PytorchRMSLayerNormConfig attribute)
epsilon (modalities.models.coca.coca_model.TextDecoderConfig attribute)
(modalities.models.components.layer_norms.RMSLayerNormConfig attribute)
epsilon_attn_pool (modalities.models.coca.coca_model.CoCaConfig attribute)
ERROR_MESSAGE (modalities.logging_broker.messages.MessageTypes attribute)
ERRORS_FILE_REGEX (modalities.utils.benchmarking.benchmarking_utils.FileNames attribute)
EVALUATION (modalities.logging_broker.messages.ExperimentStatus attribute)
EVALUATION_RESULT (modalities.logging_broker.messages.MessageTypes attribute)
EvaluationResultBatch (class in modalities.batch)
execute_attention() (modalities.models.gpt2.gpt2_model.CausalSelfAttention class method)
execute_qkv_transforms() (modalities.models.gpt2.gpt2_model.CausalSelfAttention static method)
experiment_status (modalities.logging_broker.messages.ProgressUpdate attribute)
ExperimentStatus (class in modalities.logging_broker.messages)
F
ffn_hidden (modalities.models.coca.coca_model.TextDecoderConfig attribute)
(modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
ffn_norm_config (modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
FileNames (class in modalities.utils.benchmarking.benchmarking_utils)
filter_dataset() (in module modalities.dataloader.filter_packed_data)
FLOAT (modalities.dataloader.dataset.DummySampleDataType attribute)
float32 (modalities.utils.profilers.batch_generator.DataTypeEnum attribute)
format_metrics_to_gb() (in module modalities.util)
forward() (modalities.conversion.gpt2.modeling_gpt2.GPT2ForCausalLM method)
(modalities.conversion.gpt2.modeling_gpt2.GPT2Model method)
(modalities.models.coca.attention_pooling.AttentionPooling method)
(modalities.models.coca.coca_model.CoCa method)
(modalities.models.coca.multi_modal_decoder.MultiModalTextDecoder method)
(modalities.models.coca.multi_modal_decoder.TransformerBlock method)
(modalities.models.coca.text_decoder.TextDecoder method)
(modalities.models.components.layer_norms.RMSLayerNorm method)
(modalities.models.gpt2.gpt2_model.CausalSelfAttention method)
(modalities.models.gpt2.gpt2_model.GPT2Block method)
(modalities.models.gpt2.gpt2_model.GPT2LLM method)
(modalities.models.gpt2.gpt2_model.IdentityTransform method)
(modalities.models.gpt2.gpt2_model.QueryKeyValueTransform method)
(modalities.models.gpt2.gpt2_model.RotaryTransform method)
(modalities.models.gpt2.gpt2_model.TransformerMLP method)
(modalities.models.huggingface.huggingface_model.HuggingFacePretrainedModel method)
(modalities.models.model.NNModel method)
(modalities.models.model.SwiGLU method)
(modalities.models.vision_transformer.vision_transformer_model.ImagePatchEmbedding method)
(modalities.models.vision_transformer.vision_transformer_model.VisionTransformer method)
(modalities.models.vision_transformer.vision_transformer_model.VisionTransformerBlock method)
(modalities.nn.attention.MultiHeadAttention method)
(modalities.nn.mlp.MLP method)
forward_images() (modalities.models.vision_transformer.vision_transformer_model.VisionTransformer method)
forward_impl() (modalities.models.gpt2.gpt2_model.GPT2LLM method)
FP_16 (modalities.running_env.env_utils.MixedPrecisionSettings attribute)
(modalities.running_env.env_utils.PyTorchDtypes attribute)
FP_32 (modalities.running_env.env_utils.MixedPrecisionSettings attribute)
(modalities.running_env.env_utils.PyTorchDtypes attribute)
FQNsPerStageGeneratorConfig (class in modalities.models.parallelism.stages_generator_configs)
FSDP1CheckpointLoading (class in modalities.checkpointing.fsdp.fsdp_checkpoint_loading)
FSDP1CheckpointLoadingIF (class in modalities.checkpointing.checkpoint_loading)
FSDP1CheckpointSaving (class in modalities.checkpointing.fsdp.fsdp_checkpoint_saving)
FSDP1GradientClipper (class in modalities.training.gradient_clipping.fsdp_gradient_clipper)
FSDP1LoggingOnlyGradientClipper (class in modalities.training.gradient_clipping.fsdp_gradient_clipper)
FSDP2GradientClipper (class in modalities.training.gradient_clipping.fsdp_gradient_clipper)
FSDP2LoggingOnlyGradientClipper (class in modalities.training.gradient_clipping.fsdp_gradient_clipper)
FSDP2MixedPrecisionSettings (class in modalities.running_env.env_utils)
fsdp_block_names (modalities.models.huggingface.huggingface_model.HuggingFacePretrainedModel property)
FSDPAutoWrapFactoryIF (class in modalities.running_env.fsdp.fsdp_auto_wrapper)
FSDPAutoWrapFactoryTypes (class in modalities.running_env.fsdp.fsdp_auto_wrapper)
FSDPTransformerAutoWrapPolicyFactory (class in modalities.running_env.fsdp.fsdp_auto_wrapper)
FULL_ACTIVATION_CHECKPOINTING (modalities.training.activation_checkpointing.activation_checkpointing_variants.ActivationCheckpointingVariants attribute)
G
GELU (modalities.models.model.ActivationType attribute)
generate_sweep_configs() (modalities.utils.benchmarking.sweep_utils.SweepGenerator static method)
generate_tokens() (modalities.inference.text.inference_component.TextInferenceComponent method)
get_adam() (modalities.optimizers.optimizer_factory.OptimizerFactory static method)
get_adam_w() (modalities.optimizers.optimizer_factory.OptimizerFactory static method)
get_auto_wrap_policy() (modalities.running_env.fsdp.fsdp_auto_wrapper.FSDPAutoWrapFactoryIF method)
(modalities.running_env.fsdp.fsdp_auto_wrapper.FSDPTransformerAutoWrapPolicyFactory method)
get_checkpoint_instruction() (modalities.checkpointing.checkpoint_saving_strategies.CheckpointSavingStrategyIF method)
(modalities.checkpointing.checkpoint_saving_strategies.SaveEveryKStepsCheckpointingStrategy method)
(modalities.checkpointing.checkpoint_saving_strategies.SaveKMostRecentCheckpointsStrategy method)
get_combined_dataset() (modalities.dataloader.dataset_factory.DatasetFactory static method)
get_current_sweep_status() (in module modalities.utils.benchmarking.benchmarking_utils)
get_dataloader() (modalities.dataloader.dataloader_factory.DataloaderFactory static method)
get_dataset_batch() (modalities.utils.profilers.batch_generator.DatasetBatchGeneratorIF method)
(modalities.utils.profilers.batch_generator.RandomDatasetBatchGenerator method)
get_dcp_checkpointed_app_state_() (modalities.checkpointing.stateful.app_state_factory.AppStateFactory static method)
get_decoder() (modalities.conversion.gpt2.modeling_gpt2.GPT2ForCausalLM method)
get_device_mesh() (in module modalities.running_env.fsdp.device_mesh)
get_dummy_dataset() (modalities.dataloader.dataset_factory.DatasetFactory static method)
get_experiment_id_from_config() (in module modalities.util)
get_file_md5sum() (in module modalities.utils.file_ops)
get_fsdp1_checkpointed_optimizer_() (modalities.optimizers.optimizer_factory.OptimizerFactory static method)
get_global_num_seen_tokens_from_checkpoint_path() (modalities.utils.number_conversion.NumberConversion static method)
get_global_num_target_tokens_from_checkpoint_path() (modalities.utils.number_conversion.NumberConversion static method)
get_jsonl_file_chunk() (modalities.preprocessing.create_chunks.Chunking static method)
get_last_lr() (modalities.optimizers.scheduler_list.SchedulerList method)
get_last_step_from_checkpoint_path() (modalities.utils.number_conversion.NumberConversion static method)
get_local_num_batches_from_num_samples() (modalities.utils.number_conversion.NumberConversion static method)
get_local_num_batches_from_num_tokens() (modalities.utils.number_conversion.NumberConversion static method)
get_local_number_of_trainable_parameters() (in module modalities.util)
get_logger() (in module modalities.utils.logger_utils)
get_lr() (modalities.optimizers.lr_schedulers.DummyLRScheduler method)
(modalities.optimizers.scheduler_list.SchedulerList method)
get_mem_map_dataset() (modalities.dataloader.dataset_factory.DatasetFactory static method)
get_mesh_for_parallelism_method() (in module modalities.running_env.fsdp.device_mesh)
get_module_class_from_name() (in module modalities.util)
get_num_samples_from_num_tokens() (modalities.utils.number_conversion.NumberConversion static method)
get_num_seen_steps_from_checkpoint_path() (modalities.utils.number_conversion.NumberConversion static method)
get_num_steps_from_num_samples() (modalities.utils.number_conversion.NumberConversion static method)
get_num_steps_from_num_tokens() (modalities.utils.number_conversion.NumberConversion static method)
get_num_steps_from_raw_dataset_index() (modalities.utils.number_conversion.NumberConversion static method)
get_num_target_steps_from_checkpoint_path() (modalities.utils.number_conversion.NumberConversion static method)
get_num_tokens_from_num_steps() (modalities.utils.number_conversion.NumberConversion static method)
get_num_tokens_from_packed_mem_map_dataset_continuous() (modalities.utils.number_conversion.NumberConversion static method)
get_optimizer_groups() (in module modalities.optimizers.optimizer_factory)
get_packed_mem_map_dataset_continuous() (modalities.dataloader.dataset_factory.DatasetFactory static method)
get_packed_mem_map_dataset_megatron() (modalities.dataloader.dataset_factory.DatasetFactory static method)
get_parallel_degree() (in module modalities.running_env.fsdp.device_mesh)
get_parallel_rank() (in module modalities.running_env.fsdp.device_mesh)
get_parameters() (modalities.models.model.NNModel method)
get_plain_initialization() (modalities.nn.model_initialization.initialization_routines.InitializationRoutines static method)
get_predictions() (modalities.batch.InferenceResultBatch method)
get_raw_app_state() (modalities.checkpointing.stateful.app_state_factory.AppStateFactory static method)
get_raw_index() (modalities.dataloader.dataset_factory.DatasetFactory static method)
get_scaled_embed_initialization() (modalities.nn.model_initialization.initialization_routines.InitializationRoutines static method)
get_scaled_initialization() (modalities.nn.model_initialization.initialization_routines.InitializationRoutines static method)
get_stages() (modalities.models.parallelism.stages_generator.StagesGenerator method)
get_state_dict() (modalities.checkpointing.stateful.app_state.LRSchedulerStateRetriever static method)
(modalities.checkpointing.stateful.app_state.ModelStateRetriever static method)
(modalities.checkpointing.stateful.app_state.OptimizerStateRetriever static method)
(modalities.checkpointing.stateful.app_state.StateRetrieverIF static method)
get_synced_experiment_id_of_run() (in module modalities.util)
get_synced_string() (in module modalities.util)
get_targets() (modalities.batch.InferenceResultBatch method)
get_token_id() (modalities.tokenization.tokenizer_wrapper.PreTrainedHFTokenizer method)
(modalities.tokenization.tokenizer_wrapper.PreTrainedSPTokenizer method)
(modalities.tokenization.tokenizer_wrapper.TokenizerWrapper method)
get_tokenized_file_chunk() (modalities.preprocessing.create_chunks.Chunking static method)
get_total_number_of_trainable_parameters() (in module modalities.util)
get_updated_sweep_status() (in module modalities.utils.benchmarking.benchmarking_utils)
global_num_samples (modalities.utils.number_conversion.LocalNumBatchesFromNumSamplesConfig attribute)
(modalities.utils.number_conversion.NumStepsFromNumSamplesConfig attribute)
global_num_tokens (modalities.utils.number_conversion.LocalNumBatchesFromNumTokensConfig attribute)
(modalities.utils.number_conversion.NumStepsFromNumTokensConfig attribute)
global_rank (modalities.logging_broker.messages.Message attribute)
GPT2 (modalities.nn.model_initialization.parameter_name_filters.SupportWeightInitModels attribute)
GPT2Block (class in modalities.models.gpt2.gpt2_model)
GPT2Config (class in modalities.conversion.gpt2.configuration_gpt2)
GPT2ForCausalLM (class in modalities.conversion.gpt2.modeling_gpt2)
GPT2ForQuestionAnswering (class in modalities.conversion.gpt2.modeling_gpt2)
GPT2ForSequenceClassification (class in modalities.conversion.gpt2.modeling_gpt2)
GPT2ForTokenClassification (class in modalities.conversion.gpt2.modeling_gpt2)
GPT2LLM (class in modalities.models.gpt2.gpt2_model)
GPT2LLMCollateFn (class in modalities.models.gpt2.collator)
GPT2LLMConfig (class in modalities.models.gpt2.gpt2_model)
GPT2LLMStagesGenerator (class in modalities.models.parallelism.stages_generator)
GPT2LLMStagesGeneratorConfig (class in modalities.models.parallelism.stages_generator_configs)
GPT2MFUCalculator (class in modalities.utils.mfu)
GPT2Model (class in modalities.conversion.gpt2.modeling_gpt2)
GPT2PreTrainedModel (class in modalities.conversion.gpt2.modeling_gpt2)
gradient_accumulation_steps (modalities.utils.number_conversion.NumStepsFromNumSamplesConfig attribute)
(modalities.utils.number_conversion.NumStepsFromNumTokensConfig attribute)
(modalities.utils.number_conversion.NumStepsFromRawDatasetIndexConfig attribute)
(modalities.utils.number_conversion.NumTokensFromNumStepsConfig attribute)
(modalities.utils.number_conversion.NumTokensFromPackedMemMapDatasetContinuousConfig attribute)
GradientClipperIF (class in modalities.training.gradient_clipping.gradient_clipper)
GradientClippingMode (class in modalities.training.gradient_clipping.fsdp_gradient_clipper)
H
has_bfloat_support() (in module modalities.running_env.env_utils)
has_parallelism_method() (in module modalities.running_env.fsdp.device_mesh)
HEADER_SIZE_IN_BYTES (modalities.dataloader.create_packed_data.EmbeddedStreamData attribute)
(modalities.dataloader.dataset.PackedMemMapDatasetBase attribute)
hidden_dim (modalities.nn.model_initialization.initialization_routines.PlainInitializationConfig attribute)
HIGH_LEVEL_PROGRESS_UPDATE (modalities.logging_broker.messages.MessageTypes attribute)
HookRegistration (class in modalities.utils.debug_components)
huggingface_prediction_subscription_key (modalities.models.huggingface.huggingface_model.HuggingFacePretrainedModelConfig attribute)
HuggingFaceModelTypes (class in modalities.models.huggingface.huggingface_model)
HuggingFacePretrainedModel (class in modalities.models.huggingface.huggingface_model)
HuggingFacePretrainedModelConfig (class in modalities.models.huggingface.huggingface_model)
I
IdentityTransform (class in modalities.models.gpt2.gpt2_model)
ImagePatchEmbedding (class in modalities.models.vision_transformer.vision_transformer_model)
img_size (modalities.models.vision_transformer.vision_transformer_model.VisionTransformerConfig attribute)
index_base (modalities.dataloader.create_packed_data.EmbeddedStreamData property)
IndexGenerator (class in modalities.dataloader.create_index)
InferenceResultBatch (class in modalities.batch)
InitializationRoutines (class in modalities.nn.model_initialization.initialization_routines)
initialize_in_place() (modalities.models.gpt2.llama3_like_initialization.Llama3Initializer method)
(modalities.nn.model_initialization.initialization_if.ModelInitializationIF method)
(modalities.nn.model_initialization.initialization_routines.NamedParameterwiseNormalInitialization method)
input_layer_equivalence (modalities.models.parallelism.stages_generator_configs.GPT2LLMStagesGeneratorConfig attribute)
INT (modalities.dataloader.dataset.DummySampleDataType attribute)
int64 (modalities.utils.profilers.batch_generator.DataTypeEnum attribute)
is_loaded (modalities.checkpointing.stateful.app_state.AppState property)
is_running_with_torchrun() (in module modalities.running_env.env_utils)
is_special_token_id() (modalities.tokenization.tokenizer_wrapper.PreTrainedHFTokenizer method)
(modalities.tokenization.tokenizer_wrapper.PreTrainedSPTokenizer method)
(modalities.tokenization.tokenizer_wrapper.TokenizerWrapper method)
J
join_embedded_stream_data() (in module modalities.dataloader.create_packed_data)
K
keys_to_ignore_at_inference (modalities.conversion.gpt2.configuration_gpt2.GPT2Config attribute)
kwargs (modalities.models.huggingface.huggingface_model.HuggingFacePretrainedModelConfig attribute)
L
LargeFileLinesReader (class in modalities.dataloader.large_file_lines_reader)
last_epoch (modalities.optimizers.scheduler_list.SchedulerList property)
LayerNormConfig (class in modalities.models.components.layer_norms)
LayerNorms (class in modalities.models.gpt2.gpt2_model)
LayerNormWrapperConfig (class in modalities.models.gpt2.gpt2_model)
Llama3Initializer (class in modalities.models.gpt2.llama3_like_initialization)
Llama3InitializerConfig (class in modalities.models.gpt2.llama3_like_initialization)
LLMDataLoader (class in modalities.dataloader.dataloader)
lm_head_norm_config (modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
load_checkpoint_() (modalities.checkpointing.checkpoint_loading.DistributedCheckpointLoadingIF method)
(modalities.checkpointing.fsdp.fsdp_checkpoint_loading.DCPCheckpointLoading method)
load_model_checkpoint() (modalities.checkpointing.checkpoint_loading.FSDP1CheckpointLoadingIF method)
(modalities.checkpointing.fsdp.fsdp_checkpoint_loading.FSDP1CheckpointLoading method)
load_optimizer_checkpoint_() (modalities.checkpointing.checkpoint_loading.FSDP1CheckpointLoadingIF method)
(modalities.checkpointing.fsdp.fsdp_checkpoint_loading.FSDP1CheckpointLoading method)
load_state_dict() (modalities.checkpointing.stateful.app_state.AppState method)
(modalities.optimizers.optimizer_list.OptimizersList method)
(modalities.optimizers.scheduler_list.SchedulerList method)
load_state_dict_() (modalities.checkpointing.stateful.app_state.LRSchedulerStateRetriever static method)
(modalities.checkpointing.stateful.app_state.ModelStateRetriever static method)
(modalities.checkpointing.stateful.app_state.OptimizerStateRetriever static method)
(modalities.checkpointing.stateful.app_state.StateRetrieverIF static method)
local_micro_batch_size (modalities.utils.number_conversion.LocalNumBatchesFromNumSamplesConfig attribute)
(modalities.utils.number_conversion.LocalNumBatchesFromNumTokensConfig attribute)
(modalities.utils.number_conversion.NumStepsFromNumSamplesConfig attribute)
(modalities.utils.number_conversion.NumStepsFromNumTokensConfig attribute)
(modalities.utils.number_conversion.NumStepsFromRawDatasetIndexConfig attribute)
(modalities.utils.number_conversion.NumTokensFromNumStepsConfig attribute)
(modalities.utils.number_conversion.NumTokensFromPackedMemMapDatasetContinuousConfig attribute)
local_rank (modalities.logging_broker.messages.Message attribute)
LocalNumBatchesFromNumSamplesConfig (class in modalities.utils.number_conversion)
LocalNumBatchesFromNumTokensConfig (class in modalities.utils.number_conversion)
LookupEnum (class in modalities.config.lookup_enum)
Loss (class in modalities.loss_functions)
losses (modalities.batch.EvaluationResultBatch attribute)
lr_scheduler (modalities.checkpointing.stateful.app_state.AppState property)
LR_SCHEDULER (modalities.checkpointing.stateful.app_state.StatefulComponents attribute)
LRSchedulerStateRetriever (class in modalities.checkpointing.stateful.app_state)
M
MANUAL (modalities.models.gpt2.gpt2_model.AttentionImplementation attribute)
manual_scaled_dot_product_attention() (in module modalities.models.gpt2.gpt2_model)
MAX_NORM (modalities.training.gradient_clipping.fsdp_gradient_clipper.GradientClippingMode attribute)
max_val (modalities.utils.profilers.batch_generator.RandomDatasetBatchGeneratorConfig attribute)
maybe_list_parameter() (in module modalities.utils.maybe_list_parameter)
mean (modalities.nn.model_initialization.initialization_routines.PlainInitializationConfig attribute)
(modalities.nn.model_initialization.initialization_routines.ScaledEmbedInitializationConfig attribute)
(modalities.nn.model_initialization.initialization_routines.ScaledInitializationConfig attribute)
MemMapDataset (class in modalities.dataloader.dataset)
MEMORY_SNAPSHOT_MAX_ENTRIES (modalities.utils.profilers.profilers.SteppableMemoryProfiler attribute)
Message (class in modalities.logging_broker.messages)
message_type (modalities.logging_broker.messages.Message attribute)
MessageBroker (class in modalities.logging_broker.message_broker)
MessageBrokerIF (class in modalities.logging_broker.message_broker)
MessagePublisher (class in modalities.logging_broker.publisher)
MessagePublisherIF (class in modalities.logging_broker.publisher)
MessageSubscriberIF (class in modalities.logging_broker.subscriber)
MessageTypes (class in modalities.logging_broker.messages)
metrics (modalities.batch.EvaluationResultBatch attribute)
MFUCalculatorABC (class in modalities.utils.mfu)
min_val (modalities.utils.profilers.batch_generator.RandomDatasetBatchGeneratorConfig attribute)
MIXED_PRECISION_MEGATRON (modalities.running_env.env_utils.MixedPrecisionSettings attribute)
MixedPrecisionSettings (class in modalities.running_env.env_utils)
MLP (class in modalities.nn.mlp)
modalities
module
modalities.batch
module
modalities.checkpointing
module
modalities.checkpointing.checkpoint_loading
module
modalities.checkpointing.checkpoint_saving
module
modalities.checkpointing.checkpoint_saving_execution
module
modalities.checkpointing.checkpoint_saving_instruction
module
modalities.checkpointing.checkpoint_saving_strategies
module
modalities.checkpointing.fsdp
module
modalities.checkpointing.fsdp.fsdp_checkpoint_loading
module
modalities.checkpointing.fsdp.fsdp_checkpoint_saving
module
modalities.checkpointing.stateful
module
modalities.checkpointing.stateful.app_state
module
modalities.checkpointing.stateful.app_state_factory
module
modalities.checkpointing.torch
module
modalities.config
module
modalities.config.lookup_enum
module
modalities.config.utils
module
modalities.conversion
module
modalities.conversion.gpt2
module
modalities.conversion.gpt2.configuration_gpt2
module
modalities.conversion.gpt2.conversion_code
module
modalities.conversion.gpt2.conversion_tokenizer
module
modalities.conversion.gpt2.modeling_gpt2
module
modalities.dataloader
module
modalities.dataloader.create_index
module
modalities.dataloader.create_packed_data
module
modalities.dataloader.dataloader
module
modalities.dataloader.dataloader_factory
module
modalities.dataloader.dataset
module
modalities.dataloader.dataset_factory
module
modalities.dataloader.filter_packed_data
module
modalities.dataloader.large_file_lines_reader
module
modalities.dataloader.preprocessing
module
modalities.dataloader.samplers
module
modalities.exceptions
module
modalities.inference
module
modalities.inference.text
module
modalities.inference.text.inference_component
module
modalities.logging_broker
module
modalities.logging_broker.message_broker
module
modalities.logging_broker.messages
module
modalities.logging_broker.publisher
module
modalities.logging_broker.subscriber
module
modalities.logging_broker.subscriber_impl
module
modalities.logging_broker.subscriber_impl.progress_subscriber
module
modalities.loss_functions
module
modalities.models
module
modalities.models.coca
module
modalities.models.coca.attention_pooling
module
modalities.models.coca.coca_model
module
modalities.models.coca.collator
module
modalities.models.coca.multi_modal_decoder
module
modalities.models.coca.text_decoder
module
modalities.models.components
module
modalities.models.components.layer_norms
module
modalities.models.gpt2
module
modalities.models.gpt2.collator
module
modalities.models.gpt2.gpt2_model
module
modalities.models.gpt2.llama3_like_initialization
module
modalities.models.huggingface
module
modalities.models.huggingface.huggingface_model
module
modalities.models.huggingface_adapters
module
modalities.models.model
module
modalities.models.parallelism
module
modalities.models.parallelism.stages_generator
module
modalities.models.parallelism.stages_generator_configs
module
modalities.models.vision_transformer
module
modalities.models.vision_transformer.vision_transformer_model
module
modalities.nn
module
modalities.nn.attention
module
modalities.nn.mlp
module
modalities.nn.model_initialization
module
modalities.nn.model_initialization.initialization_if
module
modalities.nn.model_initialization.initialization_routines
module
modalities.nn.model_initialization.parameter_name_filters
module
modalities.optimizers
module
modalities.optimizers.lr_schedulers
module
modalities.optimizers.optimizer_factory
module
modalities.optimizers.optimizer_list
module
modalities.optimizers.scheduler_list
module
modalities.preprocessing
module
modalities.preprocessing.create_chunks
module
modalities.preprocessing.shuffle_data
module
modalities.registry
module
modalities.running_env
module
modalities.running_env.env_utils
module
modalities.running_env.fsdp
module
modalities.running_env.fsdp.device_mesh
module
modalities.running_env.fsdp.fsdp_auto_wrapper
module
modalities.running_env.fsdp.reducer
module
modalities.tokenization
module
modalities.tokenization.tokenizer_wrapper
module
modalities.training
module
modalities.training.activation_checkpointing
module
modalities.training.activation_checkpointing.activation_checkpointing_variants
module
modalities.training.gradient_clipping
module
modalities.training.gradient_clipping.fsdp_gradient_clipper
module
modalities.training.gradient_clipping.gradient_clipper
module
modalities.training.training_progress
module
modalities.util
module
modalities.utils
module
modalities.utils.benchmarking
module
modalities.utils.benchmarking.benchmarking_utils
module
modalities.utils.benchmarking.sweep_utils
module
modalities.utils.communication_test
module
modalities.utils.debug
module
modalities.utils.debug_components
module
modalities.utils.deprecated_alias
module
modalities.utils.file_ops
module
modalities.utils.logger_utils
module
modalities.utils.maybe_list_parameter
module
modalities.utils.mfu
module
modalities.utils.number_conversion
module
modalities.utils.profilers
module
modalities.utils.profilers.batch_generator
module
modalities.utils.profilers.profilers
module
modalities.utils.profilers.steppable_components
module
modalities.utils.profilers.steppable_components_if
module
modalities.utils.seeding
module
modalities.utils.typing_utils
module
MODEL (modalities.checkpointing.fsdp.fsdp_checkpoint_saving.CheckpointingEntityType attribute)
(modalities.checkpointing.stateful.app_state.StatefulComponents attribute)
model_args (modalities.models.huggingface.huggingface_model.HuggingFacePretrainedModelConfig attribute)
model_config (modalities.dataloader.dataset.DummyDatasetConfig attribute)
(modalities.dataloader.dataset.DummySampleConfig attribute)
(modalities.models.coca.coca_model.CoCaConfig attribute)
(modalities.models.coca.coca_model.TextDecoderConfig attribute)
(modalities.models.coca.collator.CoCaCollateFnConfig attribute)
(modalities.models.components.layer_norms.LayerNormConfig attribute)
(modalities.models.components.layer_norms.PytorchRMSLayerNormConfig attribute)
(modalities.models.components.layer_norms.RMSLayerNormConfig attribute)
(modalities.models.gpt2.gpt2_model.AttentionConfig attribute)
(modalities.models.gpt2.gpt2_model.AttentionConfig.QueryKeyValueTransformConfig attribute)
(modalities.models.gpt2.gpt2_model.AttentionConfig.QueryKeyValueTransformConfig.IdentityTransformConfig attribute)
(modalities.models.gpt2.gpt2_model.AttentionConfig.QueryKeyValueTransformConfig.RotaryTransformConfig attribute)
(modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
(modalities.models.gpt2.gpt2_model.LayerNormWrapperConfig attribute)
(modalities.models.gpt2.llama3_like_initialization.Llama3InitializerConfig attribute)
(modalities.models.huggingface.huggingface_model.HuggingFacePretrainedModelConfig attribute)
(modalities.models.parallelism.stages_generator_configs.FQNsPerStageGeneratorConfig attribute)
(modalities.models.parallelism.stages_generator_configs.GPT2LLMStagesGeneratorConfig attribute)
(modalities.models.vision_transformer.vision_transformer_model.VisionTransformerConfig attribute)
(modalities.nn.attention.AttentionConfig attribute)
(modalities.nn.model_initialization.initialization_routines.PlainInitializationConfig attribute)
(modalities.nn.model_initialization.initialization_routines.ScaledEmbedInitializationConfig attribute)
(modalities.nn.model_initialization.initialization_routines.ScaledInitializationConfig attribute)
(modalities.nn.model_initialization.parameter_name_filters.RegexFilter attribute)
(modalities.running_env.env_utils.FSDP2MixedPrecisionSettings attribute)
(modalities.running_env.fsdp.device_mesh.DeviceMeshConfig attribute)
(modalities.utils.benchmarking.sweep_utils.SweepConfig attribute)
(modalities.utils.number_conversion.LocalNumBatchesFromNumSamplesConfig attribute)
(modalities.utils.number_conversion.LocalNumBatchesFromNumTokensConfig attribute)
(modalities.utils.number_conversion.NumberConversionFromCheckpointPathConfig attribute)
(modalities.utils.number_conversion.NumSamplesFromNumTokensConfig attribute)
(modalities.utils.number_conversion.NumStepsFromNumSamplesConfig attribute)
(modalities.utils.number_conversion.NumStepsFromNumTokensConfig attribute)
(modalities.utils.number_conversion.NumStepsFromRawDatasetIndexConfig attribute)
(modalities.utils.number_conversion.NumTokensFromNumStepsConfig attribute)
(modalities.utils.number_conversion.NumTokensFromPackedMemMapDatasetContinuousConfig attribute)
(modalities.utils.profilers.batch_generator.RandomDatasetBatchGeneratorConfig attribute)
model_name (modalities.models.huggingface.huggingface_model.HuggingFacePretrainedModelConfig attribute)
model_parts (modalities.checkpointing.stateful.app_state.AppState property)
model_predict_batch() (in module modalities.models.model)
model_type (modalities.conversion.gpt2.configuration_gpt2.GPT2Config attribute)
(modalities.models.huggingface.huggingface_model.HuggingFacePretrainedModelConfig attribute)
ModelInitializationIF (class in modalities.nn.model_initialization.initialization_if)
ModelStateError
ModelStateRetriever (class in modalities.checkpointing.stateful.app_state)
module
modalities
modalities.batch
modalities.checkpointing
modalities.checkpointing.checkpoint_loading
modalities.checkpointing.checkpoint_saving
modalities.checkpointing.checkpoint_saving_execution
modalities.checkpointing.checkpoint_saving_instruction
modalities.checkpointing.checkpoint_saving_strategies
modalities.checkpointing.fsdp
modalities.checkpointing.fsdp.fsdp_checkpoint_loading
modalities.checkpointing.fsdp.fsdp_checkpoint_saving
modalities.checkpointing.stateful
modalities.checkpointing.stateful.app_state
modalities.checkpointing.stateful.app_state_factory
modalities.checkpointing.torch
modalities.config
modalities.config.lookup_enum
modalities.config.utils
modalities.conversion
modalities.conversion.gpt2
modalities.conversion.gpt2.configuration_gpt2
modalities.conversion.gpt2.conversion_code
modalities.conversion.gpt2.conversion_tokenizer
modalities.conversion.gpt2.modeling_gpt2
modalities.dataloader
modalities.dataloader.create_index
modalities.dataloader.create_packed_data
modalities.dataloader.dataloader
modalities.dataloader.dataloader_factory
modalities.dataloader.dataset
modalities.dataloader.dataset_factory
modalities.dataloader.filter_packed_data
modalities.dataloader.large_file_lines_reader
modalities.dataloader.preprocessing
modalities.dataloader.samplers
modalities.exceptions
modalities.inference
modalities.inference.text
modalities.inference.text.inference_component
modalities.logging_broker
modalities.logging_broker.message_broker
modalities.logging_broker.messages
modalities.logging_broker.publisher
modalities.logging_broker.subscriber
modalities.logging_broker.subscriber_impl
modalities.logging_broker.subscriber_impl.progress_subscriber
modalities.loss_functions
modalities.models
modalities.models.coca
modalities.models.coca.attention_pooling
modalities.models.coca.coca_model
modalities.models.coca.collator
modalities.models.coca.multi_modal_decoder
modalities.models.coca.text_decoder
modalities.models.components
modalities.models.components.layer_norms
modalities.models.gpt2
modalities.models.gpt2.collator
modalities.models.gpt2.gpt2_model
modalities.models.gpt2.llama3_like_initialization
modalities.models.huggingface
modalities.models.huggingface.huggingface_model
modalities.models.huggingface_adapters
modalities.models.model
modalities.models.parallelism
modalities.models.parallelism.stages_generator
modalities.models.parallelism.stages_generator_configs
modalities.models.vision_transformer
modalities.models.vision_transformer.vision_transformer_model
modalities.nn
modalities.nn.attention
modalities.nn.mlp
modalities.nn.model_initialization
modalities.nn.model_initialization.initialization_if
modalities.nn.model_initialization.initialization_routines
modalities.nn.model_initialization.parameter_name_filters
modalities.optimizers
modalities.optimizers.lr_schedulers
modalities.optimizers.optimizer_factory
modalities.optimizers.optimizer_list
modalities.optimizers.scheduler_list
modalities.preprocessing
modalities.preprocessing.create_chunks
modalities.preprocessing.shuffle_data
modalities.registry
modalities.running_env
modalities.running_env.env_utils
modalities.running_env.fsdp
modalities.running_env.fsdp.device_mesh
modalities.running_env.fsdp.fsdp_auto_wrapper
modalities.running_env.fsdp.reducer
modalities.tokenization
modalities.tokenization.tokenizer_wrapper
modalities.training
modalities.training.activation_checkpointing
modalities.training.activation_checkpointing.activation_checkpointing_variants
modalities.training.gradient_clipping
modalities.training.gradient_clipping.fsdp_gradient_clipper
modalities.training.gradient_clipping.gradient_clipper
modalities.training.training_progress
modalities.util
modalities.utils
modalities.utils.benchmarking
modalities.utils.benchmarking.benchmarking_utils
modalities.utils.benchmarking.sweep_utils
modalities.utils.communication_test
modalities.utils.debug
modalities.utils.debug_components
modalities.utils.deprecated_alias
modalities.utils.file_ops
modalities.utils.logger_utils
modalities.utils.maybe_list_parameter
modalities.utils.mfu
modalities.utils.number_conversion
modalities.utils.profilers
modalities.utils.profilers.batch_generator
modalities.utils.profilers.profilers
modalities.utils.profilers.steppable_components
modalities.utils.profilers.steppable_components_if
modalities.utils.seeding
modalities.utils.typing_utils
MOST_RECENT_CONFIGS (modalities.utils.benchmarking.benchmarking_utils.SweepSets attribute)
MultiHeadAttention (class in modalities.nn.attention)
MultiModalTextDecoder (class in modalities.models.coca.multi_modal_decoder)
N
n_classes (modalities.models.vision_transformer.vision_transformer_model.VisionTransformerConfig attribute)
n_embd (modalities.models.coca.coca_model.TextDecoderConfig attribute)
(modalities.models.gpt2.gpt2_model.AttentionConfig.QueryKeyValueTransformConfig.RotaryTransformConfig attribute)
(modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
(modalities.models.gpt2.llama3_like_initialization.Llama3InitializerConfig attribute)
(modalities.models.vision_transformer.vision_transformer_model.VisionTransformerConfig attribute)
n_head (modalities.models.coca.coca_model.TextDecoderConfig attribute)
(modalities.models.gpt2.gpt2_model.AttentionConfig.QueryKeyValueTransformConfig.RotaryTransformConfig attribute)
(modalities.models.vision_transformer.vision_transformer_model.VisionTransformerConfig attribute)
n_head_kv (modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
n_head_q (modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
n_img_channels (modalities.models.vision_transformer.vision_transformer_model.VisionTransformerConfig attribute)
n_layer (modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
(modalities.models.vision_transformer.vision_transformer_model.VisionTransformerConfig attribute)
n_layer_multimodal_text (modalities.models.coca.coca_model.TextDecoderConfig attribute)
n_layer_text (modalities.models.coca.coca_model.TextDecoderConfig attribute)
n_pool_head (modalities.models.coca.coca_model.CoCaConfig attribute)
n_vision_queries (modalities.models.coca.coca_model.CoCaConfig attribute)
NamedParameterwiseNormalInitialization (class in modalities.nn.model_initialization.initialization_routines)
nce_loss() (in module modalities.loss_functions)
NCELoss (class in modalities.loss_functions)
ndim (modalities.models.components.layer_norms.RMSLayerNormConfig attribute)
NNModel (class in modalities.models.model)
NO_MIXED_PRECISION (modalities.running_env.env_utils.MixedPrecisionSettings attribute)
NON_CAUSAL_SELF_ATTENTION (modalities.nn.attention.AttentionType attribute)
NOPE (modalities.models.gpt2.gpt2_model.PositionTypes attribute)
norm_type (modalities.models.gpt2.gpt2_model.LayerNormWrapperConfig attribute)
normalized_shape (modalities.models.components.layer_norms.LayerNormConfig attribute)
(modalities.models.components.layer_norms.PytorchRMSLayerNormConfig attribute)
np_dtype_of_tokens_on_disk_from_bytes (modalities.dataloader.dataset.PackedMemMapDatasetBase attribute)
num_layers (modalities.models.gpt2.llama3_like_initialization.Llama3InitializerConfig attribute)
(modalities.nn.model_initialization.initialization_routines.ScaledInitializationConfig attribute)
num_model_layers (modalities.models.parallelism.stages_generator_configs.GPT2LLMStagesGeneratorConfig attribute)
num_ranks (modalities.utils.number_conversion.LocalNumBatchesFromNumSamplesConfig attribute)
(modalities.utils.number_conversion.LocalNumBatchesFromNumTokensConfig attribute)
(modalities.utils.number_conversion.NumStepsFromNumSamplesConfig attribute)
(modalities.utils.number_conversion.NumStepsFromRawDatasetIndexConfig attribute)
num_samples (modalities.dataloader.dataset.DummyDatasetConfig attribute)
num_seen_steps_current_run (modalities.training.training_progress.TrainingProgress attribute)
num_seen_steps_previous_run (modalities.training.training_progress.TrainingProgress attribute)
num_seen_steps_total (modalities.training.training_progress.TrainingProgress property)
num_seen_tokens_current_run (modalities.training.training_progress.TrainingProgress attribute)
num_seen_tokens_previous_run (modalities.training.training_progress.TrainingProgress attribute)
num_seen_tokens_total (modalities.training.training_progress.TrainingProgress property)
num_steps (modalities.utils.number_conversion.NumTokensFromNumStepsConfig attribute)
num_steps_done (modalities.logging_broker.messages.ProgressUpdate attribute)
num_target_steps (modalities.training.training_progress.TrainingProgress attribute)
num_target_tokens (modalities.training.training_progress.TrainingProgress attribute)
num_tokens (modalities.utils.number_conversion.NumSamplesFromNumTokensConfig attribute)
num_train_steps_done (modalities.batch.EvaluationResultBatch attribute)
NumberConversion (class in modalities.utils.number_conversion)
NumberConversionFromCheckpointPathConfig (class in modalities.utils.number_conversion)
NumSamplesFromNumTokensConfig (class in modalities.utils.number_conversion)
NumStepsFromNumSamplesConfig (class in modalities.utils.number_conversion)
NumStepsFromNumTokensConfig (class in modalities.utils.number_conversion)
NumStepsFromRawDatasetIndexConfig (class in modalities.utils.number_conversion)
NumTokensFromNumStepsConfig (class in modalities.utils.number_conversion)
NumTokensFromPackedMemMapDatasetContinuousConfig (class in modalities.utils.number_conversion)
O
OPTIMIZER (modalities.checkpointing.fsdp.fsdp_checkpoint_saving.CheckpointingEntityType attribute)
optimizer (modalities.checkpointing.stateful.app_state.AppState property)
OPTIMIZER (modalities.checkpointing.stateful.app_state.StatefulComponents attribute)
OptimizerError
OptimizerFactory (class in modalities.optimizers.optimizer_factory)
OptimizersList (class in modalities.optimizers.optimizer_list)
OptimizerStateRetriever (class in modalities.checkpointing.stateful.app_state)
output_layer_equivalence (modalities.models.parallelism.stages_generator_configs.GPT2LLMStagesGeneratorConfig attribute)
P
P1_NORM (modalities.training.gradient_clipping.fsdp_gradient_clipper.GradientClippingMode attribute)
P2_NORM (modalities.training.gradient_clipping.fsdp_gradient_clipper.GradientClippingMode attribute)
PackedDataGenerator (class in modalities.dataloader.create_packed_data)
PackedMemMapDatasetBase (class in modalities.dataloader.dataset)
PackedMemMapDatasetContinuous (class in modalities.dataloader.dataset)
PackedMemMapDatasetMegatron (class in modalities.dataloader.dataset)
paired (modalities.utils.benchmarking.sweep_utils.SweepConfig attribute)
ParallelismDegrees (class in modalities.running_env.fsdp.device_mesh)
param_dtype (modalities.running_env.env_utils.FSDP2MixedPrecisionSettings attribute)
parameter_name_regexes (modalities.nn.model_initialization.initialization_routines.PlainInitializationConfig attribute)
(modalities.nn.model_initialization.initialization_routines.ScaledEmbedInitializationConfig attribute)
(modalities.nn.model_initialization.initialization_routines.ScaledInitializationConfig attribute)
parse_enum_by_name() (in module modalities.util)
parse_sharding_strategy_by_name() (modalities.models.gpt2.gpt2_model.AttentionConfig.QueryKeyValueTransformConfig class method)
parse_torch_device() (in module modalities.config.utils)
patch_size (modalities.models.vision_transformer.vision_transformer_model.VisionTransformerConfig attribute)
patch_stride (modalities.models.vision_transformer.vision_transformer_model.VisionTransformerConfig attribute)
payload (modalities.logging_broker.messages.Message attribute)
pipeline_parallel_degree (modalities.running_env.fsdp.device_mesh.DeviceMeshConfig attribute)
PLAIN (modalities.nn.model_initialization.parameter_name_filters.WeightInitTypes attribute)
PlainInitializationConfig (class in modalities.nn.model_initialization.initialization_routines)
poe_type (modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
PositionTypes (class in modalities.models.gpt2.gpt2_model)
PP (modalities.running_env.fsdp.device_mesh.ParallelismDegrees attribute)
prediction_key (modalities.models.coca.coca_model.CoCaConfig attribute)
(modalities.models.coca.coca_model.TextDecoderConfig attribute)
(modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
(modalities.models.huggingface.huggingface_model.HuggingFacePretrainedModelConfig attribute)
(modalities.models.vision_transformer.vision_transformer_model.VisionTransformerConfig attribute)
predictions (modalities.batch.InferenceResultBatch attribute)
PreTrainedHFTokenizer (class in modalities.tokenization.tokenizer_wrapper)
PreTrainedSPTokenizer (class in modalities.tokenization.tokenizer_wrapper)
print_forward_hook() (in module modalities.utils.debug)
print_rank_0() (in module modalities.util)
ProgressUpdate (class in modalities.logging_broker.messages)
projection() (modalities.models.gpt2.gpt2_model.CausalSelfAttention method)
publish_message() (modalities.logging_broker.publisher.MessagePublisher method)
(modalities.logging_broker.publisher.MessagePublisherIF method)
PYTORCH_FLASH (modalities.models.gpt2.gpt2_model.AttentionImplementation attribute)
PYTORCH_FLASH_ATTENTION (modalities.nn.attention.AttentionEngineType attribute)
PyTorchDtypes (class in modalities.running_env.env_utils)
PytorchRMSLayerNormConfig (class in modalities.models.components.layer_norms)
Q
qk_norm_config (modalities.models.gpt2.gpt2_model.AttentionConfig attribute)
qkv_transforms (modalities.models.gpt2.gpt2_model.AttentionConfig attribute)
QueryKeyValueTransform (class in modalities.models.gpt2.gpt2_model)
QueryKeyValueTransformType (class in modalities.models.gpt2.gpt2_model)
R
RandomDatasetBatchGenerator (class in modalities.utils.profilers.batch_generator)
RandomDatasetBatchGeneratorConfig (class in modalities.utils.profilers.batch_generator)
raw_index_path (modalities.utils.number_conversion.NumStepsFromRawDatasetIndexConfig attribute)
reduce() (modalities.running_env.fsdp.reducer.Reducer static method)
reduce_dtype (modalities.running_env.env_utils.FSDP2MixedPrecisionSettings attribute)
Reducer (class in modalities.running_env.fsdp.reducer)
RegexFilter (class in modalities.nn.model_initialization.parameter_name_filters)
register_forward_hooks() (modalities.utils.debug_components.HookRegistration static method)
register_live_display() (modalities.logging_broker.subscriber_impl.progress_subscriber.RichProgressSubscriber class method)
register_nan_hooks() (modalities.utils.debug_components.HookRegistration static method)
register_print_forward_hooks() (modalities.utils.debug_components.HookRegistration static method)
REMAINING_CONFIGS (modalities.utils.benchmarking.benchmarking_utils.SweepSets attribute)
repeat_kv_heads() (modalities.models.gpt2.gpt2_model.CausalSelfAttention class method)
reset() (modalities.util.TimeRecorder method)
reset_parameters() (modalities.models.components.layer_norms.RMSLayerNorm method)
(modalities.models.gpt2.gpt2_model.RotaryTransform method)
ResultItem (class in modalities.batch)
RESULTS_FILE (modalities.utils.benchmarking.benchmarking_utils.FileNames attribute)
ResumableDistributedSampler (class in modalities.dataloader.samplers)
reuse_last_target (modalities.utils.number_conversion.NumTokensFromPackedMemMapDatasetContinuousConfig attribute)
RichProgressSubscriber (class in modalities.logging_broker.subscriber_impl.progress_subscriber)
RMSLayerNorm (class in modalities.models.components.layer_norms)
RMSLayerNormConfig (class in modalities.models.components.layer_norms)
RotaryTransform (class in modalities.models.gpt2.gpt2_model)
rotate_half() (modalities.models.gpt2.gpt2_model.RotaryTransform method)
run() (modalities.dataloader.create_packed_data.PackedDataGenerator method)
(modalities.inference.text.inference_component.TextInferenceComponent method)
run_checkpoint_instruction() (modalities.checkpointing.checkpoint_saving_execution.CheckpointSavingExecutionABC method)
run_communication_test() (in module modalities.utils.communication_test)
RUNNING (modalities.util.TimeRecorderStates attribute)
RunningEnvError
S
sample_definition (modalities.dataloader.dataset.DummyDatasetConfig attribute)
sample_key (modalities.dataloader.dataset.DummySampleConfig attribute)
(modalities.models.coca.coca_model.TextDecoderConfig attribute)
(modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
(modalities.models.huggingface.huggingface_model.HuggingFacePretrainedModelConfig attribute)
(modalities.models.vision_transformer.vision_transformer_model.VisionTransformerConfig attribute)
(modalities.utils.number_conversion.NumTokensFromPackedMemMapDatasetContinuousConfig attribute)
sample_keys (modalities.models.coca.collator.CoCaCollateFnConfig attribute)
sample_shape (modalities.dataloader.dataset.DummySampleConfig attribute)
sample_type (modalities.dataloader.dataset.DummySampleConfig attribute)
samples (modalities.batch.DatasetBatch attribute)
save_checkpoint() (modalities.checkpointing.checkpoint_saving.CheckpointSaving method)
save_current (modalities.checkpointing.checkpoint_saving_instruction.CheckpointingInstruction attribute)
SaveEveryKStepsCheckpointingStrategy (class in modalities.checkpointing.checkpoint_saving_strategies)
SaveKMostRecentCheckpointsStrategy (class in modalities.checkpointing.checkpoint_saving_strategies)
SCALED (modalities.nn.model_initialization.parameter_name_filters.WeightInitTypes attribute)
SCALED_EMBED (modalities.nn.model_initialization.parameter_name_filters.WeightInitTypes attribute)
ScaledEmbedInitializationConfig (class in modalities.nn.model_initialization.initialization_routines)
ScaledInitializationConfig (class in modalities.nn.model_initialization.initialization_routines)
SchedulerList (class in modalities.optimizers.scheduler_list)
seed (modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
SELECTIVE_LAYER_ACTIVATION_CHECKPOINTING (modalities.training.activation_checkpointing.activation_checkpointing_variants.ActivationCheckpointingVariants attribute)
SELECTIVE_OP_ACTIVATION_CHECKPOINTING (modalities.training.activation_checkpointing.activation_checkpointing_variants.ActivationCheckpointingVariants attribute)
seq_length_dim (modalities.models.gpt2.gpt2_model.AttentionConfig.QueryKeyValueTransformConfig.RotaryTransformConfig attribute)
sequence_length (modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
(modalities.utils.number_conversion.LocalNumBatchesFromNumTokensConfig attribute)
(modalities.utils.number_conversion.NumSamplesFromNumTokensConfig attribute)
(modalities.utils.number_conversion.NumStepsFromNumTokensConfig attribute)
(modalities.utils.number_conversion.NumTokensFromNumStepsConfig attribute)
(modalities.utils.number_conversion.NumTokensFromPackedMemMapDatasetContinuousConfig attribute)
set_decoder() (modalities.conversion.gpt2.modeling_gpt2.GPT2ForCausalLM method)
shuffle_file_chunks_in_place() (modalities.preprocessing.create_chunks.Chunking static method)
shuffle_jsonl_data() (modalities.preprocessing.shuffle_data.DataShuffler static method)
shuffle_tokenized_data() (modalities.preprocessing.shuffle_data.DataShuffler static method)
special_tokens (modalities.tokenization.tokenizer_wrapper.PreTrainedHFTokenizer property)
StagesGenerator (class in modalities.models.parallelism.stages_generator)
start() (modalities.util.TimeRecorder method)
state_dict() (modalities.checkpointing.stateful.app_state.AppState method)
(modalities.optimizers.optimizer_list.OptimizersList method)
(modalities.optimizers.scheduler_list.SchedulerList method)
StatefulComponents (class in modalities.checkpointing.stateful.app_state)
StateRetrieverIF (class in modalities.checkpointing.stateful.app_state)
std (modalities.nn.model_initialization.initialization_routines.PlainInitializationConfig attribute)
(modalities.nn.model_initialization.initialization_routines.ScaledInitializationConfig attribute)
step() (modalities.optimizers.optimizer_list.OptimizersList method)
(modalities.optimizers.scheduler_list.SchedulerList method)
(modalities.utils.profilers.profilers.SteppableCombinedProfiler method)
(modalities.utils.profilers.profilers.SteppableKernelProfiler method)
(modalities.utils.profilers.profilers.SteppableMemoryProfiler method)
(modalities.utils.profilers.profilers.SteppableNoProfiler method)
(modalities.utils.profilers.profilers.SteppableProfilerIF method)
(modalities.utils.profilers.steppable_components.SteppableForwardPass method)
(modalities.utils.profilers.steppable_components_if.SteppableComponentIF method)
SteppableCombinedProfiler (class in modalities.utils.profilers.profilers)
SteppableComponentIF (class in modalities.utils.profilers.steppable_components_if)
SteppableForwardPass (class in modalities.utils.profilers.steppable_components)
SteppableKernelProfiler (class in modalities.utils.profilers.profilers)
SteppableMemoryProfiler (class in modalities.utils.profilers.profilers)
SteppableNoProfiler (class in modalities.utils.profilers.profilers)
SteppableProfilerIF (class in modalities.utils.profilers.profilers)
stop() (modalities.util.TimeRecorder method)
STOPPED (modalities.util.TimeRecorderStates attribute)
supports_gradient_checkpointing (modalities.conversion.gpt2.modeling_gpt2.GPT2PreTrainedModel attribute)
SupportWeightInitModels (class in modalities.nn.model_initialization.parameter_name_filters)
sweep (modalities.utils.benchmarking.sweep_utils.SweepConfig attribute)
sweep_config (modalities.utils.benchmarking.sweep_utils.SweepGenerator attribute)
SweepConfig (class in modalities.utils.benchmarking.sweep_utils)
SweepGenerator (class in modalities.utils.benchmarking.sweep_utils)
SweepSets (class in modalities.utils.benchmarking.benchmarking_utils)
SwiGLU (class in modalities.models.model)
SWIGLU (modalities.models.model.ActivationType attribute)
T
tag (modalities.loss_functions.Loss property)
target_keys (modalities.models.coca.collator.CoCaCollateFnConfig attribute)
targets (modalities.batch.DatasetBatch attribute)
(modalities.batch.InferenceResultBatch attribute)
tensor_parallel_degree (modalities.running_env.fsdp.device_mesh.DeviceMeshConfig attribute)
text_cls_prediction_key (modalities.models.coca.coca_model.CoCaConfig attribute)
text_decoder_config (modalities.models.coca.coca_model.CoCaConfig attribute)
text_embd_prediction_key (modalities.models.coca.coca_model.CoCaConfig attribute)
text_sample_key (modalities.models.coca.collator.CoCaCollateFnConfig attribute)
text_target_key (modalities.models.coca.collator.CoCaCollateFnConfig attribute)
TextDecoder (class in modalities.models.coca.text_decoder)
TextDecoderConfig (class in modalities.models.coca.coca_model)
TextInferenceComponent (class in modalities.inference.text.inference_component)
throughput_metrics (modalities.batch.EvaluationResultBatch attribute)
TimeRecorder (class in modalities.util)
TimeRecorderStateError
TimeRecorderStates (class in modalities.util)
to() (modalities.batch.DatasetBatch method)
(modalities.batch.InferenceResultBatch method)
(modalities.batch.TorchDeviceMixin method)
to_cpu() (modalities.batch.InferenceResultBatch method)
TOKEN_SIZE_DESCRIPTOR_LENGTH_IN_BYTES (modalities.dataloader.create_packed_data.EmbeddedStreamData attribute)
(modalities.dataloader.dataset.PackedMemMapDatasetBase attribute)
token_size_in_bytes (modalities.dataloader.dataset.PackedMemMapDatasetBase property)
tokenize() (modalities.tokenization.tokenizer_wrapper.PreTrainedHFTokenizer method)
(modalities.tokenization.tokenizer_wrapper.PreTrainedSPTokenizer method)
(modalities.tokenization.tokenizer_wrapper.TokenizerWrapper method)
TokenizerWrapper (class in modalities.tokenization.tokenizer_wrapper)
TorchDeviceMixin (class in modalities.batch)
TP (modalities.running_env.fsdp.device_mesh.ParallelismDegrees attribute)
TRAIN (modalities.logging_broker.messages.ExperimentStatus attribute)
TrainingProgress (class in modalities.training.training_progress)
transfer_model_code() (in module modalities.conversion.gpt2.conversion_code)
TransformerBlock (class in modalities.models.coca.multi_modal_decoder)
TransformerMLP (class in modalities.models.gpt2.gpt2_model)
trunc_normal_() (in module modalities.models.gpt2.llama3_like_initialization)
type_converter_for_torch (modalities.dataloader.dataset.PackedMemMapDatasetBase attribute)
type_hint (modalities.models.gpt2.gpt2_model.AttentionConfig.QueryKeyValueTransformConfig attribute)
U
update_data_length_in_pre_allocated_header() (in module modalities.dataloader.create_packed_data)
UPDATED_CONFIGS (modalities.utils.benchmarking.benchmarking_utils.SweepSets attribute)
use_meta_device (modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
use_weight_tying (modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
V
validate_sizes() (modalities.models.gpt2.gpt2_model.GPT2LLMConfig method)
value (modalities.batch.ResultItem attribute)
vision_cls_prediction_key (modalities.models.coca.coca_model.CoCaConfig attribute)
vision_embd_prediction_key (modalities.models.coca.coca_model.CoCaConfig attribute)
vision_encoder_config (modalities.models.coca.coca_model.CoCaConfig attribute)
VisionTransformer (class in modalities.models.vision_transformer.vision_transformer_model)
VisionTransformerBlock (class in modalities.models.vision_transformer.vision_transformer_model)
VisionTransformerConfig (class in modalities.models.vision_transformer.vision_transformer_model)
vocab_size (modalities.models.coca.coca_model.TextDecoderConfig attribute)
(modalities.models.gpt2.gpt2_model.GPT2LLMConfig attribute)
(modalities.tokenization.tokenizer_wrapper.PreTrainedHFTokenizer property)
(modalities.tokenization.tokenizer_wrapper.PreTrainedSPTokenizer property)
(modalities.tokenization.tokenizer_wrapper.TokenizerWrapper property)
W
warn_rank_0() (in module modalities.util)
weight_decay_groups (modalities.models.model.NNModel property)
WeightInitTypes (class in modalities.nn.model_initialization.parameter_name_filters)
weights (modalities.nn.model_initialization.parameter_name_filters.RegexFilter attribute)
world_size (modalities.running_env.fsdp.device_mesh.DeviceMeshConfig attribute)
Z
zero_grad() (modalities.optimizers.optimizer_list.OptimizersList method)