
Commit 002a00c

Timm Ruland committed
chore: Merge remote-tracking branch 'origin/main' into packed_dataset_filtering
2 parents: 76fbae0 + 1e4d28e

92 files changed (+24852 -552 lines)


.github/workflows/tests_full.yml

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@ jobs:
       python -m pip install --upgrade pip setuptools wheel
       export FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE
       python -m pip install ninja # Lowers compilation time of flash attention significantly
-      python -m pip install flash-attn --no-build-isolation
+      python -m pip install flash-attn==2.7.4.post1 --no-build-isolation
       python -m pip install -e .[tests]
   - name: Run tests
     run: |

.gitignore

Lines changed: 6 additions & 2 deletions
@@ -9,6 +9,7 @@ logs/
 core.*
 checkpoint
 wandb
+artifacts
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
@@ -163,5 +164,8 @@ tests/tmp/*
 *wandb_storage*
 .coverage/*
 *.pbin
-
-tutorials/profiling/experiments
+tutorials/scaling_up/experiments
+tutorials/profiling/experiments
+tutorials/instruction_tuning/prepared_data
+config_files/instruction_tuning
+data/lorem_ipsum_instruct.jsonl

CHANGELOG_DEV.md

Lines changed: 23 additions & 1 deletion
@@ -163,4 +163,26 @@ Some HF tokenisers such as `xlm-roberta-large` add special tokens (e.g., eod tok
 This side-effect in the transformers library has led to the eod token being appended twice when tokenizing / packing our data. We added a check for this and only append the eod token once now:
 https://github.com/Modalities/modalities/blob/1c1ccdc973283c45bc8c9fadf4d20f03e435cd04/src/modalities/dataloader/create_packed_data.py#L327-L330
 
-Additionally, I added a script that verifies the consistency of the indexation and tokenization of a given JSONL file. We run the indexation and tokenization routines in modalities and compare the result to a tokenized JSONL file to which we applied the HF tokenizer directly.
+Additionally, I added a script that verifies the consistency of the indexation and tokenization of a given JSONL file. We run the indexation and tokenization routines in modalities and compare the result to a tokenized JSONL file to which we applied the HF tokenizer directly.
+
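
The linked check in `create_packed_data.py` guards against appending the eod token a second time. A minimal sketch of that kind of guard, assuming a generic Hugging Face tokenizer and an `eod_token` string; the function name and signature are illustrative, not Modalities' actual API:

```python
from transformers import AutoTokenizer

def tokenize_with_single_eod(text: str, tokenizer, eod_token: str) -> list[int]:
    """Append the eod token exactly once, even if the tokenizer already adds it."""
    eod_token_id = tokenizer.convert_tokens_to_ids(eod_token)
    token_ids = tokenizer(text)["input_ids"]
    # Some tokenizers (e.g. xlm-roberta-large) already append special tokens,
    # so only add the eod token if it is not already the last token.
    if not token_ids or token_ids[-1] != eod_token_id:
        token_ids.append(eod_token_id)
    return token_ids

# usage sketch
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large")
ids = tokenize_with_single_eod("Hello world.", tokenizer, eod_token="</s>")
```
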
+## PR #379 Instruction Tuning Support
+
+* New entry point `apply_chat_template` to form chats and create index and pbin files from them
+* A wrapper for collate functions that includes in the loss only those tokens which appear between indicator tokens
+* A new parameter for PackedMemMapDatasetContinuous that allows not re-using the last target token
+* A tutorial on how to apply instruction tuning to a Hugging Face model
+
+
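
The collate-function wrapper from PR #379 decides which target tokens contribute to the loss. A rough sketch of that masking idea, assuming begin/end indicator token ids; this illustrates the technique only and is not the wrapper's actual implementation:

```python
import torch

def mask_targets_outside_indicators(
    target_ids: torch.Tensor, b_include_id: int, e_include_id: int, ignore_index: int = -100
) -> torch.Tensor:
    """Keep only the tokens between the begin/end indicator tokens in the loss; mask the rest."""
    masked = target_ids.clone()
    include = False
    for i, token_id in enumerate(target_ids.tolist()):
        if token_id == b_include_id:
            include = True
            masked[i] = ignore_index  # masking the indicator itself is an assumption of this sketch
        elif token_id == e_include_id:
            include = False
            masked[i] = ignore_index
        elif not include:
            masked[i] = ignore_index  # ignored by torch.nn.CrossEntropyLoss
    return masked

# usage sketch: 10 marks the begin indicator, 11 the end indicator
targets = torch.tensor([4, 10, 7, 7, 5, 11, 9])
print(mask_targets_outside_indicators(targets, b_include_id=10, e_include_id=11))
# tensor([-100, -100, 7, 7, 5, -100, -100])
```
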
+## PR #359 Activation Checkpoint with FSDP2
+
+This PR adds activation checkpointing (AC) support for FSDP2.
+There are now three AC variants:
+* Full AC (same as before, where entire modules get ACed, leading to the largest memory footprint reduction)
+* Selective Layer AC (only every nth layer or module is ACed)
+* Selective OP AC (only certain ops, typically low-memory but compute-intensive ones, are checkpointed)
+
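
For the Selective Layer AC variant above, the general idea can be sketched with plain torch.utils.checkpoint, checkpointing only every nth block. This is a simplified, generic illustration, not Modalities' FSDP2 integration:

```python
import torch
import torch.nn as nn
from torch.utils.checkpoint import checkpoint

class SelectiveLayerAC(nn.Module):
    """Apply activation checkpointing to every nth layer only."""

    def __init__(self, layers: nn.ModuleList, ac_every_nth: int = 2):
        super().__init__()
        self.layers = layers
        self.ac_every_nth = ac_every_nth

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        for idx, layer in enumerate(self.layers):
            if idx % self.ac_every_nth == 0:
                # this layer's activations are recomputed during the backward pass
                x = checkpoint(layer, x, use_reentrant=False)
            else:
                x = layer(x)
        return x

# usage sketch: 8 small blocks, checkpoint every 2nd one
blocks = nn.ModuleList([nn.Sequential(nn.Linear(64, 64), nn.GELU()) for _ in range(8)])
model = SelectiveLayerAC(blocks, ac_every_nth=2)
out = model(torch.randn(4, 64, requires_grad=True))
out.sum().backward()
```
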
+## PR #374 Tensor Parallelism Support
+
+* adds support for Tensor Parallelism (including Sequence Parallelism).
+* adds a debugging toolkit to track the input and output tensors during a forward pass, gradients during the backward pass and weight tensors.
+Tensors can be either normal Tensors or DTensors.
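
The debugging toolkit from PR #374 tracks tensors flowing through the model. A generic sketch of the forward-pass part using PyTorch hooks; the DTensor handling via `to_local()` and all names here are assumptions for illustration, not the toolkit's API:

```python
import torch
import torch.nn as nn

def describe(t: torch.Tensor) -> str:
    # DTensors expose their local shard via to_local(); plain tensors are reported directly.
    if hasattr(t, "to_local"):
        return f"DTensor(global_shape={tuple(t.shape)}, local_shape={tuple(t.to_local().shape)})"
    return f"Tensor(shape={tuple(t.shape)}, dtype={t.dtype})"

def attach_forward_debug_hooks(model: nn.Module) -> None:
    """Print the input and output tensors of every submodule during the forward pass."""
    for name, module in model.named_modules():
        def hook(mod, inputs, output, name=name):
            ins = [describe(i) for i in inputs if isinstance(i, torch.Tensor)]
            outs = describe(output) if isinstance(output, torch.Tensor) else type(output).__name__
            print(f"{name or 'root'}: inputs={ins} output={outs}")
        module.register_forward_hook(hook)

# usage sketch
model = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 4))
attach_forward_debug_hooks(model)
_ = model(torch.randn(2, 16))
```

Gradients during the backward pass could be tracked analogously with backward hooks, and weight tensors inspected via `named_parameters()`.
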

README.md

Lines changed: 15 additions & 1 deletion
@@ -44,7 +44,7 @@ conda activate modalities
 # install PyTorch, Ninja and Flash Attention (mandatory)
 pip install torch==2.6.0
 pip install ninja # Lowers compilation time of flash attention significantly
-pip install flash-attn --no-build-isolation
+pip install flash-attn==2.7.4.post1 --no-build-isolation
 ```
 
 ### Option 1: Installation from source
@@ -74,6 +74,20 @@ To install Modalities via pip, run
 pip install modalities
 ```
 
+### Option 3: Feature Complete via UV
+
+```sh
+curl -LsSf https://astral.sh/uv/install.sh | sh
+uv venv --seed --python 3.11 --prompt modalities
+source .venv/bin/activate
+uv pip install torch
+uv pip install ninja
+uv pip install --no-build-isolation flash-attn==2.7.4.post1
+# for developer: use [tests,linting] and install pre-commit hooks
+uv pip install -e .[tests,linting]
+pre-commit install --install-hooks
+```
+
 ## Usage
 Modalities provides several entry points to interact with the framework. The following section lists the available entry points and their respective functionalities.
 
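
After either install path, a quick sanity check along these lines can confirm that the pinned versions resolved correctly (a sketch; it assumes a CUDA machine and the pins above):

```python
# Post-install sanity check (illustrative; version expectations follow the pins above).
import flash_attn
import torch

import modalities  # installed via `pip install -e .` or `pip install modalities`

print("torch:", torch.__version__)            # expected: 2.6.x
print("flash-attn:", flash_attn.__version__)  # expected: 2.7.4.post1
print("CUDA available:", torch.cuda.is_available())
```
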

config_files/text_generation/text_generation_config_torch.yaml

Lines changed: 0 additions & 87 deletions
This file was deleted.

config_files/training/config_lorem_ipsum_long_fsdp2.yaml

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ settings:
   enforce_last_step_evaluated: false
   enforce_last_step_checkpointed: false
   step_profile:
-    gradient_accumulation_steps: 2
+    gradient_accumulation_steps: 1
     local_train_micro_batch_size: 1
     sequence_length: 256
   training_target:
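
For context on the step_profile values above: the number of tokens consumed per optimizer step is the product of these settings and the data-parallel world size. A small worked example (the world size of 8 is an assumed value, not part of this config):

```python
# Tokens per optimizer step for the step_profile above.
gradient_accumulation_steps = 1   # was 2 before this change
local_train_micro_batch_size = 1
sequence_length = 256
world_size = 8                    # assumed number of data-parallel ranks (not in the diff)

tokens_per_step = (
    gradient_accumulation_steps * local_train_micro_batch_size * sequence_length * world_size
)
print(tokens_per_step)  # 2048 with these settings; 4096 with the previous value of 2
```
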

config_files/training/config_lorem_ipsum_long_fsdp2_warmstart.yaml

Lines changed: 1 addition & 1 deletion
@@ -68,7 +68,7 @@ settings:
   config:
     checkpoint_path: ${settings.warmstart_checkpoint_paths.checkpoint_folder_path}
 warmstart_checkpoint_paths: # ${warmstart_env:checkpoint_paths}
-  checkpoint_folder_path: /raid/fromm/modalities/data/checkpoints/2025-04-16__12-40-51_6dcbb1a0/eid_2025-04-16__12-40-51_6dcbb1a0-seen_steps_32-seen_tokens_65536-target_steps_162-target_tokens_331776
+  checkpoint_folder_path: /raid/s3/opengptx/max_lue/repositories/modalities/data/checkpoints/2025-03-14__15-25-59_970fedec/eid_2025-03-14__15-25-59_970fedec-seen_steps_96-seen_tokens_196608-target_steps_162-target_tokens_331776
 
 collate_fn:
   component_key: collate_fn
