diff --git a/PyTorch/CustomStuff/MyAwesomeModel/README.md b/PyTorch/CustomStuff/MyAwesomeModel/README.md
new file mode 100644
index 000000000..60737e766
--- /dev/null
+++ b/PyTorch/CustomStuff/MyAwesomeModel/README.md
@@ -0,0 +1,8 @@
+# MyAwesomeModel
+
+This repository contains a minimal implementation of a GPT-style chatbot built from scratch with PyTorch. It supports:
+
+* **Data loading**: text extraction from PDF files, tokenization, and vocabulary building.
+* **Model**: a lightweight Transformer-based GPT implemented with `nn.TransformerEncoder`.
+* **Training**: a training loop with checkpoint saving.
+* **Generation**: a simple text generation utility.
\ No newline at end of file
diff --git a/PyTorch/CustomStuff/MyAwesomeModel/model.py b/PyTorch/CustomStuff/MyAwesomeModel/model.py
new file mode 100644
index 000000000..a30c47e61
--- /dev/null
+++ b/PyTorch/CustomStuff/MyAwesomeModel/model.py
@@ -0,0 +1,36 @@
+import torch
+import torch.nn as nn
+from config import Config
+
+def generate_causal_mask(size):
+    mask = torch.triu(torch.ones(size, size) * float('-inf'), diagonal=1)
+    return mask
+
+class GPT(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.token_emb = nn.Embedding(Config.vocab_size, Config.d_model)
+        self.pos_emb = nn.Parameter(torch.zeros(1, Config.seq_len, Config.d_model))
+        encoder_layer = nn.TransformerEncoderLayer(
+            d_model=Config.d_model,
+            nhead=Config.n_heads,
+            dim_feedforward=4 * Config.d_model,
+            dropout=0.1,
+            activation='gelu'
+        )
+        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=Config.n_layers)
+        self.ln_f = nn.LayerNorm(Config.d_model)
+        self.head = nn.Linear(Config.d_model, Config.vocab_size)
+
+    def forward(self, idx):
+        B, T = idx.size()
+        tok = self.token_emb(idx)     # (B, T, d_model)
+        pos = self.pos_emb[:, :T, :]  # (1, T, d_model)
+        x = tok + pos
+        x = x.transpose(0, 1)         # (T, B, d_model)
+        mask = generate_causal_mask(T).to(x.device)
+        x = self.transformer(x, mask=mask)
+        x = x.transpose(0, 1)         # (B, T, d_model)
+        x = self.ln_f(x)
+        logits = self.head(x)         # (B, T, vocab_size)
+        return logits
diff --git a/PyTorch/CustomStuff/MyAwesomeModel/requirements.txt b/PyTorch/CustomStuff/MyAwesomeModel/requirements.txt
new file mode 100644
index 000000000..92abc6eed
--- /dev/null
+++ b/PyTorch/CustomStuff/MyAwesomeModel/requirements.txt
@@ -0,0 +1,2 @@
+torch>=1.13.0
+numpy
diff --git a/PyTorch/CustomStuff/MyAwesomeModel/train.py b/PyTorch/CustomStuff/MyAwesomeModel/train.py
new file mode 100644
index 000000000..8d0fff801
--- /dev/null
+++ b/PyTorch/CustomStuff/MyAwesomeModel/train.py
@@ -0,0 +1,36 @@
+import os
+import torch
+import torch.nn.functional as F
+from torch.optim import Adam
+from data_loader import get_loader
+from model import GPT
+from config import Config
+
+# Prepare data
+loader, vocab = get_loader(vocab_path=Config.VOCAB_PATH)
+
+# Initialize model
+model = GPT().to(Config.device)
+optimizer = Adam(model.parameters(), lr=Config.lr)
+
+# Training loop
+for epoch in range(1, Config.epochs + 1):
+    model.train()
+    total_loss = 0.0
+    for x, y in loader:
+        x = x.to(Config.device)
+        y = y.to(Config.device)
+        logits = model(x)
+        loss = F.cross_entropy(logits.view(-1, Config.vocab_size), y.view(-1))
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+        total_loss += loss.item()
+    avg_loss = total_loss / len(loader)
+    print(f"Epoch {epoch}/{Config.epochs}, Loss: {avg_loss:.4f}")
+
+    # Save checkpoint
+    os.makedirs(Config.OUTPUT_DIR, exist_ok=True)
+    ckpt_path = os.path.join(Config.OUTPUT_DIR, f"model_epoch{epoch}.pt")
+    torch.save(model.state_dict(), ckpt_path)
+    print(f"Saved checkpoint: {ckpt_path}")
\ No newline at end of file
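
Note: the README advertises a text generation utility, but no generate.py is included in this diff. For reference, a minimal sketch of what greedy decoding on top of the GPT class above could look like (the file name, the `generate` signature, and the greedy sampling choice are assumptions, not part of this PR):

```python
# Hypothetical generate.py sketch -- not part of this diff.
import torch

from model import GPT
from config import Config

@torch.no_grad()
def generate(model, prompt_ids, max_new_tokens=50):
    """Greedily extend a list of token ids with the trained GPT model."""
    model.eval()
    idx = torch.tensor([prompt_ids], dtype=torch.long, device=Config.device)
    for _ in range(max_new_tokens):
        idx_cond = idx[:, -Config.seq_len:]   # crop to the model's context window
        logits = model(idx_cond)              # (1, T, vocab_size)
        next_id = logits[:, -1, :].argmax(dim=-1, keepdim=True)  # most likely next token
        idx = torch.cat([idx, next_id], dim=1)
    return idx[0].tolist()
```

Mapping the returned ids back to text would depend on the vocabulary built by data_loader.py, which is also outside this diff.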