Renothingg #1459

Open · wants to merge 2 commits into master
8 changes: 8 additions & 0 deletions PyTorch/CustomStuff/MyAwesomeModel/README.md
@@ -0,0 +1,8 @@
# MyAwesomeModel: a minimal GPT-style chatbot in PyTorch

This repository contains a minimal, from-scratch implementation of a GPT-style chatbot in PyTorch. It supports:

* **Data loading**: extraction from PDF files, tokenization, vocabulary building.
* **Model**: a lightweight Transformer-based GPT implemented with `nn.TransformerEncoder`.
* **Training**: training loop with checkpoint saving.
* **Generation**: simple text generation utility.
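
A minimal end-to-end sketch of how the pieces fit together is shown below. It assumes a trained checkpoint already exists in the output directory and that `config.py` defines the fields referenced by `model.py` and `train.py`; the checkpoint filename and the prompt token ids are illustrative assumptions, not part of this PR.

```python
# Greedy-decoding sketch (illustration only): load a checkpoint saved by
# train.py and extend a prompt of raw token ids one token at a time.
import os
import torch
from config import Config
from model import GPT

model = GPT().to(Config.device)
ckpt = os.path.join(Config.OUTPUT_DIR, "model_epoch10.pt")  # filename is an assumption
model.load_state_dict(torch.load(ckpt, map_location=Config.device))
model.eval()

idx = torch.tensor([[1, 42, 7]], device=Config.device)  # (1, T) prompt token ids (dummy values)
with torch.no_grad():
    for _ in range(50):
        logits = model(idx[:, -Config.seq_len:])         # crop to the model's context window
        next_id = logits[:, -1, :].argmax(dim=-1, keepdim=True)
        idx = torch.cat([idx, next_id], dim=1)
print(idx.tolist())  # map back to text with your tokenizer's decode step
```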
36 changes: 36 additions & 0 deletions PyTorch/CustomStuff/MyAwesomeModel/model.py
@@ -0,0 +1,36 @@
import torch
import torch.nn as nn
from config import Config

def generate_causal_mask(size):
    # Upper-triangular mask: position t may only attend to positions <= t.
    mask = torch.triu(torch.ones(size, size) * float('-inf'), diagonal=1)
    return mask

class GPT(nn.Module):
    def __init__(self):
        super().__init__()
        self.token_emb = nn.Embedding(Config.vocab_size, Config.d_model)
        # Learned positional embeddings, one vector per position up to seq_len.
        self.pos_emb = nn.Parameter(torch.zeros(1, Config.seq_len, Config.d_model))
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=Config.d_model,
            nhead=Config.n_heads,
            dim_feedforward=4 * Config.d_model,
            dropout=0.1,
            activation='gelu'
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=Config.n_layers)
        self.ln_f = nn.LayerNorm(Config.d_model)
        self.head = nn.Linear(Config.d_model, Config.vocab_size)

    def forward(self, idx):
        B, T = idx.size()
        tok = self.token_emb(idx)              # (B, T, d_model)
        pos = self.pos_emb[:, :T, :]           # (1, T, d_model)
        x = tok + pos
        x = x.transpose(0, 1)                  # (T, B, d_model); encoder expects seq-first
        mask = generate_causal_mask(T).to(x.device)
        x = self.transformer(x, mask=mask)
        x = x.transpose(0, 1)                  # (B, T, d_model)
        x = self.ln_f(x)
        logits = self.head(x)                  # (B, T, vocab_size)
        return logits
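
As a quick sanity check on the forward pass above, the following sketch (assuming `config.py` provides the fields referenced in this file) runs a batch of random token ids through the model and verifies the output shape.

```python
import torch
from config import Config
from model import GPT

model = GPT().eval()
idx = torch.randint(0, Config.vocab_size, (2, Config.seq_len))  # (B, T) dummy token ids

with torch.no_grad():
    logits = model(idx)

# forward() returns one logit vector per position over the vocabulary.
assert logits.shape == (2, Config.seq_len, Config.vocab_size)
```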
2 changes: 2 additions & 0 deletions PyTorch/CustomStuff/MyAwesomeModel/requirements.txt
@@ -0,0 +1,2 @@
torch>=1.13.0
numpy
36 changes: 36 additions & 0 deletions PyTorch/CustomStuff/MyAwesomeModel/train.py
@@ -0,0 +1,36 @@
import os
import torch
import torch.nn.functional as F
from torch.optim import Adam
from data_loader import get_loader
from model import GPT
from config import Config

# Prepare data
loader, vocab = get_loader(vocab_path=Config.VOCAB_PATH)

# Initialize model
model = GPT().to(Config.device)
optimizer = Adam(model.parameters(), lr=Config.lr)

# Training loop
for epoch in range(1, Config.epochs + 1):
    model.train()
    total_loss = 0.0
    for x, y in loader:
        x = x.to(Config.device)
        y = y.to(Config.device)
        logits = model(x)                       # (B, T, vocab_size)
        # Flatten batch and time dimensions for token-level cross-entropy.
        loss = F.cross_entropy(logits.view(-1, Config.vocab_size), y.view(-1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    avg_loss = total_loss / len(loader)
    print(f"Epoch {epoch}/{Config.epochs}, Loss: {avg_loss:.4f}")

    # Save a checkpoint at the end of every epoch
    os.makedirs(Config.OUTPUT_DIR, exist_ok=True)
    ckpt_path = os.path.join(Config.OUTPUT_DIR, f"model_epoch{epoch}.pt")
    torch.save(model.state_dict(), ckpt_path)
    print(f"Saved checkpoint: {ckpt_path}")