Skip to content

feat: Set up Python testing infrastructure with Poetry and pytest #65

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,55 @@
*.pyc
*.bin

# Testing related
.pytest_cache/
.coverage
htmlcov/
coverage.xml
*.cover
*.py,cover
.hypothesis/
pytest_cache/

# Claude settings
.claude/*

# Virtual environments
venv/
.venv/
env/
.env/
ENV/
env.bak/
venv.bak/

# Poetry
# Note: poetry.lock should be committed to ensure reproducible builds

# Build artifacts
build/
dist/
*.egg-info/
.eggs/
__pycache__/
*.pyo
*.pyd

# IDE files
.idea/
.vscode/
*.swp
*.swo
*~
.DS_Store

# Jupyter Notebook
.ipynb_checkpoints

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
1,453 changes: 1,453 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

84 changes: 84 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
[tool.poetry]
name = "pointer-summarizer"
version = "0.1.0"
description = "PyTorch implementation of Get To The Point: Summarization with Pointer-Generator Networks"
authors = ["Your Name <you@example.com>"]  # TODO: replace placeholder author before publishing
readme = "README.md"
license = "MIT"
packages = [
{ include = "data_util" },
{ include = "training_ptr_gen" }
]

[tool.poetry.dependencies]
python = "^3.8"
torch = ">=1.0.0"
numpy = "*"
# pyrouge = "*" # Note: pyrouge needs to be installed separately following its instructions

[tool.poetry.group.dev.dependencies]
pytest = "^7.4.0"
pytest-cov = "^4.1.0"
pytest-mock = "^3.11.0"

[tool.poetry.scripts]
test = "pytest:main"
tests = "pytest:main"

[tool.pytest.ini_options]
minversion = "7.0"
testpaths = ["tests"]
python_files = ["test_*.py", "*_test.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
addopts = [
"-ra",
"--strict-markers",
"--cov=data_util",
"--cov=training_ptr_gen",
"--cov-report=html",
"--cov-report=xml",
"--cov-report=term-missing",
"--cov-fail-under=0", # Set to 0 for initial setup, increase when adding real tests
"-v",
"--tb=short"
]
markers = [
"unit: marks tests as unit tests (fast)",
"integration: marks tests as integration tests (slower)",
"slow: marks tests as slow running"
]

[tool.coverage.run]
source = ["data_util", "training_ptr_gen"]
omit = [
"*/tests/*",
"*/__init__.py",
"*/setup.py",
"*/venv/*",
"*/.venv/*",
"data_util/data.py" # Python 2 syntax file, needs migration
]

[tool.coverage.report]
precision = 2
show_missing = true
skip_covered = false
exclude_lines = [
"pragma: no cover",
"def __repr__",
"raise AssertionError",
"raise NotImplementedError",
"if __name__ == .__main__.:",
"if TYPE_CHECKING:"
]

[tool.coverage.html]
directory = "htmlcov"

[tool.coverage.xml]
output = "coverage.xml"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
Empty file added tests/__init__.py
Empty file.
156 changes: 156 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
"""Shared pytest fixtures and configuration for all tests."""

import os
import shutil
import tempfile
from pathlib import Path
from typing import Generator, Dict, Any

import pytest
import torch


@pytest.fixture
def temp_dir() -> Generator[Path, None, None]:
    """Yield a fresh temporary directory, removed again after the test.

    Cleanup uses ``ignore_errors=True`` so leftover read-only files
    cannot make fixture teardown fail.
    """
    created = Path(tempfile.mkdtemp())
    try:
        yield created
    finally:
        shutil.rmtree(created, ignore_errors=True)


@pytest.fixture
def mock_config() -> Dict[str, Any]:
    """Return a dictionary of model hyper-parameters for testing.

    Values mirror the defaults used elsewhere in this conftest (e.g.
    ``hidden_dim=256`` matches the context-vector size in
    ``sample_batch_data``).
    """
    return dict(
        hidden_dim=256,
        emb_dim=128,
        batch_size=8,
        max_enc_steps=400,
        max_dec_steps=100,
        beam_size=4,
        min_dec_steps=35,
        vocab_size=50000,
        lr=0.15,
        adagrad_init_acc=0.1,
        rand_unif_init_mag=0.02,
        trunc_norm_init_std=1e-4,
        max_grad_norm=2.0,
        pointer_gen=True,
        is_coverage=True,
        cov_loss_wt=1.0,
        eps=1e-12,
    )


@pytest.fixture
def sample_vocab() -> Dict[str, Any]:
    """Create a small vocabulary object for testing.

    Returns a ``MockVocab`` exposing ``word2id`` / ``id2word`` / ``size``.
    Ids are assigned by position: the four special tokens first, then a
    handful of common words, matching the original hand-written mapping.
    NOTE(review): the declared return type ``Dict[str, Any]`` does not
    match the actual return value (a MockVocab instance) — kept as-is
    here since this is a behavior-preserving rewrite.
    """
    words = [
        "[PAD]", "[UNK]", "[START]", "[STOP]",
        "the", "a", "is", "was", "are", "were",
    ]
    word_to_id = {word: idx for idx, word in enumerate(words)}

    class MockVocab:
        """Minimal stand-in exposing the vocabulary interface tests need."""

        def __init__(self, mapping):
            self._word_to_id = mapping
            self._id_to_word = {idx: word for word, idx in mapping.items()}

        def word2id(self, word):
            # Out-of-vocabulary words fall back to id 1 ([UNK]).
            return self._word_to_id.get(word, 1)

        def id2word(self, idx):
            # Unknown ids map back to the [UNK] token string.
            return self._id_to_word.get(idx, "[UNK]")

        def size(self):
            return len(self._word_to_id)

    return MockVocab(word_to_id)


@pytest.fixture
def sample_batch_data() -> Dict[str, Any]:
    """Create sample batch data for model testing.

    Returns a dict shaped like a pointer-generator training batch:
    encoder tensors of shape (batch, max_enc_steps), decoder tensors of
    shape (batch, max_dec_steps), plus extended-vocab scaffolding.

    Fix: the original returned an all-ones ``enc_padding_mask`` even
    though ``enc_lens`` declared sequences shorter than ``max_enc_steps``
    (45/48/42 < 50).  The mask is now derived from ``enc_lens`` so the
    two fields are mutually consistent: 1.0 at real-token positions,
    0.0 at padding positions.
    """
    batch_size = 4
    max_enc_steps = 50
    max_dec_steps = 20

    enc_lens = torch.tensor([45, 48, 50, 42])
    # Position i of row b is valid iff i < enc_lens[b]; broadcasting a
    # (1, steps) position row against a (batch, 1) lengths column yields
    # the (batch, steps) boolean mask, cast to float32 to match the
    # dtype torch.ones() produced before.
    enc_padding_mask = (
        torch.arange(max_enc_steps).unsqueeze(0) < enc_lens.unsqueeze(1)
    ).float()

    return {
        "enc_batch": torch.randint(0, 100, (batch_size, max_enc_steps)),
        "enc_padding_mask": enc_padding_mask,
        "enc_lens": enc_lens,
        "enc_batch_extend_vocab": torch.randint(0, 150, (batch_size, max_enc_steps)),
        "extra_zeros": torch.zeros((batch_size, 50)),
        "c_t_1": torch.zeros((batch_size, 2 * 256)),  # 2 * hidden_dim for bidirectional
        "coverage": torch.zeros(batch_size, max_enc_steps),
        "dec_batch": torch.randint(0, 100, (batch_size, max_dec_steps)),
        "target_batch": torch.randint(0, 100, (batch_size, max_dec_steps)),
        "dec_padding_mask": torch.ones(batch_size, max_dec_steps),
        "max_art_oovs": 10,
    }


@pytest.fixture
def device():
    """Return the device tests should run on: CUDA when available, else CPU."""
    use_cuda = torch.cuda.is_available()
    return torch.device("cuda") if use_cuda else torch.device("cpu")


@pytest.fixture(autouse=True)
def reset_random_seeds():
    """Seed every RNG before each test so results are reproducible.

    Runs automatically (``autouse=True``) for all tests; seeds Python's
    ``random``, NumPy, and torch (including all CUDA devices when present).
    """
    import random
    import numpy as np

    seed = 42
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


@pytest.fixture
def mock_data_path(temp_dir: Path) -> Path:
    """Create a mock data directory tree and return its root.

    Layout: ``data/finished_files/`` (with a plain-text ``vocab`` file,
    one token per line) and ``data/chunked/``.
    """
    root = temp_dir / "data"
    root.mkdir()

    for subdir in ("finished_files", "chunked"):
        (root / subdir).mkdir()

    tokens = ["[PAD]", "[UNK]", "[START]", "[STOP]", "the", "a", "is", "was"]
    (root / "finished_files" / "vocab").write_text("\n".join(tokens))

    return root


@pytest.fixture
def mock_model_path(temp_dir: Path) -> Path:
    """Create and return an empty ``models`` directory for checkpoints."""
    path = temp_dir / "models"
    path.mkdir()
    return path


@pytest.fixture
def mock_log_path(temp_dir: Path) -> Path:
    """Create and return an empty ``logs`` directory."""
    path = temp_dir / "logs"
    path.mkdir()
    return path


class TimeoutMarker:
    """Placeholder marker class for tests with a custom timeout.

    NOTE(review): the original decorated this class with
    ``@pytest.mark.timeout(60)``.  That decorator was dead code: the
    class name does not match the collected ``Test*`` pattern from
    ``python_classes`` so pytest never picks it up, the ``timeout``
    marker is provided by the pytest-timeout plugin which is not a
    declared dev dependency, and ``timeout`` is absent from the
    registered ``markers`` list while ``--strict-markers`` is enabled.
    The inert decorator has therefore been removed; to give real tests a
    timeout, add pytest-timeout to the dev dependencies and apply
    ``@pytest.mark.timeout(...)`` to the tests themselves.
    """
Empty file added tests/integration/__init__.py
Empty file.
Loading