Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 33 additions & 5 deletions pulp_python/app/pypi/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from gettext import gettext as _

from rest_framework import serializers
from pulp_python.app.utils import DIST_EXTENSIONS
from pulp_python.app.utils import DIST_EXTENSIONS, SUPPORTED_METADATA_VERSIONS
from pulpcore.plugin.models import Artifact
from pulpcore.plugin.util import get_domain
from django.db.utils import IntegrityError
Expand Down Expand Up @@ -54,6 +54,22 @@ class PackageUploadSerializer(serializers.Serializer):
min_length=64,
max_length=64,
)
protocol_version = serializers.ChoiceField(
help_text=_("Protocol version to use for the upload. Only version 1 is supported."),
required=False,
choices=(1,),
default=1,
)
filetype = serializers.ChoiceField(
help_text=_("Type of artifact to upload."),
required=False,
choices=("bdist_wheel", "sdist"),
)
metadata_version = serializers.ChoiceField(
help_text=_("Metadata version of the uploaded package."),
required=False,
choices=SUPPORTED_METADATA_VERSIONS,
)
Copy link
Contributor

@dralley dralley Nov 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

edit: maybe I'm misunderstanding, I suppose you mention that the values do come from the uploaded artifact, but the RPM plugin also does so and doesn't add any of the metadata to the upload serializer, so this is different than what I'm familiar with.

I'm confused about the fact that all of it seems to be optional. If it's all optional than why allow the user to specify in the first place (maybe these are all just gripes with the API they've chosen and not with the fact that Pulp needs to implement it)

Is it just the case that twine provides all this information on uploads and the API (including e.g. warehouse) is meant to just trust it unconditionally, if provided? Or does it get verified? And if it gets verified then against what, and why bother with accepting it if there's a canonical source?

Seems like it would be ideal for most of these details to be parsed from package metadata instead of provided alongside the upload? (though IIRC that was briefly discussed earlier, I guess my point is that if we add it now would it not become useless should metadata later be parsed directly from the package). But again maybe this is just complaining about the API

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, maybe I should remove them. Reading the PyPI code (https://github.com/pypi/warehouse/blob/117eafc3739f738a2c29fbadbb858bf502ba1ff0/warehouse/forklift/legacy.py#L618-L623) it seems they trust all the metadata that is posted in the request, but we have always been extracting the metadata from the file so these fields are kind of useless.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, what is the resolution, stay or remove?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They're gone, look at the latest push.


def validate(self, data):
"""Validates the request."""
Expand All @@ -63,14 +79,26 @@ def validate(self, data):
file = data.get("content")
for ext, packagetype in DIST_EXTENSIONS.items():
if file.name.endswith(ext):
if filetype := data.get("filetype"):
if filetype != packagetype:
Comment on lines +82 to +83
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe if (filetype := data.get("filetype")) and filetype != packagetype: to save one nesting?

raise serializers.ValidationError(
{
"filetype": _(
"filetype {} does not match found filetype {} for file {}"
).format(filetype, packagetype, file.name)
}
)
break
else:
raise serializers.ValidationError(
_(
"Extension on {} is not a valid python extension "
"(.whl, .exe, .egg, .tar.gz, .tar.bz2, .zip)"
).format(file.name)
{
"content": _(
"Extension on {} is not a valid python extension "
"(.whl, .exe, .egg, .tar.gz, .tar.bz2, .zip)"
).format(file.name)
}
)

sha256 = data.get("sha256_digest")
digests = {"sha256": sha256} if sha256 else None
artifact = Artifact.init_and_validate(file, expected_digests=digests)
Expand Down
1 change: 1 addition & 0 deletions pulp_python/app/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
PYPI_LAST_SERIAL = "X-PYPI-LAST-SERIAL"
"""TODO This serial constant is temporary until Python repositories implements serials"""
PYPI_SERIAL_CONSTANT = 1000000000
SUPPORTED_METADATA_VERSIONS = ("1.0", "1.1", "1.2", "2.0", "2.1", "2.2", "2.3", "2.4")

SIMPLE_API_VERSION = "1.0"

Expand Down
30 changes: 30 additions & 0 deletions pulp_python/pytest_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
PYTHON_XS_PROJECT_SPECIFIER,
PYTHON_EGG_FILENAME,
PYTHON_URL,
PYTHON_EGG_URL,
PYTHON_WHEEL_URL,
PYTHON_WHEEL_FILENAME,
)


Expand Down Expand Up @@ -183,6 +186,20 @@ def _gen_python_content(relative_path=PYTHON_EGG_FILENAME, url=None, **body):
yield _gen_python_content


@pytest.fixture
def python_empty_repo_distro(python_repo_factory, python_distribution_factory):
"""Returns an empty repo with and distribution serving it."""

def _generate_empty_repo_distro(repo_body=None, distro_body=None):
repo_body = repo_body or {}
distro_body = distro_body or {}
repo = python_repo_factory(**repo_body)
distro = python_distribution_factory(repository=repo, **distro_body)
return repo, distro

yield _generate_empty_repo_distro


# Utility fixtures


Expand Down Expand Up @@ -217,3 +234,16 @@ def _gen_summary(repository_version=None, repository=None, version=None):
def get_href(item):
"""Tries to get the href from the given item, whether it is a string or object."""
return item if isinstance(item, str) else item.pulp_href


@pytest.fixture(scope="session")
def python_package_dist_directory(tmp_path_factory, http_get):
"""Creates a temp dir to hold package distros for uploading."""
dist_dir = tmp_path_factory.mktemp("dist")
egg_file = dist_dir / PYTHON_EGG_FILENAME
wheel_file = dist_dir / PYTHON_WHEEL_FILENAME
with open(egg_file, "wb") as f:
f.write(http_get(PYTHON_EGG_URL))
with open(wheel_file, "wb") as f:
f.write(http_get(PYTHON_WHEEL_URL))
yield dist_dir, egg_file, wheel_file
30 changes: 0 additions & 30 deletions pulp_python/tests/functional/api/test_pypi_apis.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,7 @@
PYTHON_MD_PROJECT_SPECIFIER,
PYTHON_MD_PYPI_SUMMARY,
PYTHON_EGG_FILENAME,
PYTHON_EGG_URL,
PYTHON_EGG_SHA256,
PYTHON_WHEEL_FILENAME,
PYTHON_WHEEL_URL,
PYTHON_WHEEL_SHA256,
SHELF_PYTHON_JSON,
)
Expand All @@ -26,20 +23,6 @@
PYPI_SERIAL_CONSTANT = 1000000000


@pytest.fixture
def python_empty_repo_distro(python_repo_factory, python_distribution_factory):
"""Returns an empty repo with and distribution serving it."""

def _generate_empty_repo_distro(repo_body=None, distro_body=None):
repo_body = repo_body or {}
distro_body = distro_body or {}
repo = python_repo_factory(**repo_body)
distro = python_distribution_factory(repository=repo, **distro_body)
return repo, distro

yield _generate_empty_repo_distro


@pytest.mark.parallel
def test_empty_index(python_bindings, python_empty_repo_distro):
"""Checks that summary stats are 0 when index is empty."""
Expand Down Expand Up @@ -80,19 +63,6 @@ def test_published_index(
assert summary.to_dict() == PYTHON_MD_PYPI_SUMMARY


@pytest.fixture(scope="module")
def python_package_dist_directory(tmp_path_factory, http_get):
"""Creates a temp dir to hold package distros for uploading."""
dist_dir = tmp_path_factory.mktemp("dist")
egg_file = dist_dir / PYTHON_EGG_FILENAME
wheel_file = dist_dir / PYTHON_WHEEL_FILENAME
with open(egg_file, "wb") as f:
f.write(http_get(PYTHON_EGG_URL))
with open(wheel_file, "wb") as f:
f.write(http_get(PYTHON_WHEEL_URL))
yield dist_dir, egg_file, wheel_file


@pytest.mark.parallel
def test_package_upload(
python_content_summary, python_empty_repo_distro, python_package_dist_directory, monitor_task
Expand Down
80 changes: 80 additions & 0 deletions pulp_python/tests/functional/api/test_upload.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import pytest
import requests
from pulp_python.tests.functional.constants import (
PYTHON_EGG_FILENAME,
PYTHON_EGG_URL,
PYTHON_WHEEL_FILENAME,
PYTHON_WHEEL_URL,
PYTHON_EGG_SHA256,
PYTHON_WHEEL_SHA256,
)
from urllib.parse import urljoin


@pytest.mark.parametrize(
Expand Down Expand Up @@ -42,3 +46,79 @@ def test_synchronous_package_upload(
with pytest.raises(python_bindings.ApiException) as ctx:
python_bindings.ContentPackagesApi.upload(**content_body)
assert ctx.value.status == 403


@pytest.mark.parallel
def test_legacy_upload_invalid_protocol_version(
python_empty_repo_distro, python_package_dist_directory
):
_, egg_file, _ = python_package_dist_directory
_, distro = python_empty_repo_distro()
url = urljoin(distro.base_url, "legacy/")
with open(egg_file, "rb") as f:
response = requests.post(
url,
data={"sha256_digest": PYTHON_EGG_SHA256, "protocol_version": 2},
files={"content": f},
auth=("admin", "password"),
)
assert response.status_code == 400
assert response.json()["protocol_version"] == ['"2" is not a valid choice.']

with open(egg_file, "rb") as f:
response = requests.post(
url,
data={"sha256_digest": PYTHON_EGG_SHA256, "protocol_version": 0},
files={"content": f},
auth=("admin", "password"),
)
assert response.status_code == 400
assert response.json()["protocol_version"] == ['"0" is not a valid choice.']


@pytest.mark.parallel
def test_legacy_upload_invalid_filetype(python_empty_repo_distro, python_package_dist_directory):
_, egg_file, wheel_file = python_package_dist_directory
_, distro = python_empty_repo_distro()
url = urljoin(distro.base_url, "legacy/")
with open(egg_file, "rb") as f:
response = requests.post(
url,
data={"sha256_digest": PYTHON_EGG_SHA256, "filetype": "bdist_wheel"},
files={"content": f},
auth=("admin", "password"),
)
assert response.status_code == 400
assert response.json()["filetype"] == [
"filetype bdist_wheel does not match found filetype sdist for file shelf-reader-0.1.tar.gz"
]

with open(wheel_file, "rb") as f:
response = requests.post(
url,
data={"sha256_digest": PYTHON_WHEEL_SHA256, "filetype": "sdist"},
files={"content": f},
auth=("admin", "password"),
)
assert response.status_code == 400
assert response.json()["filetype"] == [
"filetype sdist does not match found filetype bdist_wheel for file shelf_reader-0.1-py2-none-any.whl" # noqa: E501
]


@pytest.mark.parallel
def test_legacy_upload_invalid_metadata_version(
python_empty_repo_distro, python_package_dist_directory
):
_, egg_file, _ = python_package_dist_directory
_, distro = python_empty_repo_distro()
url = urljoin(distro.base_url, "legacy/")
with open(egg_file, "rb") as f:
response = requests.post(
url,
data={"sha256_digest": PYTHON_EGG_SHA256, "metadata_version": "3.0"},
files={"content": f},
auth=("admin", "password"),
)
assert response.status_code == 400
assert response.json()["metadata_version"] == ['"3.0" is not a valid choice.']