From 2ad0a9a0f01b07f481719eddcb03dde0d2375aaa Mon Sep 17 00:00:00 2001 From: Quentin Date: Fri, 4 Jul 2025 15:21:27 +0200 Subject: [PATCH 01/12] Modification of pyproject and workflow for building wheels --- .github/workflows/build_wheels.yml | 13 +++---------- pyproject.toml | 16 +++++++++++----- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index e942fc3..7976ecc 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -3,15 +3,8 @@ name: Build and upload to PyPI on: workflow_dispatch: push: - branches: - - main - paths: - - "src/radius_clustering/**" - - "tests/**" - - "pyproject.toml" - release: - types: - - published + tags: + - "v*" jobs: run_pytest: @@ -93,7 +86,7 @@ jobs: attestations: write #if: github.event_name == 'release' && github.event.action == 'published' # or, alternatively, upload to PyPI on every tag starting with 'v' (remove on: release above to use this) - #if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') steps: - name: Download all dists uses: actions/download-artifact@v4 diff --git a/pyproject.toml b/pyproject.toml index 2bc50a9..81ca56c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,10 +11,13 @@ authors = [ {name = "Quentin Haenn"}, {name = "Lias Laboratory"} ] +maintainers = [ + {name = "Quentin Haenn", email = "quentin.haenn.pro@gmail.com"} + ] dependencies = [ "matplotlib>=3.6.2", - "numpy>=1.23", + "numpy>=2.0", "scikit-learn>=1.2.2", "scipy>=1.12.0", ] @@ -22,6 +25,7 @@ dependencies = [ requires-python = ">=3.9" license = {file = "LICENSE"} classifiers=[ + "Development Status :: 5 - Production/Stable", "Intended Audience :: Science/Research", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", @@ -30,6 +34,8 @@ classifiers=[ "Programming Language :: 
Python", "Topic :: Software Development", "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Machine Learning", + "Topic :: Scientific/Engineering :: Mathematics", "Operating System :: Microsoft :: Windows", "Operating System :: POSIX", "Operating System :: Unix", @@ -42,12 +48,12 @@ classifiers=[ "Programming Language :: Python :: 3.13", "Programming Language :: Python :: Implementation :: CPython", ] -keywords = ["Unsupervised learning","clustering", "minimum dominating sets","clustering under radius constraint"] +keywords = ["Unsupervised learning", "clustering", "minimum dominating sets","clustering under radius constraint"] [project.urls] -source = "https://github.com/lias-laboratory/radius_clustering" -tracker = "https://github.com/lias-laboratory/radius_clustering/issues" -documentation = "https://lias-laboratory.github.io/radius_clustering/" +source = "https://github.com/scikit-learn-contrib/radius_clustering" +tracker = "https://github.com/scikit-learn-contrib/radius_clustering/issues" +documentation = "https://contrib.scikit-learn.org/radius_clustering/" [project.optional-dependencies] dev = [ From da3e0092ffdff0f57236c1ed2bc145a35105dfba Mon Sep 17 00:00:00 2001 From: Quentin Date: Fri, 4 Jul 2025 15:49:57 +0200 Subject: [PATCH 02/12] Add linting workflows --- .github/workflows/lint.yml | 32 ++++++++++++++++++++++++++++++++ .github/workflows/sphinx.yml | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/lint.yml diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..b0fdc5b --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,32 @@ +name: Lint and Format + +on: + workflow_call: + workflow_dispatch: + +jobs: + lint-and-format: + name: Run Linters and Formatters + runs-on: ubuntu-latest + steps: + - name: checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - 
name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install ".[dev]" + + - name: Run ruff linter + run: | + ruff check src/radius_clustering tests --fix + + - name: Run black formatter + run: | + black src/radius_clustering tests --check + diff --git a/.github/workflows/sphinx.yml b/.github/workflows/sphinx.yml index e407f41..f159ff8 100644 --- a/.github/workflows/sphinx.yml +++ b/.github/workflows/sphinx.yml @@ -23,7 +23,7 @@ jobs: sudo apt-get update sudo apt-get install build-essential pip install --upgrade pip - pip install -e ".[doc]" + pip install ".[doc]" pushd docs make html popd From 1e7cc0dec54d538bfb585b1b065a71eb62322282 Mon Sep 17 00:00:00 2001 From: Quentin Date: Fri, 4 Jul 2025 16:00:18 +0200 Subject: [PATCH 03/12] install and configure pre-commit --- .pre-commit-config.yaml | 20 ++++++++++++++++++++ pyproject.toml | 3 ++- 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..cb7d873 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,20 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.5.5 + hooks: + - id: ruff + args: [--fix] + - id: ruff-format + +- repo: https://github.com/psf/black-pre-commit-mirror + rev: 24.8.0 + hooks: + - id: black diff --git a/pyproject.toml b/pyproject.toml index 81ca56c..04fe3f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ dependencies = [ requires-python = ">=3.9" license = {file = "LICENSE"} classifiers=[ - "Development Status :: 5 - Production/Stable", + "Development Status :: 5 - Production/Stable", "Intended Audience :: Science/Research", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General 
Public License v3 (GPLv3)", @@ -57,6 +57,7 @@ documentation = "https://contrib.scikit-learn.org/radius_clustering/" [project.optional-dependencies] dev = [ + "pre-commit>=3.8.0", "pytest>=8.3.3", "pytest-cov>=5.0.0", "pandas", From 6a167e00c6e539d48f088937fba06eba68cb5552 Mon Sep 17 00:00:00 2001 From: Quentin Date: Fri, 4 Jul 2025 16:03:07 +0200 Subject: [PATCH 04/12] modify pre-commit rules --- .pre-commit-config.yaml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cb7d873..1a01ffc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,14 +7,13 @@ repos: - id: check-yaml - id: check-added-large-files -- repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.5.5 - hooks: - - id: ruff - args: [--fix] - - id: ruff-format - - repo: https://github.com/psf/black-pre-commit-mirror rev: 24.8.0 hooks: - id: black + +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.5.5 + hooks: + - id: ruff + args: ["--fix", "--show-source"] From 5360da519cb581aad07a1d456595b67a0f696a27 Mon Sep 17 00:00:00 2001 From: Quentin Date: Mon, 7 Jul 2025 10:00:34 +0200 Subject: [PATCH 05/12] Adding Code of Conduct and contributing guidelines --- CODE_OF_CONDUCT.md | 128 +++++++++++++++++++++++++++++++++++++++++++++ CONTRIBUTING.md | 51 ++++++++++++++++++ 2 files changed, 179 insertions(+) create mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..97a1673 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,128 @@ + +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic 
status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +- Demonstrating empathy and kindness toward other people +- Being respectful of differing opinions, viewpoints, and experiences +- Giving and gracefully accepting constructive feedback +- Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +- Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +- The use of sexualized language or imagery, and sexual attention or advances of + any kind +- Trolling, insulting or derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or email address, + without their explicit permission +- Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. 
+Examples of representing our community include using an official email address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement: +[Send Report](mailto:quentin.haenn.pro@gmail.com). +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time.
No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Attribution + +This Code of Conduct is adapted from the +[Contributor Covenant](https://www.contributor-covenant.org/), version 2.1, +available at +<https://www.contributor-covenant.org/version/2/1/code_of_conduct.html>. + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/inclusion). + +For answers to common questions about this code of conduct, see the FAQ at +<https://www.contributor-covenant.org/faq>. Translations are available at +<https://www.contributor-covenant.org/translations>. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..571b0bc --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,51 @@ +# Contributing to Radius Clustering + +First off, thank you for considering contributing to Radius Clustering! It's people like you that make open source such a great community. + +## Where do I go from here? + +If you've noticed a bug or have a feature request, [open an issue](https://github.com/scikit-learn-contrib/radius_clustering/issues/new)! It's generally best if you get confirmation of your bug or approval for your feature request this way before starting to code. + +### Fork & create a branch + +If you've decided to contribute, you'll need to fork the repository and create a new branch. + +```bash +git checkout -b my-new-feature +``` + +## Getting started + +To get started with development, you need to install the package in editable mode with all the development dependencies. It is highly recommended to do this in a virtual environment.
+ +```bash +pip install -e ".[dev]" +``` + +This will install the package and all the tools needed for testing and linting. + +## Running Tests + +To ensure that your changes don't break anything, please run the test suite. + +```bash +pytest +``` + +## Code Style + +This project uses `ruff` for linting and `black` for formatting. We use `pre-commit` to automatically run these tools before each commit. + +To set up `pre-commit`, run: + +```bash +pre-commit install +``` + +This will ensure your contributions match the project's code style. + +## Submitting a Pull Request + +When you're ready to submit your changes, please write a clear and concise pull request message. Make sure to link any relevant issues. + +Thank you for your contribution! From de3ed62a0c2f37da946f1df5f24772d27e8df368 Mon Sep 17 00:00:00 2001 From: Quentin Date: Mon, 7 Jul 2025 10:02:10 +0200 Subject: [PATCH 06/12] link new files in readme --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2b1b09e..ca0f9ee 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,10 @@ If you want to know more about the experiments conducted with the package, pleas ## Contributing -Contributions to Radius Clustering are welcome! Please feel free to submit a Pull Request. +Contributions to Radius Clustering are welcome! + +Please read the [CONTRIBUTING.md](CONTRIBUTING.md) file for details on how to contribute to the project. +Please note that the project is released with a [Code of Conduct](CODE_OF_CONDUCT.md), and we expect all contributors to adhere to it. 
## License From 5c8ce8fcfafbe6f7f869e1b82cc83b2d6256c3be Mon Sep 17 00:00:00 2001 From: Quentin Date: Mon, 7 Jul 2025 10:34:07 +0200 Subject: [PATCH 07/12] Add issue and PR templates --- .github/ISSUE_TEMPLATE/bug_report.yml | 75 ++++++++++++++++++++++ .github/ISSUE_TEMPLATE/doc_improvement.yml | 17 +++++ .github/ISSUE_TEMPLATE/feature_request.yml | 25 ++++++++ .github/PULL_REQUEST_TEMPLATE.md | 32 +++++++++ 4 files changed, 149 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.yml create mode 100644 .github/ISSUE_TEMPLATE/doc_improvement.yml create mode 100644 .github/ISSUE_TEMPLATE/feature_request.yml create mode 100644 .github/PULL_REQUEST_TEMPLATE.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..78e4203 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,75 @@ +name: Bug Report +description: Create a report to help us improve +title: "[Bug]: " +labels: ["bug", "triage"] +body: + - type: markdown + attributes: + value: | + Thanks for taking the time to fill out this bug report! + + - type: textarea + id: what-happened + attributes: + label: Describe the bug + description: A clear and concise description of what the bug is. + placeholder: Tell us what you see! + validations: + required: true + + - type: textarea + id: reproduce + attributes: + label: To Reproduce + description: "Steps to reproduce the behavior. Please provide a minimal, self-contained code sample." + placeholder: | + ```python + import numpy as np + from radius_clustering import RadiusClustering + + # Your code here that triggers the bug + ``` + validations: + required: true + + - type: textarea + id: expected + attributes: + label: Expected behavior + description: A clear and concise description of what you expected to happen. + validations: + required: true + + - type: dropdown + id: os + attributes: + label: Operating System + description: What operating system are you using? 
+ options: + - Windows + - macOS + - Linux + validations: + required: true + + - type: input + id: python-version + attributes: + label: Python Version + placeholder: "e.g. 3.11.4" + validations: + required: true + + - type: input + id: package-version + attributes: + label: Package Version + placeholder: "e.g. 1.4.0" + validations: + required: true + + - type: textarea + id: additional-context + attributes: + label: Additional context + description: Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/doc_improvement.yml b/.github/ISSUE_TEMPLATE/doc_improvement.yml new file mode 100644 index 0000000..48d0c3d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/doc_improvement.yml @@ -0,0 +1,17 @@ +name: Documentation improvement +description: Create a report to help us improve the documentation. Alternatively you can just open a pull request with the suggested change. +labels: [Documentation, 'Needs Triage'] + +body: +- type: textarea + attributes: + label: Describe the issue linked to the documentation + description: > + Tell us about the confusion introduced in the documentation. + validations: + required: true +- type: textarea + attributes: + label: Suggest a potential alternative/fix + description: > + Tell us how we could improve the documentation in this regard. diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..624cf4b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,25 @@ +name: Feature Request +description: Suggest an idea for this project +title: "[Feature]: " +labels: ["enhancement"] +body: + - type: textarea + attributes: + label: Is your feature request related to a problem? Please describe. + description: A clear and concise description of what the problem is. Ex. "I'm always frustrated when..." 
+ validations: + required: true + - type: textarea + attributes: + label: Describe the solution you'd like + description: A clear and concise description of what you want to happen. + validations: + required: true + - type: textarea + attributes: + label: Describe alternatives you've considered + description: A clear and concise description of any alternative solutions or features you've considered. + - type: textarea + attributes: + label: Additional context + description: Add any other context or screenshots about the feature request here. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..f15649c --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,32 @@ +## Description + +Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change. + +Fixes # (issue) + +## Type of change + +Please delete options that are not relevant. + +- [ ] Bug fix (non-breaking change which fixes an issue) +- [ ] New feature (non-breaking change which adds functionality) +- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) +- [ ] This change requires a documentation update + +## How Has This Been Tested? + +Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration. 
+ +- [ ] Test A +- [ ] Test B + +## Checklist: + +- [ ] My code follows the style guidelines of this project +- [ ] I have performed a self-review of my own code +- [ ] I have commented my code, particularly in hard-to-understand areas +- [ ] I have made corresponding changes to the documentation +- [ ] My changes generate no new warnings +- [ ] I have added tests that prove my fix is effective or that my feature works +- [ ] New and existing unit tests pass locally with my changes +- [ ] Any dependent changes have been merged and published in downstream modules From 431d44b0abcec9c03492a07855a274f2b4857d98 Mon Sep 17 00:00:00 2001 From: Quentin Date: Mon, 7 Jul 2025 11:12:38 +0200 Subject: [PATCH 08/12] =?UTF-8?q?Ajout=20de=20l'upload=20des=20rapports=20?= =?UTF-8?q?de=20couverture=20=C3=A0=20Codecov=20dans=20le=20workflow=20de?= =?UTF-8?q?=20tests=20et=20mise=20=C3=A0=20jour=20du=20README=20pour=20inc?= =?UTF-8?q?lure=20le=20badge=20de=20Codecov.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/tests.yml | 6 ++++++ README.md | 1 + 2 files changed, 7 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 243c494..50d84c9 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -28,3 +28,9 @@ jobs: - name: Run tests with pytest run: | pytest -v + + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v5.4.3 + with: + token: ${{ secrets.CODECOV_TOKEN }} + slug: scikit-learn-contrib/radius_clustering diff --git a/README.md b/README.md index ca0f9ee..c54c3ae 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ Code style: Ruff GitHub Actions Workflow Status Python version supported +Codecov

From d853ba980afd5d0b491c030125fdb5a068698850 Mon Sep 17 00:00:00 2001 From: Quentin Date: Mon, 7 Jul 2025 11:57:51 +0200 Subject: [PATCH 09/12] Add notebook and environment for binder --- environment.yml | 7 + notebooks/comparison_example.ipynb | 487 +++++++++++++++++++++++++++++ 2 files changed, 494 insertions(+) create mode 100644 environment.yml create mode 100644 notebooks/comparison_example.ipynb diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..da487bf --- /dev/null +++ b/environment.yml @@ -0,0 +1,7 @@ +name: radius_clustering +dependencies: + - matplotlib>=3.6.2 + - numpy>=2.0 + - scikit-learn>=1.2.2 + - scipy>=1.12.0 + - pandas>=2.0.3 diff --git a/notebooks/comparison_example.ipynb b/notebooks/comparison_example.ipynb new file mode 100644 index 0000000..fb84132 --- /dev/null +++ b/notebooks/comparison_example.ipynb @@ -0,0 +1,487 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4acb9df3", + "metadata": {}, + "source": [ + "# Comparison of Radius Clustering with KMeans on Sample Datasets\n", + "\n", + "\n", + "This example is meant to illustrate the use of the Radius clustering library on several datasets.\n", + "\n", + "The example includes:\n", + "1. Loading the datasets\n", + "2. Applying Radius clustering and k-means clustering\n", + "3. Visualizing the clustering results\n", + "\n", + "This example serves as a simple introduction to using the Radius clustering library on well-known datasets.\n", + "\n", + "**Author: Haenn Quentin**\n", + "\n", + "**@SPDX-License-Identifier: MIT**\n", + "\n", + "\n", + "\n", + "## 1.
Load the Iris dataset\n", + "\n", + "We start by loading the Iris dataset using the `load_iris` function from `sklearn.datasets`.\n", + "The Iris dataset is a well-known dataset that contains 150 samples of iris flowers.\n", + "Each sample has 4 features: sepal length, sepal width, petal length, and petal width.\n", + "The dataset is labeled with 3 classes: setosa, versicolor, and virginica." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e28a516b", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from sklearn import datasets\n", + "from radius_clustering import RadiusClustering\n", + "\n", + "# Load the Iris dataset\n", + "iris = datasets.load_iris()\n", + "X = iris[\"data\"]\n", + "y = iris.target" + ] + }, + { + "cell_type": "markdown", + "id": "b84938fd", + "metadata": {}, + "source": [ + "\n", + "## 2. Visualize the Iris dataset\n", + "\n", + "\n", + "We can visualize the Iris dataset by plotting it. We use PCA to reduce the dimensionality to 3D and plot the dataset in a 3D scatter plot."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28f37b15", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "from sklearn.decomposition import PCA\n", + "import mpl_toolkits.mplot3d\n", + "\n", + "# Reduce the dimensionality of the dataset to 3D using PCA\n", + "pca = PCA(n_components=3)\n", + "iris_reduced = pca.fit_transform(X)\n", + "fig = plt.figure(figsize=(8, 6))\n", + "ax = fig.add_subplot(111, projection=\"3d\", elev=48, azim=134)\n", + "ax.scatter(\n", + " iris_reduced[:, 0],\n", + " iris_reduced[:, 1],\n", + " iris_reduced[:, 2],\n", + " c=y,\n", + " cmap=\"Dark2\",\n", + " s=40,\n", + ")\n", + "# Set plot labels\n", + "ax.set_title(\"Iris dataset in first 3 PCA components\")\n", + "ax.set_xlabel(\"1st eigenvector\")\n", + "ax.set_ylabel(\"2nd eigenvector\")\n", + "ax.set_zlabel(\"3rd eigenvector\")\n", + "\n", + "# Hide tick labels\n", + "ax.xaxis.set_ticklabels([])\n", + "ax.yaxis.set_ticklabels([])\n", + "ax.zaxis.set_ticklabels([])\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "cd38d50b", + "metadata": {}, + "source": [ + "\n", + "## 3. Compute Clustering with Radius Clustering\n", + "\n", + "We can now apply Radius clustering to the Iris dataset.\n", + "We create an instance of the `RadiusClustering` class and fit it to the Iris dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9282ec34", + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "\n", + "rad = RadiusClustering(manner=\"exact\", radius=1.43)\n", + "t0 = time.time()\n", + "rad.fit(X)\n", + "t_rad = time.time() - t0" + ] + }, + { + "cell_type": "markdown", + "id": "2653845e", + "metadata": {}, + "source": [ + "\n", + "## 4. 
Compute KMeans Clustering for Comparison\n", + "\n", + "We also apply KMeans clustering to the Iris dataset for comparison.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7e993f5", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "from sklearn.cluster import KMeans\n", + "\n", + "k_means = KMeans(n_clusters=3, n_init=10)\n", + "t0 = time.time()\n", + "k_means.fit(X)\n", + "t_kmeans = time.time() - t0" + ] + }, + { + "cell_type": "markdown", + "id": "d1072a7f", + "metadata": {}, + "source": [ + "## 5. Establishing parity between clusters\n", + "\n", + "We want to have the same color for the same cluster in both plots.\n", + "We can achieve this by matching the cluster labels of the Radius clustering and the KMeans clustering.\n", + "First we define a function to retrieve the cluster centers from the Radius clustering and KMeans clustering and\n", + "match them pairwise." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ac48cdf", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "def get_order_labels(kmeans, rad, data):\n", + " centers1_cpy = kmeans.cluster_centers_.copy()\n", + " centers2_cpy = data[rad.centers_].copy()\n", + " order = []\n", + " # For each center in the first clustering, find the closest center in the second clustering\n", + " for center in centers1_cpy:\n", + " match = pairwise_distances_argmin([center], centers2_cpy)\n", + " # if there is only one center left, assign it to the last cluster label not yet assigned\n", + " if len(centers2_cpy) == 1:\n", + " for i in range(len(centers1_cpy)):\n", + " if i not in order:\n", + " order.append(i)\n", + " break\n", + " break\n", + " # get coordinates of the center in the second clustering\n", + " coordinates = centers2_cpy[match]\n", + " # find the closest point in the data to the center to get the cluster label\n", + " closest_point = pairwise_distances_argmin(coordinates, data)\n", + " match_label = rad.labels_[closest_point]\n", + 
" # remove the center from the second clustering\n", + " centers2_cpy = np.delete(centers2_cpy, match, axis=0)\n", + " # add the cluster label to the order\n", + " order.append(int(match_label[0]))\n", + " return order\n", + "\n", + "\n", + "from sklearn.metrics.pairwise import pairwise_distances_argmin\n", + "\n", + "rad_centers_index = np.array(rad.centers_)\n", + "order = get_order_labels(k_means, rad, X)\n", + "\n", + "kmeans_centers = k_means.cluster_centers_\n", + "rad_centers = rad_centers_index[order]\n", + "rad_centers_coordinates = X[rad_centers]\n", + "\n", + "# Pair the cluster labels\n", + "kmeans_labels = pairwise_distances_argmin(X, kmeans_centers)\n", + "rad_labels = pairwise_distances_argmin(X, rad_centers_coordinates)" + ] + }, + { + "cell_type": "markdown", + "id": "b428447c", + "metadata": {}, + "source": [ + "### Plotting the results and the difference" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "69c095ee", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plt.figure(figsize=(12, 6))\n", + "fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9)\n", + "colors = [\"#4EACC5\", \"#FF9C34\", \"#4E9A06\"]\n", + "\n", + "# KMeans\n", + "ax = fig.add_subplot(1, 3, 1, projection=\"3d\", elev=48, azim=134, roll=0)\n", + "\n", + "ax.scatter(\n", + " iris_reduced[:, 0],\n", + " iris_reduced[:, 1],\n", + " iris_reduced[:, 2],\n", + " c=kmeans_labels,\n", + " cmap=\"Dark2\",\n", + " s=40,\n", + ")\n", + "# adapting center coordinates to the 3D plot\n", + "kmeans_centers = pca.transform(kmeans_centers)\n", + "ax.scatter(\n", + " kmeans_centers[:, 0],\n", + " kmeans_centers[:, 1],\n", + " kmeans_centers[:, 2],\n", + " c=\"r\",\n", + " s=200,\n", + ")\n", + "ax.set_title(\"KMeans\")\n", + "ax.set_xticks(())\n", + "ax.set_yticks(())\n", + "ax.set_zticks(())\n", + "\n", + "ax.text3D(-3.5, 3, 1.0, \"train time: %.2fs\\ninertia: %f\" % (t_kmeans, k_means.inertia_))\n", + "\n", + "# MDS\n", + "ax = fig.add_subplot(1, 3, 
2, projection=\"3d\", elev=48, azim=134, roll=0)\n", + "ax.scatter(\n", + " iris_reduced[:, 0],\n", + " iris_reduced[:, 1],\n", + " iris_reduced[:, 2],\n", + " c=rad_labels,\n", + " cmap=\"Dark2\",\n", + " s=40,\n", + ")\n", + "# adapting center coordinates to the 3D plot\n", + "rad_centers_coordinates = pca.transform(rad_centers_coordinates)\n", + "ax.scatter(\n", + " rad_centers_coordinates[:, 0],\n", + " rad_centers_coordinates[:, 1],\n", + " rad_centers_coordinates[:, 2],\n", + " c=\"r\",\n", + " s=200,\n", + ")\n", + "ax.set_title(\"MDS Clustering\")\n", + "ax.set_xticks(())\n", + "ax.set_yticks(())\n", + "ax.set_zticks(())\n", + "ax.text3D(-3.5, 3, 0.0, \"train time: %.2fs\" % t_rad)\n", + "\n", + "# Initialize the different array to all False\n", + "different = rad_labels == 4\n", + "ax = fig.add_subplot(1, 3, 3, projection=\"3d\", elev=48, azim=134, roll=0)\n", + "\n", + "for k in range(3):\n", + " different += (kmeans_labels == k) != (rad_labels == k)\n", + "\n", + "identical = np.logical_not(different)\n", + "ax.scatter(\n", + " iris_reduced[identical, 0], iris_reduced[identical, 1], color=\"#bbbbbb\", marker=\".\"\n", + ")\n", + "ax.scatter(iris_reduced[different, 0], iris_reduced[different, 1], color=\"m\")\n", + "ax.set_title(\"Difference\")\n", + "ax.set_xticks(())\n", + "ax.set_yticks(())\n", + "ax.set_zticks(())\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "3d1c4fcf", + "metadata": {}, + "source": [ + "## Another difference plot\n", + "\n", + "As we saw, the difference plot is not very informative using Iris.\n", + "We'll use a different dataset to show the difference plot." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea3d0438", + "metadata": {}, + "outputs": [], + "source": [ + "wine = datasets.load_wine()\n", + "X = wine.data\n", + "y = wine.target\n", + "pca = PCA(n_components=3)\n", + "wine_reduced = pca.fit_transform(X)\n", + "\n", + "# Compute clustering with MDS\n", + "\n", + "rad = RadiusClustering(manner=\"exact\", radius=232.09)\n", + "t0 = time.time()\n", + "rad.fit(X)\n", + "t_rad = time.time() - t0\n", + "\n", + "# Compute KMeans clustering for comparison\n", + "\n", + "k_means = KMeans(n_clusters=3, n_init=10)\n", + "t0 = time.time()\n", + "k_means.fit(X)\n", + "t_kmeans = time.time() - t0" + ] + }, + { + "cell_type": "markdown", + "id": "3929dee4", + "metadata": {}, + "source": [ + "## Reapplying the same process as before" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24449b3a", + "metadata": {}, + "outputs": [], + "source": [ + "rad_centers_index = np.array(rad.centers_)\n", + "order = get_order_labels(k_means, rad, X)\n", + "\n", + "kmeans_centers = k_means.cluster_centers_\n", + "rad_centers = rad_centers_index[order]\n", + "rad_centers_coordinates = X[rad_centers]\n", + "\n", + "# Pair the cluster labels\n", + "kmeans_labels = pairwise_distances_argmin(X, kmeans_centers)\n", + "rad_labels = pairwise_distances_argmin(X, rad_centers_coordinates)" + ] + }, + { + "cell_type": "markdown", + "id": "3accac5b", + "metadata": {}, + "source": [ + "## Plotting the results and the difference" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39235d3c", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plt.figure(figsize=(12, 6))\n", + "fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9)\n", + "colors = [\"#4EACC5\", \"#FF9C34\", \"#4E9A06\"]\n", + "\n", + "# KMeans\n", + "ax = fig.add_subplot(1, 3, 1, projection=\"3d\", elev=48, azim=134, roll=0)\n", + "\n", + "ax.scatter(\n", + " wine_reduced[:, 0],\n", + " wine_reduced[:, 1],\n", 
+ " wine_reduced[:, 2],\n", + " c=kmeans_labels,\n", + " cmap=\"Dark2\",\n", + " s=40,\n", + ")\n", + "# adapting center coordinates to the 3D plot\n", + "kmeans_centers = pca.transform(kmeans_centers)\n", + "ax.scatter(\n", + " kmeans_centers[:, 0],\n", + " kmeans_centers[:, 1],\n", + " kmeans_centers[:, 2],\n", + " c=\"r\",\n", + " s=200,\n", + ")\n", + "ax.set_title(\"KMeans\")\n", + "ax.set_xticks(())\n", + "ax.set_yticks(())\n", + "ax.set_zticks(())\n", + "\n", + "ax.text3D(\n", + " 60.0, 80.0, 0.0, \"train time: %.2fs\\ninertia: %f\" % (t_kmeans, k_means.inertia_)\n", + ")\n", + "\n", + "# MDS\n", + "ax = fig.add_subplot(1, 3, 2, projection=\"3d\", elev=48, azim=134, roll=0)\n", + "ax.scatter(\n", + " wine_reduced[:, 0],\n", + " wine_reduced[:, 1],\n", + " wine_reduced[:, 2],\n", + " c=rad_labels,\n", + " cmap=\"Dark2\",\n", + " s=40,\n", + ")\n", + "# adapting center coordinates to the 3D plot\n", + "rad_centers_coordinates = pca.transform(rad_centers_coordinates)\n", + "ax.scatter(\n", + " rad_centers_coordinates[:, 0],\n", + " rad_centers_coordinates[:, 1],\n", + " rad_centers_coordinates[:, 2],\n", + " c=\"r\",\n", + " s=200,\n", + ")\n", + "ax.set_title(\"MDS Clustering\")\n", + "ax.set_xticks(())\n", + "ax.set_yticks(())\n", + "ax.set_zticks(())\n", + "ax.text3D(60.0, 80.0, 0.0, \"train time: %.2fs\" % t_rad)\n", + "\n", + "# Initialize the different array to all False\n", + "different = rad_labels == 4\n", + "ax = fig.add_subplot(1, 3, 3, projection=\"3d\", elev=48, azim=134, roll=0)\n", + "\n", + "for k in range(3):\n", + " different += (kmeans_labels == k) != (rad_labels == k)\n", + "\n", + "identical = np.logical_not(different)\n", + "ax.scatter(\n", + " wine_reduced[identical, 0], wine_reduced[identical, 1], color=\"#bbbbbb\", marker=\".\"\n", + ")\n", + "ax.scatter(wine_reduced[different, 0], wine_reduced[different, 1], color=\"m\")\n", + "ax.set_title(\"Difference\")\n", + "ax.set_xticks(())\n", + "ax.set_yticks(())\n", + "ax.set_zticks(())\n", + 
"\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "c1172f38", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "In this example, we applied Radius Clustering to the Iris and Wine datasets and compared it with KMeans clustering.\n", + "We visualized the clustering results and the difference between the two clustering algorithms.\n", + "We saw that Radius Clustering can lead to smaller clusters than KMeans, which produces much more balanced clusters.\n", + "The difference plot can be very useful to see where the two clustering algorithms differ." + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 9dbd0910f92575449082891eb024b0187b421bc0 Mon Sep 17 00:00:00 2001 From: Quentin Date: Mon, 7 Jul 2025 11:59:38 +0200 Subject: [PATCH 10/12] update README with binder badge --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index c54c3ae..6b2a4cc 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ GitHub Actions Workflow Status Python version supported Codecov +Binder +

From e02aeb74eca12c87f26764f690a7459cf5110f72 Mon Sep 17 00:00:00 2001 From: Quentin Date: Mon, 7 Jul 2025 12:37:01 +0200 Subject: [PATCH 11/12] Update README with new citation methods --- README.md | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6b2a4cc..88e5c1c 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ GitHub Actions Workflow Status Python version supported Codecov -Binder +Binder

@@ -105,6 +105,26 @@ Please note that the project is released with a [Code of Conduct](CODE_OF_CONDUC This project is licensed under the GNU General Public License v3.0 - see the LICENSE file for details. +## How to cite this work + +If you use Radius Clustering in your research, please cite the following paper and the software itself: + +```bibtex +@inproceedings{haenn_clustering2024, + TITLE = {{Clustering Under Radius Constraints Using Minimum Dominating Sets}}, + AUTHOR = {Haenn, Quentin and Chardin, Brice and Baron, Micka{\"e}l}, + URL = {https://hal.science/hal-04533921}, + BOOKTITLE = {{Lecture Notes in Artificial Intelligence}}, + ADDRESS = {Poitiers, France}, + PUBLISHER = {{Springer}}, + YEAR = {2024}, + MONTH = Jun, + KEYWORDS = {Constrained Clustering ; Radius Based Clustering ; Minimum Dominating Set}, + PDF = {https://hal.science/hal-04533921v1/file/clustering_under_radius_using_mds.pdf}, + HAL_ID = {hal-04533921}, + HAL_VERSION = {v1}, +} +``` ## Acknowledgments From cd3d598a095fdb9beca976681fff2e7994db5601 Mon Sep 17 00:00:00 2001 From: Quentin Date: Mon, 7 Jul 2025 12:40:36 +0200 Subject: [PATCH 12/12] update labels tags in templates --- .github/ISSUE_TEMPLATE/doc_improvement.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/doc_improvement.yml b/.github/ISSUE_TEMPLATE/doc_improvement.yml index 48d0c3d..2393976 100644 --- a/.github/ISSUE_TEMPLATE/doc_improvement.yml +++ b/.github/ISSUE_TEMPLATE/doc_improvement.yml @@ -1,6 +1,6 @@ name: Documentation improvement description: Create a report to help us improve the documentation. Alternatively you can just open a pull request with the suggested change. -labels: [Documentation, 'Needs Triage'] +labels: ["documentation", "triage"] body: - type: textarea