diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index c0985c4c..b895c4ad 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,72 +1,27 @@ +# This workflow is just to test that the docs build successfully. name: docs - +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true on: - # Trigger manually workflow_dispatch: - - # Trigger on any push to the main push: branches: - main - - development - - # Trigger on any push to a PR that targets main pull_request: branches: - main - - development - permissions: contents: write - -env: - name: "ConfigSpace" - jobs: - build-and-deploy: + build: runs-on: ubuntu-latest steps: - - name: Checkout - uses: actions/checkout@v3 - - - name: Setup Python - uses: actions/setup-python@v3 - with: - python-version: "3.8" - - - name: Install dependencies - run: | - pip install build - pip install ".[docs]" - - - name: Make docs - run: | - make clean - make docs - - - name: Pull latest gh-pages - if: (contains(github.ref, 'develop') || contains(github.ref, 'main')) && github.event_name == 'push' - run: | - cd .. 
- git clone https://github.com/${{ github.repository }}.git --branch gh-pages --single-branch gh-pages - - - name: Copy new docs into gh-pages - if: (contains(github.ref, 'develop') || contains(github.ref, 'main')) && github.event_name == 'push' - run: | - branch_name=${GITHUB_REF##*/} - cd ../gh-pages - rm -rf $branch_name - cp -r ../${{ env.name }}/docs/build/html $branch_name - - - name: Push to gh-pages - if: (contains(github.ref, 'develop') || contains(github.ref, 'main')) && github.event_name == 'push' - run: | - last_commit=$(git log --pretty=format:"%an: %s") - cd ../gh-pages - branch_name=${GITHUB_REF##*/} - git add $branch_name/ - git config --global user.name 'Github Actions' - git config --global user.email 'not@mail.com' - git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }} - git commit -am "$last_commit" - git push + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + - name: "Install dependancies" + run: python -m pip install -e ".[dev]" + - name: "Build Docs" + run: mkdocs build --clean --strict diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index bca7073d..85f15711 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -1,4 +1,7 @@ name: Tests +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true on: # Allow to manually trigger through github API @@ -8,13 +11,11 @@ on: push: branches: - main - - development # Triggers with push to a pr aimed at master pull_request: branches: - main - - development schedule: # Every day at 7AM UTC @@ -31,21 +32,9 @@ env: --durations=20 -v - # Version to run code-cov on - # NOTE: These are only acessible inside a jobs **steps** and not in the job setup, - # Hence, some of these varialbes are copied and are just here for reference - # - code-cov-active: true # Copied in job setup - 
code-cov-os: ubuntu-latest # Copied in job setup - code-cov-python-version: "3.8" - code-cov-args: >- - --cov=ConfigSpace - --cov-report=xml - jobs: + test: - # General unit tests - source-test: name: ${{ matrix.python-version }}-${{ matrix.os }} runs-on: ${{ matrix.os }} @@ -56,153 +45,24 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] os: ["ubuntu-latest", "macos-latest", "windows-latest"] steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Setup Python - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - name: Install ${{ env.package-name }} - run: | - python -m pip install --upgrade pip - python -m pip install -e ".${{ env.extra-requires }}" - - - name: Store git status - id: status-before - shell: bash - run: | - echo "::set-output name=BEFORE::$(git status --porcelain -b)" - - - name: Tests - timeout-minutes: 45 - run: | - pytest ${{ env.pytest-args }} ${{ env.test-dir }} - - - name: Check for files left behind by test - run: | - before="${{ steps.status-before.outputs.BEFORE }}" - after="$(git status --porcelain -b)" - if [[ "$before" != "$after" ]]; then - echo "git status from before: $before" - echo "git status from after: $after" - echo "Not all generated files have been deleted!" 
- exit 1 - fi - - # Testing with conda - conda-tests: - name: conda-${{ matrix.python-version }}-${{ matrix.os }} - runs-on: ${{ matrix.os }} - defaults: - run: - shell: bash -l {0} # Default to using bash on all and load (-l) .bashrc which miniconda uses - - strategy: - fail-fast: false - matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] - os: ["ubuntu-latest", "macos-latest", "windows-latest"] - - steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Conda install - uses: conda-incubator/setup-miniconda@v2 + uses: actions/setup-python@v5 with: - auto-update-conda: true python-version: ${{ matrix.python-version }} + cache: pip + cache-dependency-path: pyproject.toml - name: Install ${{ env.package-name }} run: | - python -V - python -m pip install --upgrade pip - python -m pip install wheel python -m pip install -e ".${{ env.extra-requires }}" - name: Tests - timeout-minutes: 45 run: | - pytest ${{ env.pytest-args }} ${{ env.test-dir }} - # - # Testing a dist install - dist-test: - name: dist-${{ matrix.python-version }}-${{ matrix.os }} - - runs-on: ${{ matrix.os }} - defaults: - run: - shell: bash - - strategy: - fail-fast: false - matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] - os: ["ubuntu-latest", "macos-latest", "windows-latest"] - - steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Setup Python - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - name: Create sdist - id: sdist - run: | - python -m pip install --upgrade pip build - python -m build --sdist - echo "sdist_name=$(ls -t dist/${{ env.package-name }}-*.tar.gz | head -n 1)" >> "$GITHUB_ENV" - - - name: Install ${{ env.package-name }} - run: | - python -m pip install ${{ env.sdist_name }}${{ env.extra-requires }} - - - name: Tests - timeout-minutes: 45 - run: | - pytest ${{ env.pytest-args }} ${{ env.test-dir }} - - - # Testing with codecov coverage uploaded - codecov-test: - name: codecov-test - - 
runs-on: "ubuntu-latest" - defaults: - run: - shell: bash - - steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Setup Python - uses: actions/setup-python@v2 - with: - python-version: ${{ env.code-cov-python-version }} - - - name: Install ${{ env.package-name }} - run: | - python -m pip install --upgrade pip - python -m pip install wheel - python -m pip install -e ".${{ env.extra-requires }}" - - - name: Tests - timeout-minutes: 45 - run: | - pytest ${{ env.pytest-args }} ${{ env.code-cov-args }} ${{ env.test-dir }} - - - name: Upload coverage - uses: codecov/codecov-action@v2 - with: - fail_ci_if_error: true - verbose: true + pytest ${{ env.test-dir }} diff --git a/.gitignore b/.gitignore index 4b8afa95..f657f292 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ docs/examples/* # C extensions *.so *.c +*.jukit # Packages *.egg @@ -74,4 +75,5 @@ prof/ .venv # Running pre-commit seems to generate these -.mypy_cache \ No newline at end of file +.mypy_cache +*.speedscope \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 645a947f..53c08736 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -38,32 +38,15 @@ repos: hooks: - id: mypy files: '^ConfigSpace/.*\.py$' + additional_dependencies: + - "types-pyyaml" args: - "--no-warn-return-any" # Disable this because it doesn't know about 3rd party imports - "--ignore-missing-imports" - "--show-traceback" - - id: mypy - files: '^test/.*\.py$' - args: - - "--no-warn-return-any" # Disable this because it doesn't know about 3rd party imports - - "--ignore-missing-imports" - - "--show-traceback" - - "--disable-error-code" - - "no-untyped-def" - - - repo: https://github.com/psf/black - rev: 23.3.0 - hooks: - - id: black - args: ["--config=pyproject.toml"] - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.263 + rev: v0.4.0 hooks: - id: ruff - args: ["--fix", "ConfigSpace", "test"] - - repo: 
https://github.com/MarcoGorelli/cython-lint - rev: v0.15.0 - hooks: - - id: cython-lint - args: [--ignore=E501] - - id: double-quote-cython-strings + args: [--fix, --exit-non-zero-on-fix, --no-cache] + - id: ruff-format diff --git a/ConfigSpace/api/types/categorical.py b/ConfigSpace/api/types/categorical.py deleted file mode 100644 index c0c84a27..00000000 --- a/ConfigSpace/api/types/categorical.py +++ /dev/null @@ -1,141 +0,0 @@ -from __future__ import annotations - -from typing import ( - Sequence, - Union, - overload, -) -from typing_extensions import Literal, TypeAlias - -from ConfigSpace.hyperparameters import CategoricalHyperparameter, OrdinalHyperparameter - -# We only accept these types in `items` -T: TypeAlias = Union[str, int, float] - - -# ordered False -> CategoricalHyperparameter -@overload -def Categorical( - name: str, - items: Sequence[T], - *, - default: T | None = None, - weights: Sequence[float] | None = None, - ordered: Literal[False], - meta: dict | None = None, -) -> CategoricalHyperparameter: - ... - - -# ordered True -> OrdinalHyperparameter -@overload -def Categorical( - name: str, - items: Sequence[T], - *, - default: T | None = None, - weights: Sequence[float] | None = None, - ordered: Literal[True], - meta: dict | None = None, -) -> OrdinalHyperparameter: - ... - - -# ordered bool (unknown) -> Either -@overload -def Categorical( - name: str, - items: Sequence[T], - *, - default: T | None = None, - weights: Sequence[float] | None = None, - ordered: bool = ..., - meta: dict | None = None, -) -> CategoricalHyperparameter | OrdinalHyperparameter: - ... - - -def Categorical( - name: str, - items: Sequence[T], - *, - default: T | None = None, - weights: Sequence[float] | None = None, - ordered: bool = False, - meta: dict | None = None, -) -> CategoricalHyperparameter | OrdinalHyperparameter: - """Creates a Categorical Hyperparameter. - - CategoricalHyperparameter's can be used to represent a discrete - choice. 
Optionally, you can specify that these values are also ordered in - some manner, e.g. ``["small", "medium", "large"]``. - - .. code:: python - - # A simple categorical hyperparameter - c = Categorical("animals", ["cat", "dog", "mouse"]) - - # With a default - c = Categorical("animals", ["cat", "dog", "mouse"], default="mouse") - - # Make them weighted - c = Categorical("animals", ["cat", "dog", "mouse"], weights=[0.1, 0.8, 3.14]) - - # Specify it's an OrdinalHyperparameter (ordered categories) - # ... note that you can't apply weights to an Ordinal - o = Categorical("size", ["small", "medium", "large"], ordered=True) - - # Add some meta information for your own tracking - c = Categorical("animals", ["cat", "dog", "mouse"], meta={"use": "Favourite Animal"}) - - Note - ---- - ``Categorical`` is actually a function, please use the corresponding return types if - doing an `isinstance(param, type)` check with either - :py:class:`~ConfigSpace.hyperparameters.CategoricalHyperparameter` - and/or :py:class:`~ConfigSpace.hyperparameters.OrdinalHyperparameter`. - - Parameters - ---------- - name: str - The name of the hyperparameter - - items: Sequence[T], - A list of items to put in the category. Note that there are limitations: - - * Can't use `None`, use a string "None" instead and convert as required. - * Can't have duplicate categories, use weights if required. - - default: T | None = None - The default value of the categorical hyperparameter - - weights: Sequence[float] | None = None - The weights to apply to each categorical. Each item will be sampled according - to these weights. - - ordered: bool = False - Whether the categorical is ordered or not. If True, this will return an - :py:class:`OrdinalHyperparameter`, otherwise it remain a - :py:class:`CategoricalHyperparameter`. - - meta: dict | None = None - Any additional meta information you would like to store along with the hyperparamter. 
- """ - if ordered and weights is not None: - raise ValueError("Can't apply `weights` to `ordered` Categorical") - - if ordered: - return OrdinalHyperparameter( - name=name, - sequence=items, - default_value=default, - meta=meta, - ) - - return CategoricalHyperparameter( - name=name, - choices=items, - default_value=default, - weights=weights, - meta=meta, - ) diff --git a/ConfigSpace/api/types/float.py b/ConfigSpace/api/types/float.py deleted file mode 100644 index c7847613..00000000 --- a/ConfigSpace/api/types/float.py +++ /dev/null @@ -1,185 +0,0 @@ -from __future__ import annotations - -from typing import overload - -from ConfigSpace.api.distributions import Beta, Distribution, Normal, Uniform -from ConfigSpace.hyperparameters import ( - BetaFloatHyperparameter, - NormalFloatHyperparameter, - UniformFloatHyperparameter, -) - - -# Uniform | None -> UniformFloatHyperparameter -@overload -def Float( - name: str, - bounds: tuple[float, float] | None = ..., - *, - distribution: Uniform | None = ..., - default: float | None = ..., - q: int | float | None = ..., - log: bool = ..., - meta: dict | None = ..., -) -> UniformFloatHyperparameter: - ... - - -# Normal -> NormalFloatHyperparameter -@overload -def Float( - name: str, - bounds: tuple[float, float] | None = ..., - *, - distribution: Normal, - default: float | None = ..., - q: int | float | None = ..., - log: bool = ..., - meta: dict | None = ..., -) -> NormalFloatHyperparameter: - ... - - -# Beta -> BetaFloatHyperparameter -@overload -def Float( - name: str, - bounds: tuple[float, float] | None = ..., - *, - distribution: Beta, - default: float | None = ..., - q: int | None = ..., - log: bool = ..., - meta: dict | None = ..., -) -> BetaFloatHyperparameter: - ... 
- - -def Float( - name: str, - bounds: tuple[float, float] | None = None, - *, - distribution: Distribution | None = None, - default: float | None = None, - q: int | None = None, - log: bool = False, - meta: dict | None = None, -) -> UniformFloatHyperparameter | NormalFloatHyperparameter | BetaFloatHyperparameter: - """Create a FloatHyperparameter. - - .. code:: python - - # Uniformly distributed - Float("a", (1, 10)) - Float("a", (1, 10), distribution=Uniform()) - - # Normally distributed at 2 with std 3 - Float("b", distribution=Normal(2, 3)) - Float("b", (0, 5), distribution=Normal(2, 3)) # ... bounded - - # Beta distributed with alpha 1 and beta 2 - Float("c", distribution=Beta(1, 2)) - Float("c", (0, 3), distribution=Beta(1, 2)) # ... bounded - - # Give it a default value - Float("a", (1, 10), default=4.3) - - # Sample on a log scale - Float("a", (1, 100), log=True) - - # Quantized into three brackets - Float("a", (1, 10), q=3) - - # Add meta info to the param - Float("a", (1.0, 10), meta={"use": "For counting chickens"}) - - Note - ---- - `Float` is actually a function, please use the corresponding return types if - doing an `isinstance(param, type)` check and not `Float`. - - Parameters - ---------- - name : str - The name to give to this hyperparameter - - bounds : tuple[float, float] | None = None - The bounds to give to the float. Note that by default, this is required - for Uniform distribution, which is the default distribution - - distribution : Uniform | Normal | Beta, = Uniform - The distribution to use for the hyperparameter. See above - - default : float | None = None - The default value to give to the hyperparameter. - - q : float | int | None = None - The quantization factor, must evenly divide the boundaries. 
- - Note - ---- - Quantization points act are not equal and require experimentation - to be certain about - - * https://github.com/automl/ConfigSpace/issues/264 - - log : bool = False - Whether to this parameter lives on a log scale - - meta : dict | None = None - Any meta information you want to associate with this parameter - - Returns - ------- - UniformFloatHyperparameter | NormalFloatHyperparameter | BetaFloatHyperparameter - Returns the corresponding hyperparameter type - """ - if distribution is None: - distribution = Uniform() - - if bounds is None and isinstance(distribution, Uniform): - raise ValueError("`bounds` must be specifed for Uniform distribution") - - if bounds is None: - lower, upper = (None, None) - else: - lower, upper = bounds - - if isinstance(distribution, Uniform): - return UniformFloatHyperparameter( - name=name, - lower=lower, - upper=upper, - default_value=default, - q=q, - log=log, - meta=meta, - ) - - if isinstance(distribution, Normal): - return NormalFloatHyperparameter( - name=name, - lower=lower, - upper=upper, - default_value=default, - mu=distribution.mu, - sigma=distribution.sigma, - q=q, - log=log, - meta=meta, - ) - - if isinstance(distribution, Beta): - return BetaFloatHyperparameter( - name=name, - lower=lower, - upper=upper, - alpha=distribution.alpha, - beta=distribution.beta, - default_value=default, - q=q, - log=log, - meta=meta, - ) - - raise ValueError(f"Unknown distribution type {type(distribution)}") diff --git a/ConfigSpace/api/types/integer.py b/ConfigSpace/api/types/integer.py deleted file mode 100644 index 099cbdbc..00000000 --- a/ConfigSpace/api/types/integer.py +++ /dev/null @@ -1,195 +0,0 @@ -from __future__ import annotations - -from typing import overload - -from ConfigSpace.api.distributions import Beta, Distribution, Normal, Uniform -from ConfigSpace.hyperparameters import ( - BetaIntegerHyperparameter, - NormalIntegerHyperparameter, - UniformIntegerHyperparameter, -) - - -# Uniform | None -> 
UniformIntegerHyperparameter -@overload -def Integer( - name: str, - bounds: tuple[int, int] | None = ..., - *, - distribution: Uniform | None = ..., - default: int | None = ..., - q: int | None = ..., - log: bool = ..., - meta: dict | None = ..., -) -> UniformIntegerHyperparameter: - ... - - -# Normal -> NormalIntegerHyperparameter -@overload -def Integer( - name: str, - bounds: tuple[int, int] | None = ..., - *, - distribution: Normal, - default: int | None = ..., - q: int | None = ..., - log: bool = ..., - meta: dict | None = ..., -) -> NormalIntegerHyperparameter: - ... - - -# Beta -> BetaIntegerHyperparameter -@overload -def Integer( - name: str, - bounds: tuple[int, int] | None = ..., - *, - distribution: Beta, - default: int | None = ..., - q: int | None = ..., - log: bool = ..., - meta: dict | None = ..., -) -> BetaIntegerHyperparameter: - ... - - -def Integer( - name: str, - bounds: tuple[int, int] | None = None, - *, - distribution: Distribution | None = None, - default: int | None = None, - q: int | None = None, - log: bool = False, - meta: dict | None = None, -) -> UniformIntegerHyperparameter | NormalIntegerHyperparameter | BetaIntegerHyperparameter: - """Create an IntegerHyperparameter. - - .. code:: python - - # Uniformly distributed - Integer("a", (1, 10)) - Integer("a", (1, 10), distribution=Uniform()) - - # Normally distributed at 2 with std 3 - Integer("b", distribution=Normal(2, 3)) - Integer("b", (0, 5), distribution=Normal(2, 3)) # ... bounded - - # Beta distributed with alpha 1 and beta 2 - Integer("c", distribution=Beta(1, 2)) - Integer("c", (0, 3), distribution=Beta(1, 2)) # ... 
bounded - - # Give it a default value - Integer("a", (1, 10), default=4) - - # Sample on a log scale - Integer("a", (1, 100), log=True) - - # Quantized into three brackets - Integer("a", (1, 10), q=3) - - # Add meta info to the param - Integer("a", (1, 10), meta={"use": "For counting chickens"}) - - Note - ---- - `Integer` is actually a function, please use the corresponding return types if - doing an `isinstance(param, type)` check and not `Integer`. - - Parameters - ---------- - name : str - The name to give to this hyperparameter - - bounds : tuple[int, int] | None = None - The bounds to give to the integer. Note that by default, this is required - for Uniform distribution, which is the default distribution - - distribution : Uniform | Normal | Beta, = Uniform - The distribution to use for the hyperparameter. See above - - default : int | None = None - The default value to give to the hyperparameter. - - q : int | None = None - The quantization factor, must evenly divide the boundaries. - Sampled values will be - - .. 
code:: - - full range - 1 4 7 10 - |--------------| - | | | | q = 3 - - All samples here will then be in {1, 4, 7, 10} - - Note - ---- - Quantization points act are not equal and require experimentation - to be certain about - - * https://github.com/automl/ConfigSpace/issues/264 - - log : bool = False - Whether to this parameter lives on a log scale - - meta : dict | None = None - Any meta information you want to associate with this parameter - - Returns - ------- - UniformIntegerHyperparameter | NormalIntegerHyperparameter | BetaIntegerHyperparameter - Returns the corresponding hyperparameter type - """ - if distribution is None: - distribution = Uniform() - - if bounds is None and isinstance(distribution, Uniform): - raise ValueError("`bounds` must be specifed for Uniform distribution") - - if bounds is None: - lower, upper = (None, None) - else: - lower, upper = bounds - - if isinstance(distribution, Uniform): - return UniformIntegerHyperparameter( - name=name, - lower=lower, - upper=upper, - q=q, - log=log, - default_value=default, - meta=meta, - ) - - if isinstance(distribution, Normal): - return NormalIntegerHyperparameter( - name=name, - lower=lower, - upper=upper, - q=q, - log=log, - default_value=default, - meta=meta, - mu=distribution.mu, - sigma=distribution.sigma, - ) - - if isinstance(distribution, Beta): - return BetaIntegerHyperparameter( - name=name, - lower=lower, - upper=upper, - q=q, - log=log, - default_value=default, - meta=meta, - alpha=distribution.alpha, - beta=distribution.beta, - ) - - raise ValueError(f"Unknown distribution type {type(distribution)}") diff --git a/ConfigSpace/c_util.pyx b/ConfigSpace/c_util.pyx deleted file mode 100644 index 1f028305..00000000 --- a/ConfigSpace/c_util.pyx +++ /dev/null @@ -1,355 +0,0 @@ -from collections import deque - -import numpy as np -from ConfigSpace.forbidden import AbstractForbiddenComponent -from ConfigSpace.forbidden cimport AbstractForbiddenComponent -from ConfigSpace.hyperparameters import 
Hyperparameter -from ConfigSpace.hyperparameters.hyperparameter cimport Hyperparameter -from ConfigSpace.conditions import ConditionComponent -from ConfigSpace.conditions cimport ConditionComponent -from ConfigSpace.conditions import OrConjunction -from ConfigSpace.exceptions import ( - ForbiddenValueError, - IllegalValueError, - ActiveHyperparameterNotSetError, - InactiveHyperparameterSetError, -) - -from libc.stdlib cimport malloc, free -cimport numpy as np - -# We now need to fix a datatype for our arrays. I've used the variable -# DTYPE for this, which is assigned to the usual NumPy runtime -# type info object. -DTYPE = float -# "ctypedef" assigns a corresponding compile-time type to DTYPE_t. For -# every type in the numpy module there's a corresponding compile-time -# type with a _t-suffix. -ctypedef np.float_t DTYPE_t - - -cpdef int check_forbidden(list forbidden_clauses, np.ndarray vector) except 1: - cdef int Iforbidden = len(forbidden_clauses) - cdef AbstractForbiddenComponent clause - - for i in range(Iforbidden): - clause = forbidden_clauses[i] - if clause.c_is_forbidden_vector(vector, strict=False): - raise ForbiddenValueError("Given vector violates forbidden clause %s" % (str(clause))) - - -cpdef int check_configuration( - self, - np.ndarray vector, - bint allow_inactive_with_values -) except 1: - cdef str hp_name - cdef Hyperparameter hyperparameter - cdef int hyperparameter_idx - cdef DTYPE_t hp_value - cdef int add - cdef ConditionComponent condition - cdef Hyperparameter child - cdef list conditions - cdef list children - cdef set inactive - cdef set visited - - cdef int* active - active = malloc(sizeof(int) * len(vector)) - for i in range(len(vector)): - active[i] = 0 - - unconditional_hyperparameters = self.get_all_unconditional_hyperparameters() - to_visit = deque() - visited = set() - to_visit.extendleft(unconditional_hyperparameters) - inactive = set() - - for ch in unconditional_hyperparameters: - active[self._hyperparameter_idx[ch]] = 1 - - 
while len(to_visit) > 0: - hp_name = to_visit.pop() - visited.add(hp_name) - hp_idx = self._hyperparameter_idx[hp_name] - hyperparameter = self._hyperparameters[hp_name] - hp_value = vector[hp_idx] - - if not np.isnan(hp_value) and not hyperparameter.is_legal_vector(hp_value): - free(active) - raise IllegalValueError(hyperparameter, hp_value) - - children = self._children_of[hp_name] - for child in children: - if child.name not in inactive: - conditions = self._parent_conditions_of[child.name] - add = True - for condition in conditions: - if not condition._evaluate_vector(vector): - add = False - inactive.add(child.name) - break - if add: - hyperparameter_idx = self._hyperparameter_idx[child.name] - active[hyperparameter_idx] = 1 - to_visit.appendleft(child.name) - - if active[hp_idx] and np.isnan(hp_value): - free(active) - raise ActiveHyperparameterNotSetError(hyperparameter) - - for hp_idx in self._idx_to_hyperparameter: - - if not allow_inactive_with_values and not active[hp_idx] and not np.isnan(vector[hp_idx]): - # Only look up the value (in the line above) if the hyperparameter is inactive! 
- hp_name = self._idx_to_hyperparameter[hp_idx] - hp_value = vector[hp_idx] - free(active) - raise InactiveHyperparameterSetError(hyperparameter, hp_value) - - free(active) - self._check_forbidden(vector) - - -cpdef np.ndarray correct_sampled_array( - np.ndarray[DTYPE_t, ndim=1] vector, - list forbidden_clauses_unconditionals, - list forbidden_clauses_conditionals, - list hyperparameters_with_children, - int num_hyperparameters, - list unconditional_hyperparameters, - dict hyperparameter_to_idx, - dict parent_conditions_of, - dict parents_of, - dict children_of, -): - cdef AbstractForbiddenComponent clause - cdef ConditionComponent condition - cdef int hyperparameter_idx - cdef DTYPE_t NaN = np.NaN - cdef set visited - cdef set inactive - cdef Hyperparameter child - cdef list children - cdef str child_name - cdef list parents - cdef Hyperparameter parent - cdef int parents_visited - cdef list conditions - cdef int add - - cdef int* active - active = malloc(sizeof(int) * num_hyperparameters) - for j in range(num_hyperparameters): - active[j] = 0 - - for j in range(len(forbidden_clauses_unconditionals)): - clause = forbidden_clauses_unconditionals[j] - if clause.c_is_forbidden_vector(vector, strict=False): - free(active) - msg = "Given vector violates forbidden clause %s" % str(clause) - raise ForbiddenValueError(msg) - - hps = deque() - visited = set() - hps.extendleft(hyperparameters_with_children) - - for ch in unconditional_hyperparameters: - active[hyperparameter_to_idx[ch]] = 1 - - inactive = set() - - while len(hps) > 0: - hp = hps.pop() - visited.add(hp) - children = children_of[hp] - for child in children: - child_name = child.name - if child_name not in inactive: - parents = parents_of[child_name] - hyperparameter_idx = hyperparameter_to_idx[child_name] - if len(parents) == 1: - conditions = parent_conditions_of[child_name] - add = True - for j in range(len(conditions)): - condition = conditions[j] - if not condition._evaluate_vector(vector): - add = False 
- vector[hyperparameter_idx] = NaN - inactive.add(child_name) - break - if add is True: - active[hyperparameter_idx] = 1 - hps.appendleft(child_name) - - else: - parents_visited = 0 - for parent in parents: - if parent.name in visited: - parents_visited += 1 - if parents_visited > 0: # make sure at least one parent was visited - conditions = parent_conditions_of[child_name] - if isinstance(conditions[0], OrConjunction): - pass - else: # AndCondition - if parents_visited != len(parents): - continue - - add = True - for j in range(len(conditions)): - condition = conditions[j] - if not condition._evaluate_vector(vector): - add = False - vector[hyperparameter_idx] = NaN - inactive.add(child_name) - break - - if add is True: - active[hyperparameter_idx] = 1 - hps.appendleft(child_name) - - else: - continue - - for j in range(len(vector)): - if not active[j]: - vector[j] = NaN - - free(active) - for j in range(len(forbidden_clauses_conditionals)): - clause = forbidden_clauses_conditionals[j] - if clause.c_is_forbidden_vector(vector, strict=False): - msg = "Given vector violates forbidden clause %s" % str(clause) - raise ForbiddenValueError(msg) - - return vector - - -cpdef np.ndarray change_hp_value( - configuration_space, - np.ndarray[DTYPE_t, ndim=1] configuration_array, - str hp_name, - DTYPE_t hp_value, - int index, -): - """Change hyperparameter value in configuration array to given value. - - Does not check if the new value is legal. Activates and deactivates other - hyperparameters if necessary. Does not check if new hyperparameter value - results in the violation of any forbidden clauses. 
- - Parameters - ---------- - configuration_space : ConfigurationSpace - - configuration_array : np.ndarray - - hp_name : str - - hp_value : float - - index : int - - Returns - ------- - np.ndarray - """ - cdef Hyperparameter current - cdef str current_name - cdef list disabled - cdef set hps_to_be_activate - cdef set visited - cdef int active - cdef ConditionComponent condition - cdef int current_idx - cdef DTYPE_t current_value - cdef DTYPE_t default_value - cdef list children - cdef list children_ - cdef Hyperparameter ch - cdef str child - cdef set to_disable - cdef DTYPE_t NaN = np.NaN - cdef dict children_of = configuration_space._children_of - - configuration_array[index] = hp_value - - # Hyperparameters which are going to be set to inactive - disabled = [] - - # Hyperparameters which are going to be set activate, we introduce this to resolve the conflict that might be raised - # by OrConjunction: - # Suppose that we have a parent HP_p whose possible values are A, B, C; a child HP_d is activate if - # HP_p is A or B. Then when HP_p switches from A to B, HP_d needs to remain activate. 
- hps_to_be_activate = set() - - # Activate hyperparameters if their parent node got activated - children = children_of[hp_name] - if len(children) > 0: - to_visit = deque() # type: deque - to_visit.extendleft(children) - visited = set() # type: Set[str] - - while len(to_visit) > 0: - current = to_visit.pop() - current_name = current.name - if current_name in visited: - continue - visited.add(current_name) - if current_name in hps_to_be_activate: - continue - - current_idx = configuration_space._hyperparameter_idx[current_name] - current_value = configuration_array[current_idx] - - conditions = configuration_space._parent_conditions_of[current_name] - - active = True - for condition in conditions: - if not condition._evaluate_vector(configuration_array): - active = False - break - - if active: - hps_to_be_activate.add(current_idx) - if current_value == current_value: - children_ = children_of[current_name] - if len(children_) > 0: - to_visit.extendleft(children_) - - if current_name in disabled: - continue - - if active and not current_value == current_value: - default_value = current.normalized_default_value - configuration_array[current_idx] = default_value - children_ = children_of[current_name] - if len(children_) > 0: - to_visit.extendleft(children_) - - # If the hyperparameter was made inactive, - # all its children need to be deactivade as well - if not active and current_value == current_value: - configuration_array[current_idx] = NaN - - children = children_of[current_name] - - if len(children) > 0: - to_disable = set() - for ch in children: - to_disable.add(ch.name) - while len(to_disable) > 0: - child = to_disable.pop() - child_idx = configuration_space._hyperparameter_idx[child] - disabled.append(child_idx) - children = children_of[child] - - for ch in children: - to_disable.add(ch.name) - - for idx in disabled: - if idx not in hps_to_be_activate: - configuration_array[idx] = NaN - - return configuration_array diff --git a/ConfigSpace/conditions.pxd 
b/ConfigSpace/conditions.pxd deleted file mode 100644 index 8c7af891..00000000 --- a/ConfigSpace/conditions.pxd +++ /dev/null @@ -1,15 +0,0 @@ -import numpy as np -cimport numpy as np - -# We now need to fix a datatype for our arrays. I've used the variable -# DTYPE for this, which is assigned to the usual NumPy runtime -# type info object. -DTYPE = float -# "ctypedef" assigns a corresponding compile-time type to DTYPE_t. For -# every type in the numpy module there's a corresponding compile-time -# type with a _t-suffix. -ctypedef np.float_t DTYPE_t - - -cdef class ConditionComponent(object): - cdef int _evaluate_vector(self, np.ndarray value) diff --git a/ConfigSpace/conditions.pyx b/ConfigSpace/conditions.pyx deleted file mode 100644 index 90e71c26..00000000 --- a/ConfigSpace/conditions.pyx +++ /dev/null @@ -1,759 +0,0 @@ -# Copyright (c) 2014-2016, ConfigSpace developers -# Matthias Feurer -# Katharina Eggensperger -# and others (see commit history). -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the nor the -# names of its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import io -import copy -from itertools import combinations -from typing import Any, List, Union, Tuple, Dict - -from libc.stdlib cimport malloc, free - -import numpy as np - -from ConfigSpace.hyperparameters.hyperparameter cimport Hyperparameter - -cimport numpy as np - - -cdef class ConditionComponent(object): - - def __init__(self) -> None: - pass - - def __repr__(self) -> str: - pass - - def set_vector_idx(self, hyperparameter_to_idx) -> None: - pass - - def get_children_vector(self) -> List[int]: - pass - - def get_parents_vector(self) -> List[int]: - pass - - def get_children(self) -> List["ConditionComponent"]: - pass - - def get_parents(self) -> List["ConditionComponent"]: - pass - - def get_descendant_literal_conditions(self) ->List["AbstractCondition"]: - pass - - def evaluate(self, - instantiated_parent_hyperparameter: Dict[str, Union[None, int, float, str]] - ) -> bool: - pass - - def evaluate_vector(self, instantiated_vector): - return bool(self._evaluate_vector(instantiated_vector)) - - cdef int _evaluate_vector(self, np.ndarray value): - pass - - def __hash__(self) -> int: - """Override the default hash behavior (that returns the id or the object)""" - return hash(tuple(sorted(self.__dict__.items()))) - - -cdef class AbstractCondition(ConditionComponent): - cdef public Hyperparameter child - cdef public Hyperparameter parent - cdef public int child_vector_id - cdef public int parent_vector_id - cdef public value - cdef public DTYPE_t vector_value - - def 
__init__(self, child: Hyperparameter, parent: Hyperparameter) -> None: - if child == parent: - raise ValueError( - "The child and parent hyperparameter must be different " - "hyperparameters." - ) - self.child = child - self.parent = parent - self.child_vector_id = -1 - self.parent_vector_id = -1 - - def __eq__(self, other: Any) -> bool: - """ - This method implements a comparison between self and another - object. - - Additionally, it defines the __ne__() as stated in the - documentation from python: - By default, object implements __eq__() by using is, returning NotImplemented - in the case of a false comparison: True if x is y else NotImplemented. - For __ne__(), by default it delegates to __eq__() and inverts the result - unless it is NotImplemented. - - """ - if not isinstance(other, self.__class__): - return False - - if self.child != other.child: - return False - elif self.parent != other.parent: - return False - return self.value == other.value - - def set_vector_idx(self, hyperparameter_to_idx: dict): - self.child_vector_id = hyperparameter_to_idx[self.child.name] - self.parent_vector_id = hyperparameter_to_idx[self.parent.name] - - def get_children_vector(self) -> List[int]: - return [self.child_vector_id] - - def get_parents_vector(self) -> List[int]: - return [self.parent_vector_id] - - def get_children(self) -> List[Hyperparameter]: - return [self.child] - - def get_parents(self) -> List[Hyperparameter]: - return [self.parent] - - def get_descendant_literal_conditions(self) -> List["AbstractCondition"]: - return [self] - - def evaluate(self, instantiated_parent_hyperparameter: Dict[str, Union[int, float, str]] - ) -> bool: - hp_name = self.parent.name - return self._evaluate(instantiated_parent_hyperparameter[hp_name]) - - cdef int _evaluate_vector(self, np.ndarray instantiated_vector): - if self.parent_vector_id is None: - raise ValueError("Parent vector id should not be None when calling evaluate vector") - return 
self._inner_evaluate_vector(instantiated_vector[self.parent_vector_id]) - - def _evaluate(self, instantiated_parent_hyperparameter: Union[str, int, float]) -> bool: - pass - - cdef int _inner_evaluate_vector(self, DTYPE_t value): - pass - - -cdef class EqualsCondition(AbstractCondition): - - def __init__(self, child: Hyperparameter, parent: Hyperparameter, - value: Union[str, float, int]) -> None: - """Hyperparameter ``child`` is conditional on the ``parent`` hyperparameter - being *equal* to ``value``. - - Make *b* an active hyperparameter if *a* has the value 1 - - >>> from ConfigSpace import ConfigurationSpace, EqualsCondition - >>> - >>> cs = ConfigurationSpace({ - ... "a": [1, 2, 3], - ... "b": (1.0, 8.0) - ... }) - >>> cond = EqualsCondition(cs['b'], cs['a'], 1) - >>> cs.add_condition(cond) - b | a == 1 - - Parameters - ---------- - child : :ref:`Hyperparameters` - This hyperparameter will be sampled in the configspace - if the *equal condition* is satisfied - parent : :ref:`Hyperparameters` - The hyperparameter, which has to satisfy the *equal condition* - value : str, float, int - Value, which the parent is compared to - """ - - super(EqualsCondition, self).__init__(child, parent) - if not parent.is_legal(value): - raise ValueError("Hyperparameter '%s' is " - "conditional on the illegal value '%s' of " - "its parent hyperparameter '%s'" % - (child.name, value, parent.name)) - self.value = value - self.vector_value = self.parent._inverse_transform(self.value) - - def __repr__(self) -> str: - return "%s | %s == %s" % (self.child.name, self.parent.name, - repr(self.value)) - - def __copy__(self): - return self.__class__( - child=copy.copy(self.child), - parent=copy.copy(self.parent), - value=copy.copy(self.value), - ) - - def _evaluate(self, value: Union[str, float, int]) -> bool: - # No need to check if the value to compare is a legal value; either it - # is equal (and thus legal), or it would evaluate to False anyway - - cmp = self.parent.compare(value, 
self.value) - if cmp == 0: - return True - else: - return False - - cdef int _inner_evaluate_vector(self, DTYPE_t value): - # No need to check if the value to compare is a legal value; either it - # is equal (and thus legal), or it would evaluate to False anyway - - cdef int cmp = self.parent.compare_vector(value, self.vector_value) - if cmp == 0: - return True - else: - return False - - -cdef class NotEqualsCondition(AbstractCondition): - def __init__(self, child: Hyperparameter, parent: Hyperparameter, - value: Union[str, float, int]) -> None: - """Hyperparameter ``child`` is conditional on the ``parent`` hyperparameter - being *not equal* to ``value``. - - Make *b* an active hyperparameter if *a* has **not** the value 1 - - >>> from ConfigSpace import ConfigurationSpace, NotEqualsCondition - >>> - >>> cs = ConfigurationSpace({ - ... "a": [1, 2, 3], - ... "b": (1.0, 8.0) - ... }) - >>> cond = NotEqualsCondition(cs['b'], cs['a'], 1) - >>> cs.add_condition(cond) - b | a != 1 - - Parameters - ---------- - child : :ref:`Hyperparameters` - This hyperparameter will be sampled in the configspace - if the not-equals condition is satisfied - parent : :ref:`Hyperparameters` - The hyperparameter, which has to satisfy the - *not equal condition* - value : str, float, int - Value, which the parent is compared to - """ - super(NotEqualsCondition, self).__init__(child, parent) - if not parent.is_legal(value): - raise ValueError("Hyperparameter '%s' is " - "conditional on the illegal value '%s' of " - "its parent hyperparameter '%s'" % - (child.name, value, parent.name)) - self.value = value - self.vector_value = self.parent._inverse_transform(self.value) - - def __repr__(self) -> str: - return "%s | %s != %s" % (self.child.name, self.parent.name, - repr(self.value)) - - def __copy__(self): - return self.__class__( - child=copy.copy(self.child), - parent=copy.copy(self.parent), - value=copy.copy(self.value), - ) - - def _evaluate(self, value: Union[str, float, int]) -> bool: - 
if not self.parent.is_legal(value): - return False - - cmp = self.parent.compare(value, self.value) - if cmp != 0: - return True - else: - return False - - cdef int _inner_evaluate_vector(self, DTYPE_t value): - if not self.parent.is_legal_vector(value): - return False - - cdef int cmp = self.parent.compare_vector(value, self.vector_value) - if cmp != 0: - return True - else: - return False - - -cdef class LessThanCondition(AbstractCondition): - def __init__(self, child: Hyperparameter, parent: Hyperparameter, - value: Union[str, float, int]) -> None: - """ - Hyperparameter ``child`` is conditional on the ``parent`` hyperparameter - being *less than* ``value``. - - Make *b* an active hyperparameter if *a* is less than 5 - - >>> from ConfigSpace import ConfigurationSpace, LessThanCondition - >>> - >>> cs = ConfigurationSpace({ - ... "a": (0, 10), - ... "b": (1.0, 8.0) - ... }) - >>> cond = LessThanCondition(cs['b'], cs['a'], 5) - >>> cs.add_condition(cond) - b | a < 5 - - Parameters - ---------- - child : :ref:`Hyperparameters` - This hyperparameter will be sampled in the configspace, - if the *LessThanCondition* is satisfied - parent : :ref:`Hyperparameters` - The hyperparameter, which has to satisfy the *LessThanCondition* - value : str, float, int - Value, which the parent is compared to - """ - super(LessThanCondition, self).__init__(child, parent) - self.parent.allow_greater_less_comparison() - if not parent.is_legal(value): - raise ValueError("Hyperparameter '%s' is " - "conditional on the illegal value '%s' of " - "its parent hyperparameter '%s'" % - (child.name, value, parent.name)) - self.value = value - self.vector_value = self.parent._inverse_transform(self.value) - - def __repr__(self) -> str: - return "%s | %s < %s" % (self.child.name, self.parent.name, - repr(self.value)) - - def __copy__(self): - return self.__class__( - child=copy.copy(self.child), - parent=copy.copy(self.parent), - value=copy.copy(self.value), - ) - - def _evaluate(self, value: 
Union[str, float, int]) -> bool: - if not self.parent.is_legal(value): - return False - - cmp = self.parent.compare(value, self.value) - if cmp == -1: - return True - else: - return False - - cdef int _inner_evaluate_vector(self, DTYPE_t value): - if not self.parent.is_legal_vector(value): - return False - - cdef int cmp = self.parent.compare_vector(value, self.vector_value) - if cmp == -1: - return True - else: - return False - - -cdef class GreaterThanCondition(AbstractCondition): - def __init__(self, child: Hyperparameter, parent: Hyperparameter, - value: Union[str, float, int]) -> None: - """ - Hyperparameter ``child`` is conditional on the ``parent`` hyperparameter - being *greater than* ``value``. - - Make *b* an active hyperparameter if *a* is greater than 5 - - >>> from ConfigSpace import ConfigurationSpace, GreaterThanCondition - >>> - >>> cs = ConfigurationSpace({ - ... "a": (0, 10), - ... "b": (1.0, 8.0) - ... }) - >>> cond = GreaterThanCondition(cs['b'], cs['a'], 5) - >>> cs.add_condition(cond) - b | a > 5 - - Parameters - ---------- - child : :ref:`Hyperparameters` - This hyperparameter will be sampled in the configspace, - if the *GreaterThanCondition* is satisfied - parent : :ref:`Hyperparameters` - The hyperparameter, which has to satisfy the *GreaterThanCondition* - value : str, float, int - Value, which the parent is compared to - """ - super(GreaterThanCondition, self).__init__(child, parent) - - self.parent.allow_greater_less_comparison() - if not parent.is_legal(value): - raise ValueError("Hyperparameter '%s' is " - "conditional on the illegal value '%s' of " - "its parent hyperparameter '%s'" % - (child.name, value, parent.name)) - self.value = value - self.vector_value = self.parent._inverse_transform(self.value) - - def __repr__(self) -> str: - return "%s | %s > %s" % (self.child.name, self.parent.name, - repr(self.value)) - - def __copy__(self): - return self.__class__( - child=copy.copy(self.child), - parent=copy.copy(self.parent), - 
value=copy.copy(self.value), - ) - - def _evaluate(self, value: Union[str, float, int]) -> bool: - if not self.parent.is_legal(value): - return False - - cmp = self.parent.compare(value, self.value) - if cmp == 1: - return True - else: - return False - - cdef int _inner_evaluate_vector(self, DTYPE_t value): - if not self.parent.is_legal_vector(value): - return False - - cdef int cmp = self.parent.compare_vector(value, self.vector_value) - if cmp == 1: - return True - else: - return False - -cdef class InCondition(AbstractCondition): - cdef public values - cdef public vector_values - - def __init__(self, child: Hyperparameter, parent: Hyperparameter, - values: List[Union[str, float, int]]) -> None: - """ - Hyperparameter ``child`` is conditional on the ``parent`` hyperparameter - being *in* a set of ``values``. - - make *b* an active hyperparameter if *a* is in the set [1, 2, 3, 4] - - >>> from ConfigSpace import ConfigurationSpace, InCondition - >>> - >>> cs = ConfigurationSpace({ - ... "a": (0, 10), - ... "b": (1.0, 8.0) - ... 
}) - >>> cond = InCondition(cs['b'], cs['a'], [1, 2, 3, 4]) - >>> cs.add_condition(cond) - b | a in {1, 2, 3, 4} - - Parameters - ---------- - child : :ref:`Hyperparameters` - This hyperparameter will be sampled in the configspace, - if the *InCondition* is satisfied - parent : :ref:`Hyperparameters` - The hyperparameter, which has to satisfy the *InCondition* - values : list(str, float, int) - Collection of values, which the parent is compared to - - """ - super(InCondition, self).__init__(child, parent) - for value in values: - if not parent.is_legal(value): - raise ValueError("Hyperparameter '%s' is " - "conditional on the illegal value '%s' of " - "its parent hyperparameter '%s'" % - (child.name, value, parent.name)) - self.values = values - self.value = values - self.vector_values = [self.parent._inverse_transform(value) for value in self.values] - - def __repr__(self) -> str: - return "%s | %s in {%s}" % (self.child.name, self.parent.name, - ", ".join( - [repr(value) for value in self.values])) - - def _evaluate(self, value: Union[str, float, int]) -> bool: - return value in self.values - - cdef int _inner_evaluate_vector(self, DTYPE_t value): - return value in self.vector_values - - -cdef class AbstractConjunction(ConditionComponent): - cdef public tuple components - cdef int n_components - cdef tuple dlcs - - def __init__(self, *args: AbstractCondition) -> None: - super(AbstractConjunction, self).__init__() - self.components = args - self.n_components = len(self.components) - self.dlcs = self.get_descendant_literal_conditions() - - # Test the classes - for idx, component in enumerate(self.components): - if not isinstance(component, ConditionComponent): - raise TypeError("Argument #%d is not an instance of %s, " - "but %s" % ( - idx, ConditionComponent, type(component))) - - # Test that all conjunctions and conditions have the same child! 
- children = self.get_children() - for c1, c2 in combinations(children, 2): - if c1 != c2: - raise ValueError("All Conjunctions and Conditions must have " - "the same child.") - - def __eq__(self, other: Any) -> bool: - """ - This method implements a comparison between self and another - object. - - Additionally, it defines the __ne__() as stated in the - documentation from python: - By default, object implements __eq__() by using is, returning NotImplemented - in the case of a false comparison: True if x is y else NotImplemented. - For __ne__(), by default it delegates to __eq__() and inverts the result - unless it is NotImplemented. - - """ - if not isinstance(other, self.__class__): - return False - - if len(self.components) != len(other.components): - return False - - for component, other_component in zip(self.components, other.components): - if (component != other_component): - return False - - return True - - def __copy__(self): - return self.__class__(*[copy.copy(comp) for comp in self.components]) - - def get_descendant_literal_conditions(self) -> Tuple[AbstractCondition]: - children = [] # type: List[AbstractCondition] - for component in self.components: - if isinstance(component, AbstractConjunction): - children.extend(component.get_descendant_literal_conditions()) - else: - children.append(component) - return tuple(children) - - def set_vector_idx(self, hyperparameter_to_idx: dict): - for component in self.components: - component.set_vector_idx(hyperparameter_to_idx) - - def get_children_vector(self) -> List[int]: - children_vector = [] - for component in self.components: - children_vector.extend(component.get_children_vector()) - return children_vector - - def get_parents_vector(self) -> List[int]: - parents_vector = [] - for component in self.components: - parents_vector.extend(component.get_parents_vector()) - return parents_vector - - def get_children(self) -> List[ConditionComponent]: - children = [] # type: List[ConditionComponent] - for component 
in self.components: - children.extend(component.get_children()) - return children - - def get_parents(self) -> List[ConditionComponent]: - parents = [] # type: List[ConditionComponent] - for component in self.components: - parents.extend(component.get_parents()) - return parents - - def evaluate(self, instantiated_hyperparameters: Dict[str, Union[None, int, float, str]] - ) -> bool: - cdef int* arrptr - arrptr = malloc(sizeof(int) * self.n_components) - - # Then, check if all parents were passed - conditions = self.dlcs - for condition in conditions: - if condition.parent.name not in instantiated_hyperparameters: - raise ValueError("Evaluate must be called with all " - "instanstatiated parent hyperparameters in " - "the conjunction; you are (at least) missing " - "'%s'" % condition.parent.name) - - # Finally, call evaluate for all direct descendents and combine the - # outcomes - for i, component in enumerate(self.components): - e = component.evaluate(instantiated_hyperparameters) - arrptr[i] = (e) - - rval = self._evaluate(self.n_components, arrptr) - free(arrptr) - return rval - - cdef int _evaluate_vector(self, np.ndarray instantiated_vector): - cdef ConditionComponent component - cdef int e - cdef int rval - cdef int* arrptr - arrptr = malloc(sizeof(int) * self.n_components) - - # Finally, call evaluate for all direct descendents and combine the - # outcomes - for i in range(self.n_components): - component = self.components[i] - e = component._evaluate_vector(instantiated_vector) - arrptr[i] = e - - rval = self._evaluate(self.n_components, arrptr) - free(arrptr) - return rval - - cdef int _evaluate(self, int I, int* evaluations): - pass - - -cdef class AndConjunction(AbstractConjunction): - # TODO: test if an AndConjunction results in an illegal state or a - # Tautology! -> SAT solver - def __init__(self, *args: AbstractCondition) -> None: - """By using the *AndConjunction*, constraints can easily be connected. 
- - The following example shows how two constraints with an *AndConjunction* - can be combined. - - >>> from ConfigSpace import ( - ... ConfigurationSpace, - ... LessThanCondition, - ... GreaterThanCondition, - ... AndConjunction - ... ) - >>> - >>> cs = ConfigurationSpace({ - ... "a": (5, 15), - ... "b": (0, 10), - ... "c": (0.0, 1.0) - ... }) - >>> less_cond = LessThanCondition(cs['c'], cs['a'], 10) - >>> greater_cond = GreaterThanCondition(cs['c'], cs['b'], 5) - >>> cs.add_condition(AndConjunction(less_cond, greater_cond)) - (c | a < 10 && c | b > 5) - - Parameters - ---------- - *args : :ref:`Conditions` - conditions, which will be combined with an *AndConjunction* - """ - if len(args) < 2: - raise ValueError("AndConjunction must at least have two Conditions.") - super(AndConjunction, self).__init__(*args) - - def __repr__(self) -> str: - retval = io.StringIO() - retval.write("(") - for idx, component in enumerate(self.components): - retval.write(str(component)) - if idx < len(self.components) - 1: - retval.write(" && ") - retval.write(")") - return retval.getvalue() - - cdef int _evaluate_vector(self, np.ndarray instantiated_vector): - cdef ConditionComponent component - cdef int e - - for i in range(self.n_components): - component = self.components[i] - e = component._evaluate_vector(instantiated_vector) - if e == 0: - return 0 - - return 1 - - cdef int _evaluate(self, int I, int* evaluations): - for i in range(I): - if evaluations[i] == 0: - return 0 - return 1 - - -cdef class OrConjunction(AbstractConjunction): - def __init__(self, *args: AbstractCondition) -> None: - """ - Similar to the *AndConjunction*, constraints can be combined by - using the *OrConjunction*. - - >>> from ConfigSpace import ( - ... ConfigurationSpace, - ... LessThanCondition, - ... GreaterThanCondition, - ... OrConjunction - ... ) - >>> - >>> cs = ConfigurationSpace({ - ... "a": (5, 15), - ... "b": (0, 10), - ... "c": (0.0, 1.0) - ... 
}) - >>> less_cond = LessThanCondition(cs['c'], cs['a'], 10) - >>> greater_cond = GreaterThanCondition(cs['c'], cs['b'], 5) - >>> cs.add_condition(OrConjunction(less_cond, greater_cond)) - (c | a < 10 || c | b > 5) - - Parameters - ---------- - *args : :ref:`Conditions` - conditions, which will be combined with an *OrConjunction* - """ - if len(args) < 2: - raise ValueError("OrConjunction must at least have two Conditions.") - super(OrConjunction, self).__init__(*args) - - def __repr__(self) -> str: - retval = io.StringIO() - retval.write("(") - for idx, component in enumerate(self.components): - retval.write(str(component)) - if idx < len(self.components) - 1: - retval.write(" || ") - retval.write(")") - return retval.getvalue() - - cdef int _evaluate(self, int I, int* evaluations): - for i in range(I): - if evaluations[i] == 1: - return 1 - return 0 - - cdef int _evaluate_vector(self, np.ndarray instantiated_vector): - cdef ConditionComponent component - cdef int e - - for i in range(self.n_components): - component = self.components[i] - e = component._evaluate_vector(instantiated_vector) - if e == 1: - return 1 - - return 0 diff --git a/ConfigSpace/configuration.py b/ConfigSpace/configuration.py deleted file mode 100644 index 2d82c38e..00000000 --- a/ConfigSpace/configuration.py +++ /dev/null @@ -1,264 +0,0 @@ -from __future__ import annotations - -import warnings -from typing import TYPE_CHECKING, Any, Iterator, KeysView, Mapping, Sequence - -import numpy as np - -from ConfigSpace import c_util -from ConfigSpace.exceptions import HyperparameterNotFoundError, IllegalValueError -from ConfigSpace.hyperparameters import FloatHyperparameter - -if TYPE_CHECKING: - from ConfigSpace.configuration_space import ConfigurationSpace - - -class Configuration(Mapping[str, Any]): - def __init__( - self, - configuration_space: ConfigurationSpace, - values: Mapping[str, str | float | int | None] | None = None, - vector: Sequence[float] | np.ndarray | None = None, - 
allow_inactive_with_values: bool = False, - origin: Any | None = None, - config_id: int | None = None, - ) -> None: - """Class for a single configuration. - - The :class:`~ConfigSpace.configuration_space.Configuration` object holds - for all active hyperparameters a value. While the - :class:`~ConfigSpace.configuration_space.ConfigurationSpace` stores the - definitions for the hyperparameters (value ranges, constraints,...), a - :class:`~ConfigSpace.configuration_space.Configuration` object is - more an instance of it. Parameters of a - :class:`~ConfigSpace.configuration_space.Configuration` object can be - accessed and modified similar to python dictionaries - (c.f. :ref:`Guide<1st_Example>`). - - Parameters - ---------- - configuration_space : :class:`~ConfigSpace.configuration_space.ConfigurationSpace` - values : dict, optional - A dictionary with pairs (hyperparameter_name, value), where value is - a legal value of the hyperparameter in the above configuration_space - vector : np.ndarray, optional - A numpy array for efficient representation. Either values or vector - has to be given - allow_inactive_with_values : bool, optional - Whether an Exception will be raised if a value for an inactive - hyperparameter is given. Default is to raise an Exception. - Default to False - origin : Any, optional - Store information about the origin of this configuration. Defaults to None - config_id : int, optional - Integer configuration ID which can be used by a program using the ConfigSpace - package. - """ - if values is not None and vector is not None or values is None and vector is None: - raise ValueError("Specify Configuration as either a dictionary or a vector.") - - self.config_space = configuration_space - self.allow_inactive_with_values = allow_inactive_with_values - self.origin = origin - self.config_id = config_id - - # This is cached. When it's None, it means it needs to be relaoaded - # which is primarly handled in __getitem__. 
- self._values: dict[str, Any] | None = None - - # Will be set below - self._vector: np.ndarray - - if values is not None: - unknown_keys = values.keys() - self.config_space._hyperparameters.keys() - if any(unknown_keys): - raise ValueError(f"Unknown hyperparameter(s) {unknown_keys}") - - # Using cs._hyperparameters to iterate makes sure that the hyperparameters in - # the configuration are sorted in the same way as they are sorted in - # the configuration space - self._values = {} - self._vector = np.ndarray(shape=len(configuration_space), dtype=float) - - for i, (key, hp) in enumerate(configuration_space.items()): - value = values.get(key) - if value is None: - self._vector[i] = np.nan # By default, represent None values as NaN - continue - - if not hp.is_legal(value): - raise IllegalValueError(hp, value) - - # Truncate the float to be of constant length for a python version - if isinstance(hp, FloatHyperparameter): - value = float(repr(value)) - - self._values[key] = value - self._vector[i] = hp._inverse_transform(value) - - self.is_valid_configuration() - - elif vector is not None: - _vector = np.asarray(vector, dtype=float) - - # If we have a 2d array with shape (n, 1), flatten it - if len(_vector.shape) == 2 and _vector.shape[1] == 1: - _vector = _vector.flatten() - - if len(_vector.shape) > 1: - raise ValueError( - "Only 1d arrays can be converted to a Configuration, " - f"you passed an array of shape {_vector.shape}", - ) - - n_hyperparameters = len(self.config_space) - if len(_vector) != len(self.config_space): - raise ValueError( - f"Expected array of length {n_hyperparameters}, got {len(_vector)}", - ) - - self._vector = _vector - - def is_valid_configuration(self) -> None: - """Check if the object is a valid. - - Raises - ------ - ValueError: If configuration is not valid. 
- """ - c_util.check_configuration( - self.config_space, - self._vector, - allow_inactive_with_values=self.allow_inactive_with_values, - ) - - def get_array(self) -> np.ndarray: - """The internal vector representation of this config. - - All continuous values are scaled between zero and one. - - Returns - ------- - numpy.ndarray - The vector representation of the configuration - """ - return self._vector - - def __contains__(self, item: object) -> bool: - if not isinstance(item, str): - return False - - return item in self - - def __setitem__(self, key: str, value: Any) -> None: - param = self.config_space[key] - if not param.is_legal(value): - raise IllegalValueError(param, value) - - idx = self.config_space._hyperparameter_idx[key] - - # Recalculate the vector with respect to this new value - vector_value = param._inverse_transform(value) - new_array = c_util.change_hp_value( - self.config_space, - self.get_array().copy(), - param.name, - vector_value, - idx, - ) - c_util.check_configuration(self.config_space, new_array, False) - - # Reset cached items - self._vector = new_array - self._values = None - - def __getitem__(self, key: str) -> Any: - if self._values is not None and key in self._values: - return self._values[key] - - if key not in self.config_space: - raise HyperparameterNotFoundError(key, space=self.config_space) - - item_idx = self.config_space._hyperparameter_idx[key] - - raw_value = self._vector[item_idx] - if not np.isfinite(raw_value): - # NOTE: Techinically we could raise an `InactiveHyperparameterError` here - # but that causes the `.get()` method from being a mapping to fail. - # Normally `config.get(key)`, if it fails, will return None. Apparently, - # this only works if `__getitem__[]` raises a KeyError or something derived - # from it. 
- raise KeyError(key) - - hyperparameter = self.config_space._hyperparameters[key] - value = hyperparameter._transform(raw_value) - - # Truncate float to be of constant length for a python version - if isinstance(hyperparameter, FloatHyperparameter): - value = float(repr(value)) - - if self._values is None: - self._values = {} - - self._values[key] = value - return value - - def keys(self) -> KeysView[str]: - """Return the keys of the configuration. - - Returns - ------- - KeysView[str] - The keys of the configuration - """ - d = { - key: self._vector[idx] - for idx, key in enumerate(self.config_space.keys()) - if np.isfinite(self._vector[idx]) - } - return d.keys() - - def __eq__(self, other: Any) -> bool: - if isinstance(other, self.__class__): - return dict(self) == dict(other) and self.config_space == other.config_space - return NotImplemented - - def __hash__(self) -> int: - return hash(self.__repr__()) - - def __repr__(self) -> str: - values = dict(self) - header = "Configuration(values={" - lines = [f" '{key}': {repr(values[key])}," for key in sorted(values.keys())] - end = "})" - return "\n".join([header, *lines, end]) - - def __iter__(self) -> Iterator[str]: - return iter(self.keys()) - - def __len__(self) -> int: - return len(self.config_space) - - # ------------ Marked Deprecated -------------------- - # Probably best to only remove these once we actually - # make some other breaking changes - # * Search `Marked Deprecated` to find others - def get_dictionary(self) -> dict[str, Any]: - """A representation of the :class:`~ConfigSpace.configuration_space.Configuration` - in dictionary form. - - Returns - ------- - dict - Configuration as dictionary - """ - warnings.warn( - "`Configuration` act's like a dictionary." 
- " Please use `dict(config)` instead of `get_dictionary`" - " if you explicitly need a `dict`", - DeprecationWarning, - stacklevel=2, - ) - return dict(self) - - # --------------------------------------------------- diff --git a/ConfigSpace/configuration_space.py b/ConfigSpace/configuration_space.py deleted file mode 100644 index 29f624f6..00000000 --- a/ConfigSpace/configuration_space.py +++ /dev/null @@ -1,1584 +0,0 @@ -# Copyright (c) 2014-2016, ConfigSpace developers -# Matthias Feurer -# Katharina Eggensperger -# and others (see commit history). -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the nor the -# names of its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from __future__ import annotations - -import contextlib -import copy -import io -import warnings -from collections import OrderedDict, defaultdict, deque -from itertools import chain -from typing import Any, Iterable, Iterator, KeysView, Mapping, cast, overload -from typing_extensions import Final - -import numpy as np - -import ConfigSpace.c_util -from ConfigSpace import nx -from ConfigSpace.conditions import ( - AbstractCondition, - AbstractConjunction, - ConditionComponent, - EqualsCondition, -) -from ConfigSpace.configuration import Configuration -from ConfigSpace.exceptions import ( - ActiveHyperparameterNotSetError, - AmbiguousConditionError, - ChildNotFoundError, - CyclicDependancyError, - ForbiddenValueError, - HyperparameterAlreadyExistsError, - HyperparameterIndexError, - HyperparameterNotFoundError, - IllegalValueError, - InactiveHyperparameterSetError, - ParentNotFoundError, -) -from ConfigSpace.forbidden import ( - AbstractForbiddenClause, - AbstractForbiddenComponent, - AbstractForbiddenConjunction, - ForbiddenRelation, -) -from ConfigSpace.hyperparameters import ( - CategoricalHyperparameter, - Constant, - Hyperparameter, - UniformFloatHyperparameter, - UniformIntegerHyperparameter, -) - -_ROOT: Final = "__HPOlib_configuration_space_root__" - - -def _parse_hyperparameters_from_dict(items: dict[str, Any]) -> Iterator[Hyperparameter]: - for name, hp in items.items(): - # Anything that is a Hyperparameter already is good - # Note that we discard the key name in this 
case in favour - # of the name given in the dictionary - if isinstance(hp, Hyperparameter): - yield hp - - # Tuples are bounds, check if float or int - elif isinstance(hp, tuple): - if len(hp) != 2: - raise ValueError(f"'{name}' must be (lower, upper) bound, got {hp}") - - lower, upper = hp - if isinstance(lower, float): - yield UniformFloatHyperparameter(name, lower, upper) - else: - yield UniformIntegerHyperparameter(name, lower, upper) - - # Lists are categoricals - elif isinstance(hp, list): - if len(hp) == 0: - raise ValueError(f"Can't have empty list for categorical {name}") - - yield CategoricalHyperparameter(name, hp) - - # If it's an allowed type, it's a constant - elif isinstance(hp, (int, str, float)): - yield Constant(name, hp) - - else: - raise ValueError(f"Unknown value '{hp}' for '{name}'") - - -def _assert_type(item: Any, expected: type, method: str | None = None) -> None: - if not isinstance(item, expected): - msg = f"Expected {expected}, got {type(item)}" - if method: - msg += " in method " + method - raise TypeError(msg) - - -def _assert_legal(hyperparameter: Hyperparameter, value: tuple | list | Any) -> None: - if isinstance(value, (tuple, list)): - for v in value: - if not hyperparameter.is_legal(v): - raise IllegalValueError(hyperparameter, v) - elif not hyperparameter.is_legal(value): - raise IllegalValueError(hyperparameter, value) - - -class ConfigurationSpace(Mapping[str, Hyperparameter]): - """A collection-like object containing a set of hyperparameter definitions and conditions. - - A configuration space organizes all hyperparameters and its conditions - as well as its forbidden clauses. Configurations can be sampled from - this configuration space. As underlying data structure, the - configuration space uses a tree-based approach to represent the - conditions and restrictions between hyperparameters. 
- """ - - def __init__( - self, - name: str | dict | None = None, - seed: int | None = None, - meta: dict | None = None, - *, - space: None - | ( - dict[ - str, - tuple[int, int] | tuple[float, float] | list[Any] | int | float | str, - ] - ) = None, - ) -> None: - """ - - Parameters - ---------- - name : str | dict, optional - Name of the configuration space. If a dict is passed, this is considered the same - as the `space` arg. - seed : int, optional - Random seed - meta : dict, optional - Field for holding meta data provided by the user. - Not used by the configuration space. - space: - A simple configuration space to use: - - .. code:: python - - ConfigurationSpace( - name="myspace", - space={ - "uniform_integer": (1, 10), - "uniform_float": (1.0, 10.0), - "categorical": ["a", "b", "c"], - "constant": 1337, - } - ) - - """ - # If first arg is a dict, we assume this to be `space` - if isinstance(name, dict): - space = name - name = None - - self.name = name - self.meta = meta - - # NOTE: The idx of a hyperparamter is tied to its order in _hyperparamters - # Having three variables to keep track of this seems excessive - self._hyperparameters: OrderedDict[str, Hyperparameter] = OrderedDict() - self._hyperparameter_idx: dict[str, int] = {} - self._idx_to_hyperparameter: dict[int, str] = {} - - # Use dictionaries to make sure that we don't accidently add - # additional keys to these mappings (which happened with defaultdict()). - # This once broke auto-sklearn's equal comparison of configuration - # spaces when _children of one instance contained all possible - # hyperparameters as keys and empty dictionaries as values while the - # other instance not containing these. 
- self._children: OrderedDict[str, OrderedDict[str, None | AbstractCondition]] - self._children = OrderedDict() - - self._parents: OrderedDict[str, OrderedDict[str, None | AbstractCondition]] - self._parents = OrderedDict() - - # Changing this to a normal dict will break sampling because there is - # no guarantee that the parent of a condition was evaluated before - self._conditionals: set[str] = set() - self.forbidden_clauses: list[AbstractForbiddenComponent] = [] - self.random = np.random.RandomState(seed) - - self._children[_ROOT] = OrderedDict() - - self._parent_conditions_of: dict[str, list[AbstractCondition]] = {} - self._child_conditions_of: dict[str, list[AbstractCondition]] = {} - self._parents_of: dict[str, list[Hyperparameter]] = {} - self._children_of: dict[str, list[Hyperparameter]] = {} - - if space is not None: - hyperparameters = list(_parse_hyperparameters_from_dict(space)) - self.add_hyperparameters(hyperparameters) - - def add_hyperparameter(self, hyperparameter: Hyperparameter) -> Hyperparameter: - """Add a hyperparameter to the configuration space. - - Parameters - ---------- - hyperparameter : :ref:`Hyperparameters` - The hyperparameter to add - - Returns - ------- - :ref:`Hyperparameters` - The added hyperparameter - """ - _assert_type(hyperparameter, Hyperparameter, method="add_hyperparameter") - - self._add_hyperparameter(hyperparameter) - self._update_cache() - self._check_default_configuration() - self._sort_hyperparameters() - - return hyperparameter - - def add_hyperparameters( - self, - hyperparameters: Iterable[Hyperparameter], - ) -> list[Hyperparameter]: - """Add hyperparameters to the configuration space. 
- - Parameters - ---------- - hyperparameters : Iterable(:ref:`Hyperparameters`) - Collection of hyperparameters to add - - Returns - ------- - list(:ref:`Hyperparameters`) - List of added hyperparameters (same as input) - """ - hyperparameters = list(hyperparameters) - for hp in hyperparameters: - _assert_type(hp, Hyperparameter, method="add_hyperparameters") - - for hyperparameter in hyperparameters: - self._add_hyperparameter(hyperparameter) - - self._update_cache() - self._check_default_configuration() - self._sort_hyperparameters() - return hyperparameters - - def add_condition(self, condition: ConditionComponent) -> ConditionComponent: - """Add a condition to the configuration space. - - Check if adding the condition is legal: - - - The parent in a condition statement must exist - - The condition must add no cycles - - The internal array keeps track of all edges which must be - added to the DiGraph; if the checks don't raise any Exception, - these edges are finally added at the end of the function. - - Parameters - ---------- - condition : :ref:`Conditions` - Condition to add - - Returns - ------- - :ref:`Conditions` - Same condition as input - """ - _assert_type(condition, ConditionComponent, method="add_condition") - - if isinstance(condition, AbstractCondition): - self._check_edges([(condition.parent, condition.child)], [condition.value]) - self._check_condition(condition.child, condition) - self._add_edge(condition.parent, condition.child, condition=condition) - - # Loop over the Conjunctions to find out the conditions we must add! 
- elif isinstance(condition, AbstractConjunction): - dlcs = condition.get_descendant_literal_conditions() - edges = [(dlc.parent, dlc.child) for dlc in dlcs] - values = [dlc.value for dlc in dlcs] - self._check_edges(edges, values) - - for dlc in dlcs: - self._check_condition(dlc.child, condition) - self._add_edge(dlc.parent, dlc.child, condition=condition) - - else: - raise Exception("This should never happen!") - - self._sort_hyperparameters() - self._update_cache() - return condition - - def add_conditions( - self, - conditions: list[ConditionComponent], - ) -> list[ConditionComponent]: - """Add a list of conditions to the configuration space. - - They must be legal. Take a look at - :meth:`~ConfigSpace.configuration_space.ConfigurationSpace.add_condition`. - - Parameters - ---------- - conditions : list(:ref:`Conditions`) - collection of conditions to add - - Returns - ------- - list(:ref:`Conditions`) - Same as input conditions - """ - for condition in conditions: - _assert_type(condition, ConditionComponent, method="add_conditions") - - edges = [] - values = [] - conditions_to_add = [] - for condition in conditions: - if isinstance(condition, AbstractCondition): - edges.append((condition.parent, condition.child)) - values.append(condition.value) - conditions_to_add.append(condition) - elif isinstance(condition, AbstractConjunction): - dlcs = condition.get_descendant_literal_conditions() - edges.extend([(dlc.parent, dlc.child) for dlc in dlcs]) - values.extend([dlc.value for dlc in dlcs]) - conditions_to_add.extend([condition] * len(dlcs)) - - for edge, condition in zip(edges, conditions_to_add): - self._check_condition(edge[1], condition) - - self._check_edges(edges, values) - for edge, condition in zip(edges, conditions_to_add): - self._add_edge(edge[0], edge[1], condition) - - self._sort_hyperparameters() - self._update_cache() - return conditions - - def add_forbidden_clause( - self, - clause: AbstractForbiddenComponent, - ) -> AbstractForbiddenComponent: 
- """ - Add a forbidden clause to the configuration space. - - Parameters - ---------- - clause : :ref:`Forbidden clauses` - Forbidden clause to add - - Returns - ------- - :ref:`Forbidden clauses` - Same as input forbidden clause - """ - self._check_forbidden_component(clause=clause) - clause.set_vector_idx(self._hyperparameter_idx) - self.forbidden_clauses.append(clause) - self._check_default_configuration() - return clause - - def add_forbidden_clauses( - self, - clauses: list[AbstractForbiddenComponent], - ) -> list[AbstractForbiddenComponent]: - """ - Add a list of forbidden clauses to the configuration space. - - Parameters - ---------- - clauses : list(:ref:`Forbidden clauses`) - Collection of forbidden clauses to add - - Returns - ------- - list(:ref:`Forbidden clauses`) - Same as input clauses - """ - for clause in clauses: - self._check_forbidden_component(clause=clause) - clause.set_vector_idx(self._hyperparameter_idx) - self.forbidden_clauses.append(clause) - - self._check_default_configuration() - return clauses - - def add_configuration_space( - self, - prefix: str, - configuration_space: ConfigurationSpace, - delimiter: str = ":", - parent_hyperparameter: dict | None = None, - ) -> ConfigurationSpace: - """ - Combine two configuration space by adding one the other configuration - space. The contents of the configuration space, which should be added, - are renamed to ``prefix`` + ``delimiter`` + old_name. - - Parameters - ---------- - prefix : str - The prefix for the renamed hyperparameter | conditions | - forbidden clauses - configuration_space : :class:`~ConfigSpace.configuration_space.ConfigurationSpace` - The configuration space which should be added - delimiter : str, optional - Defaults to ':' - parent_hyperparameter : dict | None = None - Adds for each new hyperparameter the condition, that - ``parent_hyperparameter`` is active. 
Must be a dictionary with two keys - "parent" and "value", meaning that the added configuration space is active - when `parent` is equal to `value` - - Returns - ------- - :class:`~ConfigSpace.configuration_space.ConfigurationSpace` - The configuration space, which was added - """ - _assert_type(configuration_space, ConfigurationSpace, method="add_configuration_space") - - prefix_delim = f"{prefix}{delimiter}" - - def _new_name(_item: Hyperparameter) -> str: - if _item.name in ("", prefix): - return prefix - - if not _item.name.startswith(prefix_delim): - return f"{prefix_delim}{_item.name}" - - return cast(str, _item.name) - - new_parameters = [] - for hp in configuration_space.values(): - new_hp = copy.copy(hp) - new_hp.name = _new_name(hp) - new_parameters.append(new_hp) - - self.add_hyperparameters(new_parameters) - - conditions_to_add = [] - for condition in configuration_space.get_conditions(): - new_condition = copy.copy(condition) - for dlc in new_condition.get_descendant_literal_conditions(): - # Rename children - dlc.child.name = _new_name(dlc.child) - dlc.parent.name = _new_name(dlc.parent) - - conditions_to_add.append(new_condition) - - self.add_conditions(conditions_to_add) - - forbiddens_to_add = [] - for forbidden_clause in configuration_space.forbidden_clauses: - new_forbidden = forbidden_clause - for dlc in new_forbidden.get_descendant_literal_clauses(): - if isinstance(dlc, ForbiddenRelation): - dlc.left.name = _new_name(dlc.left) - dlc.right.name = _new_name(dlc.right) - else: - dlc.hyperparameter.name = _new_name(dlc.hyperparameter) - forbiddens_to_add.append(new_forbidden) - - self.add_forbidden_clauses(forbiddens_to_add) - - conditions_to_add = [] - if parent_hyperparameter is not None: - parent = parent_hyperparameter["parent"] - value = parent_hyperparameter["value"] - - # Only add a condition if the parameter is a top-level parameter of the new - # configuration space (this will be some kind of tree structure). 
- for new_hp in new_parameters: - parents = self.get_parents_of(new_hp) - if not any(parents): - condition = EqualsCondition(new_hp, parent, value) - conditions_to_add.append(condition) - - self.add_conditions(conditions_to_add) - - return configuration_space - - def get_hyperparameter_by_idx(self, idx: int) -> str: - """Name of a hyperparameter from the space given its id. - - Parameters - ---------- - idx : int - Id of a hyperparameter - - Returns - ------- - str - Name of the hyperparameter - """ - hp = self._idx_to_hyperparameter.get(idx) - if hp is None: - raise HyperparameterIndexError(idx, self) - - return hp - - def get_idx_by_hyperparameter_name(self, name: str) -> int: - """The id of a hyperparameter by its ``name``. - - Parameters - ---------- - name : str - Name of a hyperparameter - - Returns - ------- - int - Id of the hyperparameter with name ``name`` - """ - idx = self._hyperparameter_idx.get(name) - - if idx is None: - raise HyperparameterNotFoundError(name, space=self) - - return idx - - def get_conditions(self) -> list[AbstractCondition]: - """All conditions from the configuration space. - - Returns - ------- - list(:ref:`Conditions`) - Conditions of the configuration space - """ - conditions = [] - added_conditions: set[str] = set() - - # Nodes is a list of nodes - for source_node in self._hyperparameters.values(): - # This is a list of keys in a dictionary - # TODO sort the edges by the order of their source_node in the - # hyperparameter list! - for target_node in self._children[source_node.name]: - if target_node not in added_conditions: - condition = self._children[source_node.name][target_node] - conditions.append(condition) - added_conditions.add(target_node) - - return conditions - - def get_forbiddens(self) -> list[AbstractForbiddenComponent]: - """All forbidden clauses from the configuration space. 
- - Returns - ------- - list(:ref:`Forbidden clauses`) - List with the forbidden clauses - """ - return self.forbidden_clauses - - def get_children_of(self, name: str | Hyperparameter) -> list[Hyperparameter]: - """ - Return a list with all children of a given hyperparameter. - - Parameters - ---------- - name : str, :ref:`Hyperparameters` - Hyperparameter or its name, for which all children are requested - - Returns - ------- - list(:ref:`Hyperparameters`) - Children of the hyperparameter - """ - conditions = self.get_child_conditions_of(name) - parents: list[Hyperparameter] = [] - for condition in conditions: - parents.extend(condition.get_children()) - return parents - - def generate_all_continuous_from_bounds( - self, - bounds: list[tuple[float, float]], - ) -> None: - """Generate :class:`~ConfigSpace.hyperparameters.UniformFloatHyperparameter` - from a list containing lists with lower and upper bounds. - - The generated hyperparameters are added to the configuration space. - - Parameters - ---------- - bounds : list[tuple([float, float])] - List containing lists with two elements: lower and upper bound - """ - self.add_hyperparameters( - [ - UniformFloatHyperparameter(name=f"x{i}", lower=lower, upper=upper) - for i, (lower, upper) in enumerate(bounds) - ], - ) - - def get_child_conditions_of( - self, - name: str | Hyperparameter, - ) -> list[AbstractCondition]: - """ - Return a list with conditions of all children of a given - hyperparameter referenced by its ``name``. 
- - Parameters - ---------- - name : str, :ref:`Hyperparameters` - Hyperparameter or its name, for which conditions are requested - - Returns - ------- - list(:ref:`Conditions`) - List with the conditions on the children of the given hyperparameter - """ - name = name if isinstance(name, str) else name.name - - # This raises an exception if the hyperparameter does not exist - self[name] - return self._get_child_conditions_of(name) - - def get_parents_of(self, name: str | Hyperparameter) -> list[Hyperparameter]: - """The parents hyperparameters of a given hyperparameter. - - Parameters - ---------- - name : str, :ref:`Hyperparameters` - Can either be the name of a hyperparameter or the hyperparameter - object. - - Returns - ------- - list[:ref:`Conditions`] - List with all parent hyperparameters - """ - conditions = self.get_parent_conditions_of(name) - parents: list[Hyperparameter] = [] - for condition in conditions: - parents.extend(condition.get_parents()) - return parents - - def get_parent_conditions_of( - self, - name: str | Hyperparameter, - ) -> list[AbstractCondition]: - """The conditions of all parents of a given hyperparameter. - - Parameters - ---------- - name : str, :ref:`Hyperparameters` - Can either be the name of a hyperparameter or the hyperparameter - object - - Returns - ------- - list[:ref:`Conditions`] - List with all conditions on parent hyperparameters - """ - if isinstance(name, Hyperparameter): - name = name.name # type: ignore - - # This raises an exception if the hyperparameter does not exist - self[name] - return self._get_parent_conditions_of(name) - - def get_all_unconditional_hyperparameters(self) -> list[str]: - """Names of unconditional hyperparameters. - - Returns - ------- - list[:ref:`Hyperparameters`] - List with all parent hyperparameters, which are not part of a condition - """ - return list(self._children[_ROOT]) - - def get_all_conditional_hyperparameters(self) -> set[str]: - """Names of all conditional hyperparameters. 
- - Returns - ------- - set[:ref:`Hyperparameters`] - Set with all conditional hyperparameter - """ - return self._conditionals - - def get_default_configuration(self) -> Configuration: - """Configuration containing hyperparameters with default values. - - Returns - ------- - :class:`~ConfigSpace.configuration_space.Configuration` - Configuration with the set default values - - """ - return self._check_default_configuration() - - # For backward compatibility - def check_configuration(self, configuration: Configuration) -> None: - """ - Check if a configuration is legal. Raises an error if not. - - Parameters - ---------- - configuration : :class:`~ConfigSpace.configuration_space.Configuration` - Configuration to check - """ - _assert_type(configuration, Configuration, method="check_configuration") - ConfigSpace.c_util.check_configuration(self, configuration.get_array(), False) - - def check_configuration_vector_representation(self, vector: np.ndarray) -> None: - """ - Raise error if configuration in vector representation is not legal. - - Parameters - ---------- - vector : np.ndarray - Configuration in vector representation - """ - _assert_type(vector, np.ndarray, method="check_configuration_vector_representation") - ConfigSpace.c_util.check_configuration(self, vector, False) - - def get_active_hyperparameters( - self, - configuration: Configuration, - ) -> set[Hyperparameter]: - """Set of active hyperparameter for a given configuration. 
- - Parameters - ---------- - configuration : :class:`~ConfigSpace.configuration_space.Configuration` - Configuration for which the active hyperparameter are returned - - Returns - ------- - set(:class:`~ConfigSpace.configuration_space.Configuration`) - The set of all active hyperparameter - - """ - vector = configuration.get_array() - active_hyperparameters = set() - for hp_name, hyperparameter in self._hyperparameters.items(): - conditions = self._parent_conditions_of[hyperparameter.name] - - active = True - for condition in conditions: - parent_vector_idx = condition.get_parents_vector() - - # if one of the parents is None, the hyperparameter cannot be - # active! Else we have to check this - # Note from trying to optimize this - this is faster than using - # dedicated numpy functions and indexing - if any(vector[i] != vector[i] for i in parent_vector_idx): - active = False - break - - if not condition.evaluate_vector(vector): - active = False - break - - if active: - active_hyperparameters.add(hp_name) - - return active_hyperparameters - - @overload - def sample_configuration(self, size: None = None) -> Configuration: - ... - - # Technically this is wrong given the current behaviour but it's - # sufficient for most cases. Once deprecation warning is up, - # we can just have `1` always return a list of configurations - # because an `int` was specified, `None` for single config. - @overload - def sample_configuration(self, size: int) -> list[Configuration]: - ... - - def sample_configuration( - self, - size: int | None = None, - ) -> Configuration | list[Configuration]: - """ - Sample ``size`` configurations from the configuration space object. - - Parameters - ---------- - size : int, optional - Number of configurations to sample. 
Default to 1 - - Returns - ------- - :class:`~ConfigSpace.configuration_space.Configuration`, - list[:class:`~ConfigSpace.configuration_space.Configuration`]: - A single configuration if ``size`` 1 else a list of Configurations - """ - if size == 1: - warnings.warn( - "Please leave at default or explicitly set `size=None`." - " In the future, specifying a size will always retunr a list, even if 1", - DeprecationWarning, - stacklevel=2, - ) - - # Maintain old behaviour by setting this - if size is None: - size = 1 - - _assert_type(size, int, method="sample_configuration") - if size < 1: - return [] - - iteration = 0 - missing = size - accepted_configurations: list[Configuration] = [] - num_hyperparameters = len(self._hyperparameters) - - unconditional_hyperparameters = self.get_all_unconditional_hyperparameters() - hyperparameters_with_children = [] - - _forbidden_clauses_unconditionals = [] - _forbidden_clauses_conditionals = [] - for clause in self.get_forbiddens(): - based_on_conditionals = False - for subclause in clause.get_descendant_literal_clauses(): - if isinstance(subclause, ForbiddenRelation): - if ( - subclause.left.name not in unconditional_hyperparameters - or subclause.right.name not in unconditional_hyperparameters - ): - based_on_conditionals = True - break - elif subclause.hyperparameter.name not in unconditional_hyperparameters: - based_on_conditionals = True - break - if based_on_conditionals: - _forbidden_clauses_conditionals.append(clause) - else: - _forbidden_clauses_unconditionals.append(clause) - - for uhp in unconditional_hyperparameters: - children = self._children_of[uhp] - if len(children) > 0: - hyperparameters_with_children.append(uhp) - - while len(accepted_configurations) < size: - if missing != size: - missing = int(1.1 * missing) - vector: np.ndarray = np.ndarray((missing, num_hyperparameters), dtype=float) - - for i, hp_name in enumerate(self._hyperparameters): - hyperparameter = self._hyperparameters[hp_name] - vector[:, i] = 
hyperparameter._sample(self.random, missing) - - for i in range(missing): - try: - configuration = Configuration( - self, - vector=ConfigSpace.c_util.correct_sampled_array( - vector[i].copy(), - _forbidden_clauses_unconditionals, - _forbidden_clauses_conditionals, - hyperparameters_with_children, - num_hyperparameters, - unconditional_hyperparameters, - self._hyperparameter_idx, - self._parent_conditions_of, - self._parents_of, - self._children_of, - ), - ) - accepted_configurations.append(configuration) - except ForbiddenValueError: - iteration += 1 - - if iteration == size * 100: - msg = (f"Cannot sample valid configuration for {self}",) - raise ForbiddenValueError(msg) from None - - missing = size - len(accepted_configurations) - - if size <= 1: - return accepted_configurations[0] - - return accepted_configurations - - def seed(self, seed: int) -> None: - """Set the random seed to a number. - - Parameters - ---------- - seed : int - The random seed - """ - self.random = np.random.RandomState(seed) - - def remove_hyperparameter_priors(self) -> ConfigurationSpace: - """Produces a new ConfigurationSpace where all priors on parameters are removed. - - Non-uniform hyperpararmeters are replaced with uniform ones, and - CategoricalHyperparameters with weights have their weights removed. 
- - Returns - ------- - :class:`~ConfigSpace.configuration_space.ConfigurationSpace` - The resulting configuration space, without priors on the hyperparameters - """ - uniform_config_space = ConfigurationSpace() - for parameter in self.values(): - if hasattr(parameter, "to_uniform"): - uniform_config_space.add_hyperparameter(parameter.to_uniform()) - else: - uniform_config_space.add_hyperparameter(copy.copy(parameter)) - - new_conditions = self.substitute_hyperparameters_in_conditions( - self.get_conditions(), - uniform_config_space, - ) - new_forbiddens = self.substitute_hyperparameters_in_forbiddens( - self.get_forbiddens(), - uniform_config_space, - ) - uniform_config_space.add_conditions(new_conditions) - uniform_config_space.add_forbidden_clauses(new_forbiddens) - - return uniform_config_space - - def estimate_size(self) -> float | int: - """Estimate the size of the current configuration space (i.e. unique configurations). - - This is ``np.inf`` in case if there is a single hyperparameter of size ``np.inf`` (i.e. a - :class:`~ConfigSpace.hyperparameters.UniformFloatHyperparameter`), otherwise - it is the product of the size of all hyperparameters. The function correctly guesses the - number of unique configurations if there are no condition and forbidden statements in the - configuration spaces. Otherwise, this is an upper bound. Use - :func:`~ConfigSpace.util.generate_grid` to generate all valid configurations if required. 
- - Returns - ------- - Union[float, int] - """ - sizes = [hp.get_size() for hp in self._hyperparameters.values()] - - if len(sizes) == 0: - return 0.0 - - acc = 1 - for size in sizes: - acc *= size - - return acc - - @staticmethod - def substitute_hyperparameters_in_conditions( - conditions: Iterable[ConditionComponent], - new_configspace: ConfigurationSpace, - ) -> list[ConditionComponent]: - """ - Takes a set of conditions and generates a new set of conditions with the same structure, - where each hyperparameter is replaced with its namesake in new_configspace. As such, the - set of conditions remain unchanged, but the included hyperparameters are changed to match - those types that exist in new_configspace. - - Parameters - ---------- - new_configspace: ConfigurationSpace - A ConfigurationSpace containing hyperparameters with the same names as those in the - conditions. - - Returns - ------- - list[ConditionComponent]: - The list of conditions, adjusted to fit the new ConfigurationSpace - """ - new_conditions = [] - for condition in conditions: - if isinstance(condition, AbstractConjunction): - conjunction_type = type(condition) - children = condition.get_descendant_literal_conditions() - substituted_children = ConfigurationSpace.substitute_hyperparameters_in_conditions( - children, - new_configspace, - ) - substituted_conjunction = conjunction_type(*substituted_children) - new_conditions.append(substituted_conjunction) - - elif isinstance(condition, AbstractCondition): - condition_type = type(condition) - child_name = condition.get_children()[0].name - parent_name = condition.get_parents()[0].name - new_child = new_configspace[child_name] - new_parent = new_configspace[parent_name] - - if hasattr(condition, "values"): - condition_arg = condition.values - substituted_condition = condition_type( - child=new_child, - parent=new_parent, - values=condition_arg, - ) - elif hasattr(condition, "value"): - condition_arg = condition.value - substituted_condition = 
condition_type( - child=new_child, - parent=new_parent, - value=condition_arg, - ) - else: - raise AttributeError( - f"Did not find the expected attribute in condition {type(condition)}.", - ) - - new_conditions.append(substituted_condition) - else: - raise TypeError(f"Did not expect the supplied condition type {type(condition)}.") - - return new_conditions - - @staticmethod - def substitute_hyperparameters_in_forbiddens( - forbiddens: Iterable[AbstractForbiddenComponent], - new_configspace: ConfigurationSpace, - ) -> list[AbstractForbiddenComponent]: - """ - Takes a set of forbidden clauses and generates a new set of forbidden clauses with the - same structure, where each hyperparameter is replaced with its namesake in new_configspace. - As such, the set of forbidden clauses remain unchanged, but the included hyperparameters are - changed to match those types that exist in new_configspace. - - Parameters - ---------- - forbiddens: Iterable[AbstractForbiddenComponent] - An iterable of forbiddens - new_configspace: ConfigurationSpace - A ConfigurationSpace containing hyperparameters with the same names as those in the - forbidden clauses. 
- - Returns - ------- - list[AbstractForbiddenComponent]: - The list of forbidden clauses, adjusted to fit the new ConfigurationSpace - """ - new_forbiddens = [] - for forbidden in forbiddens: - if isinstance(forbidden, AbstractForbiddenConjunction): - conjunction_type = type(forbidden) - children = forbidden.get_descendant_literal_clauses() - substituted_children = ConfigurationSpace.substitute_hyperparameters_in_forbiddens( - children, - new_configspace, - ) - substituted_conjunction = conjunction_type(*substituted_children) - new_forbiddens.append(substituted_conjunction) - - elif isinstance(forbidden, AbstractForbiddenClause): - forbidden_type = type(forbidden) - hyperparameter_name = forbidden.hyperparameter.name - new_hyperparameter = new_configspace[hyperparameter_name] - - if hasattr(forbidden, "values"): - forbidden_arg = forbidden.values - substituted_forbidden = forbidden_type( - hyperparameter=new_hyperparameter, - values=forbidden_arg, - ) - elif hasattr(forbidden, "value"): - forbidden_arg = forbidden.value - substituted_forbidden = forbidden_type( - hyperparameter=new_hyperparameter, - value=forbidden_arg, - ) - else: - raise AttributeError( - f"Did not find the expected attribute in forbidden {type(forbidden)}.", - ) - - new_forbiddens.append(substituted_forbidden) - elif isinstance(forbidden, ForbiddenRelation): - forbidden_type = type(forbidden) - left_name = forbidden.left.name - left_hyperparameter = new_configspace[left_name] - right_name = forbidden.right.name - right_hyperparameter = new_configspace[right_name] - - substituted_forbidden = forbidden_type( - left=left_hyperparameter, - right=right_hyperparameter, - ) - new_forbiddens.append(substituted_forbidden) - else: - raise TypeError(f"Did not expect type {type(forbidden)}.") - - return new_forbiddens - - def __eq__(self, other: Any) -> bool: - """Override the default Equals behavior.""" - if isinstance(other, self.__class__): - this_dict = self.__dict__.copy() - del this_dict["random"] - 
other_dict = other.__dict__.copy() - del other_dict["random"] - return this_dict == other_dict - return NotImplemented - - def __hash__(self) -> int: - """Override the default hash behavior (that returns the id or the object).""" - return hash(self.__repr__()) - - def __getitem__(self, key: str) -> Hyperparameter: - hp = self._hyperparameters.get(key) - if hp is None: - raise HyperparameterNotFoundError(key, space=self) - - return hp - - def __contains__(self, key: str) -> bool: - return key in self._hyperparameters - - def __repr__(self) -> str: - retval = io.StringIO() - retval.write("Configuration space object:\n Hyperparameters:\n") - - if self.name is not None: - retval.write(self.name) - retval.write("\n") - - hyperparameters = sorted(self.values(), key=lambda t: t.name) # type: ignore - if hyperparameters: - retval.write(" ") - retval.write("\n ".join([str(hyperparameter) for hyperparameter in hyperparameters])) - retval.write("\n") - - conditions = sorted(self.get_conditions(), key=lambda t: str(t)) - if conditions: - retval.write(" Conditions:\n") - retval.write(" ") - retval.write("\n ".join([str(condition) for condition in conditions])) - retval.write("\n") - - if self.get_forbiddens(): - retval.write(" Forbidden Clauses:\n") - retval.write(" ") - retval.write("\n ".join([str(clause) for clause in self.get_forbiddens()])) - retval.write("\n") - - retval.seek(0) - return retval.getvalue() - - def __iter__(self) -> Iterator[str]: - """Iterate over the hyperparameter names in the right order.""" - return iter(self._hyperparameters.keys()) - - def keys(self) -> KeysView[str]: - """Return the hyperparameter names in the right order.""" - return self._hyperparameters.keys() - - def __len__(self) -> int: - return len(self._hyperparameters) - - def _add_hyperparameter(self, hyperparameter: Hyperparameter) -> None: - hp_name = hyperparameter.name - - existing = self._hyperparameters.get(hp_name) - if existing is not None: - raise 
HyperparameterAlreadyExistsError(existing, hyperparameter, space=self) - - self._hyperparameters[hp_name] = hyperparameter - self._children[hp_name] = OrderedDict() - - # TODO remove (_ROOT) __HPOlib_configuration_space_root__, it is only used in - # to check for cyclic configuration spaces. If it is only added when - # cycles are checked, the code can become much easier (e.g. the parent - # caching can be more or less removed). - self._children[_ROOT][hp_name] = None - self._parents[hp_name] = OrderedDict() - self._parents[hp_name][_ROOT] = None - - # Save the index of each hyperparameter name to later on access a - # vector of hyperparameter values by indices, must be done twice - # because check_default_configuration depends on it - self._hyperparameter_idx.update({hp: i for i, hp in enumerate(self._hyperparameters)}) - - def _sort_hyperparameters(self) -> None: - levels: OrderedDict[str, int] = OrderedDict() - to_visit: deque[str] = deque() - for hp_name in self._hyperparameters: - to_visit.appendleft(hp_name) - - while len(to_visit) > 0: - current = to_visit.pop() - if _ROOT in self._parents[current]: - assert len(self._parents[current]) == 1 - levels[current] = 1 - - else: - all_parents_visited = True - depth = -1 - for parent in self._parents[current]: - if parent not in levels: - all_parents_visited = False - break - - depth = max(depth, levels[parent] + 1) - - if all_parents_visited: - levels[current] = depth - else: - to_visit.appendleft(current) - - by_level: defaultdict[int, list[str]] = defaultdict(list) - for hp in levels: - level = levels[hp] - by_level[level].append(hp) - - nodes = [] - # Sort and add to list - for level in sorted(by_level): - sorted_by_level = by_level[level] - sorted_by_level.sort() - nodes.extend(sorted_by_level) - - # Resort the OrderedDict - new_order = OrderedDict() - for node in nodes: - new_order[node] = self._hyperparameters[node] - self._hyperparameters = new_order - - # Update to reflect sorting - for i, hp in 
enumerate(self._hyperparameters): - self._hyperparameter_idx[hp] = i - self._idx_to_hyperparameter[i] = hp - - # Update order of _children - new_order = OrderedDict() - new_order[_ROOT] = self._children[_ROOT] - for hp in chain([_ROOT], self._hyperparameters): - # Also resort the children dict - children_sorting = [ - (self._hyperparameter_idx[child_name], child_name) - for child_name in self._children[hp] - ] - children_sorting.sort() - children_order = OrderedDict() - for _, child_name in children_sorting: - children_order[child_name] = self._children[hp][child_name] - new_order[hp] = children_order - self._children = new_order - - # Update order of _parents - new_order = OrderedDict() - for hp in self._hyperparameters: - # Also resort the parent's dict - if _ROOT in self._parents[hp]: - parent_sorting = [(-1, _ROOT)] - else: - parent_sorting = [ - (self._hyperparameter_idx[parent_name], parent_name) - for parent_name in self._parents[hp] - ] - parent_sorting.sort() - parent_order = OrderedDict() - for _, parent_name in parent_sorting: - parent_order[parent_name] = self._parents[hp][parent_name] - new_order[hp] = parent_order - self._parents = new_order - - # update conditions - for condition in self.get_conditions(): - condition.set_vector_idx(self._hyperparameter_idx) - - # forbidden clauses - for clause in self.get_forbiddens(): - clause.set_vector_idx(self._hyperparameter_idx) - - def _check_condition( - self, - child_node: Hyperparameter, - condition: ConditionComponent, - ) -> None: - for present_condition in self._get_parent_conditions_of(child_node.name): - if present_condition != condition: - raise AmbiguousConditionError(present_condition, condition) - - def _add_edge( - self, - parent_node: Hyperparameter, - child_node: Hyperparameter, - condition: ConditionComponent, - ) -> None: - with contextlib.suppress(Exception): - # TODO maybe this has to be done more carefully - del self._children[_ROOT][child_node.name] - - with contextlib.suppress(Exception): 
- del self._parents[child_node.name][_ROOT] - - self._children[parent_node.name][child_node.name] = condition - self._parents[child_node.name][parent_node.name] = condition - - self._conditionals.add(child_node.name) - - def _create_tmp_dag(self) -> nx.DiGraph: - tmp_dag = nx.DiGraph() - for hp_name in self._hyperparameters: - tmp_dag.add_node(hp_name) - tmp_dag.add_edge(_ROOT, hp_name) - - for parent_node_ in self._children: - if parent_node_ == _ROOT: - continue - for child_node_ in self._children[parent_node_]: - with contextlib.suppress(Exception): - tmp_dag.remove_edge(_ROOT, child_node_) - - condition = self._children[parent_node_][child_node_] - tmp_dag.add_edge(parent_node_, child_node_, condition=condition) - - return tmp_dag - - def _check_edges( - self, - edges: list[tuple[Hyperparameter, Hyperparameter]], - values: list[Any], - ) -> None: - for (parent, child), value in zip(edges, values): - # check if both nodes are already inserted into the graph - if child.name not in self._hyperparameters: - raise ChildNotFoundError(child, space=self) - - if parent.name not in self._hyperparameters: - raise ParentNotFoundError(parent, space=self) - - if child != self._hyperparameters[child.name]: - existing = self._hyperparameters[child.name] - raise HyperparameterAlreadyExistsError(existing, child, space=self) - - if parent != self._hyperparameters[parent.name]: - existing = self._hyperparameters[child.name] - raise HyperparameterAlreadyExistsError(existing, child, space=self) - - _assert_legal(parent, value) - - # TODO: recursively check everything which is inside the conditions, - # this means we have to recursively traverse the condition - tmp_dag = self._create_tmp_dag() - for parent, child in edges: - tmp_dag.add_edge(parent.name, child.name) - - if not nx.is_directed_acyclic_graph(tmp_dag): - cycles: list[list[str]] = list(nx.simple_cycles(tmp_dag)) - for cycle in cycles: - cycle.sort() - cycles.sort() - raise CyclicDependancyError(cycles) - - def 
_update_cache(self) -> None: - self._parent_conditions_of = { - name: self._get_parent_conditions_of(name) for name in self._hyperparameters - } - self._child_conditions_of = { - name: self._get_child_conditions_of(name) for name in self._hyperparameters - } - self._parents_of = {name: self.get_parents_of(name) for name in self._hyperparameters} - self._children_of = {name: self.get_children_of(name) for name in self._hyperparameters} - - def _check_forbidden_component(self, clause: AbstractForbiddenComponent) -> None: - _assert_type(clause, AbstractForbiddenComponent, "_check_forbidden_component") - - to_check = [] - relation_to_check = [] - if isinstance(clause, AbstractForbiddenClause): - to_check.append(clause) - elif isinstance(clause, AbstractForbiddenConjunction): - to_check.extend(clause.get_descendant_literal_clauses()) - elif isinstance(clause, ForbiddenRelation): - relation_to_check.extend(clause.get_descendant_literal_clauses()) - else: - raise NotImplementedError(type(clause)) - - def _check_hp(tmp_clause: AbstractForbiddenComponent, hp: Hyperparameter) -> None: - if hp.name not in self._hyperparameters: - raise HyperparameterNotFoundError( - hp, - space=self, - preamble=f"Cannot add '{tmp_clause}' because it references '{hp.name}'", - ) - - for tmp_clause in to_check: - _check_hp(tmp_clause, tmp_clause.hyperparameter) - - for tmp_clause in relation_to_check: - _check_hp(tmp_clause, tmp_clause.left) - _check_hp(tmp_clause, tmp_clause.right) - - def _get_children_of(self, name: str) -> list[Hyperparameter]: - conditions = self._get_child_conditions_of(name) - parents: list[Hyperparameter] = [] - for condition in conditions: - parents.extend(condition.get_children()) - return parents - - def _get_child_conditions_of(self, name: str) -> list[AbstractCondition]: - children = self._children[name] - return [children[child_name] for child_name in children if child_name != _ROOT] - - def _get_parents_of(self, name: str) -> list[Hyperparameter]: - """The 
parents hyperparameters of a given hyperparameter. - - Parameters - ---------- - name : str - - Returns - ------- - list - List with all parent hyperparameters - """ - conditions = self._get_parent_conditions_of(name) - parents: list[Hyperparameter] = [] - for condition in conditions: - parents.extend(condition.get_parents()) - return parents - - def _check_default_configuration(self) -> Configuration: - # Check if adding that hyperparameter leads to an illegal default configuration - instantiated_hyperparameters: dict[str, int | float | str | None] = {} - for hp in self.values(): - conditions = self._get_parent_conditions_of(hp.name) - active = True - for condition in conditions: - parent_names = [ - c.parent.name for c in condition.get_descendant_literal_conditions() - ] - - parents = { - parent_name: instantiated_hyperparameters[parent_name] - for parent_name in parent_names - } - - if not condition.evaluate(parents): - # TODO find out why a configuration is illegal! - active = False - - if not active: - instantiated_hyperparameters[hp.name] = None - elif isinstance(hp, Constant): - instantiated_hyperparameters[hp.name] = hp.value - else: - instantiated_hyperparameters[hp.name] = hp.default_value - - # TODO copy paste from check configuration - - # TODO add an extra Exception type for the case that the default - # configuration is forbidden! 
- return Configuration(self, values=instantiated_hyperparameters) - - def _get_parent_conditions_of(self, name: str) -> list[AbstractCondition]: - parents = self._parents[name] - return [parents[parent_name] for parent_name in parents if parent_name != _ROOT] - - def _check_configuration_rigorous( - self, - configuration: Configuration, - allow_inactive_with_values: bool = False, - ) -> None: - vector = configuration.get_array() - active_hyperparameters = self.get_active_hyperparameters(configuration) - - for hp_name, hyperparameter in self._hyperparameters.items(): - hp_value = vector[self._hyperparameter_idx[hp_name]] - active = hp_name in active_hyperparameters - - if not np.isnan(hp_value) and not hyperparameter.is_legal_vector(hp_value): - raise IllegalValueError(hyperparameter, hp_value) - - if active and np.isnan(hp_value): - raise ActiveHyperparameterNotSetError(hyperparameter) - - if not allow_inactive_with_values and not active and not np.isnan(hp_value): - raise InactiveHyperparameterSetError(hyperparameter, hp_value) - - self._check_forbidden(vector) - - def _check_forbidden(self, vector: np.ndarray) -> None: - ConfigSpace.c_util.check_forbidden(self.forbidden_clauses, vector) - - # ------------ Marked Deprecated -------------------- - # Probably best to only remove these once we actually - # make some other breaking changes - # * Search `Marked Deprecated` to find others - - def get_hyperparameter(self, name: str) -> Hyperparameter: - """Hyperparameter from the space with a given name. - - Parameters - ---------- - name : str - Name of the searched hyperparameter - - Returns - ------- - :ref:`Hyperparameters` - Hyperparameter with the name ``name`` - """ - warnings.warn( - "Prefer `space[name]` over `get_hyperparameter`", - DeprecationWarning, - stacklevel=2, - ) - return self[name] - - def get_hyperparameters(self) -> list[Hyperparameter]: - """All hyperparameters in the space. 
- - Returns - ------- - list(:ref:`Hyperparameters`) - A list with all hyperparameters stored in the configuration space object - """ - warnings.warn( - "Prefer using `list(space.values())` over `get_hyperparameters`", - DeprecationWarning, - stacklevel=2, - ) - return list(self._hyperparameters.values()) - - def get_hyperparameters_dict(self) -> dict[str, Hyperparameter]: - """All the ``(name, Hyperparameter)`` contained in the space. - - Returns - ------- - dict(str, :ref:`Hyperparameters`) - An OrderedDict of names and hyperparameters - """ - warnings.warn( - "Prefer using `dict(space)` over `get_hyperparameters_dict`", - DeprecationWarning, - stacklevel=2, - ) - return self._hyperparameters.copy() - - def get_hyperparameter_names(self) -> list[str]: - """Names of all the hyperparameter in the space. - - Returns - ------- - list(str) - List of hyperparameter names - """ - warnings.warn( - "Prefer using `list(space.keys())` over `get_hyperparameter_names`", - DeprecationWarning, - stacklevel=2, - ) - return list(self._hyperparameters.keys()) - - # --------------------------------------------------- diff --git a/ConfigSpace/exceptions.py b/ConfigSpace/exceptions.py deleted file mode 100644 index 2003953c..00000000 --- a/ConfigSpace/exceptions.py +++ /dev/null @@ -1,131 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Any - -if TYPE_CHECKING: - from ConfigSpace.conditions import ConditionComponent - from ConfigSpace.configuration_space import ConfigurationSpace - from ConfigSpace.hyperparameters import Hyperparameter - - -class ForbiddenValueError(ValueError): - """Raised when a combination of values is forbidden for a Configuration.""" - - -class IllegalValueError(ValueError): - def __init__(self, hyperparameter: Hyperparameter, value: Any): - super().__init__() - self.hyperparameter = hyperparameter - self.value = value - - def __str__(self) -> str: - return ( - f"Value {self.value}: ({type(self.value)}) is not allowed for" - f" 
hyperparameter {self.hyperparameter}" - ) - - -class ActiveHyperparameterNotSetError(ValueError): - def __init__(self, hyperparameter: Hyperparameter) -> None: - super().__init__(hyperparameter) - self.hyperparameter = hyperparameter - - def __str__(self) -> str: - return f"Hyperparameter is active but has no value set.\n{self.hyperparameter}" - - -class InactiveHyperparameterSetError(ValueError): - def __init__(self, hyperparameter: Hyperparameter, value: Any) -> None: - super().__init__(hyperparameter) - self.hyperparameter = hyperparameter - self.value = value - - def __str__(self) -> str: - return ( - f"Hyperparameter is inactive but has a value set as {self.value}.\n" - f"{self.hyperparameter}" - ) - - -class HyperparameterNotFoundError(ValueError): - def __init__( - self, - hyperparameter: Hyperparameter | str, - space: ConfigurationSpace, - preamble: str | None = None, - ): - super().__init__(hyperparameter, space, preamble) - self.preamble = preamble - self.hp_name = hyperparameter if isinstance(hyperparameter, str) else hyperparameter.name - self.space = space - - def __str__(self) -> str: - pre = f"{self.preamble}\n" if self.preamble is not None else "" - return f"{pre}" f"Hyperparameter {self.hp_name} not found in space." f"\n{self.space}" - - -class ChildNotFoundError(HyperparameterNotFoundError): - def __str__(self) -> str: - return "Child " + super().__str__() - - -class ParentNotFoundError(HyperparameterNotFoundError): - def __str__(self) -> str: - return "Parent " + super().__str__() - - -class HyperparameterIndexError(KeyError): - def __init__(self, idx: int, space: ConfigurationSpace): - super().__init__(idx, space) - self.idx = idx - self.space = space - - def __str__(self) -> str: - raise KeyError( - f"Hyperparameter #'{self.idx}' does not exist in this space." 
f"\n{self.space}", - ) - - -class AmbiguousConditionError(ValueError): - def __init__(self, present: ConditionComponent, new_condition: ConditionComponent): - super().__init__(present, new_condition) - self.present = present - self.new_condition = new_condition - - def __str__(self) -> str: - return ( - "Adding a second condition (different) for a hyperparameter is ambiguous" - " and therefore forbidden. Add a conjunction instead!" - f"\nAlready inserted: {self.present}" - f"\nNew one: {self.new_condition}" - ) - - -class HyperparameterAlreadyExistsError(ValueError): - def __init__( - self, - existing: Hyperparameter, - other: Hyperparameter, - space: ConfigurationSpace, - ): - super().__init__(existing, other, space) - self.existing = existing - self.other = other - self.space = space - - def __str__(self) -> str: - return ( - f"Hyperparameter {self.existing.name} already exists in space." - f"\nExisting: {self.existing}" - f"\nNew one: {self.other}" - f"{self.space}" - ) - - -class CyclicDependancyError(ValueError): - def __init__(self, cycles: list[list[str]]) -> None: - super().__init__(cycles) - self.cycles = cycles - - def __str__(self) -> str: - return f"Hyperparameter configuration contains a cycle {self.cycles}" diff --git a/ConfigSpace/forbidden.pxd b/ConfigSpace/forbidden.pxd deleted file mode 100644 index b381a19b..00000000 --- a/ConfigSpace/forbidden.pxd +++ /dev/null @@ -1,24 +0,0 @@ -import numpy as np -cimport numpy as np - -# We now need to fix a datatype for our arrays. I've used the variable -# DTYPE for this, which is assigned to the usual NumPy runtime -# type info object. -DTYPE = float -# "ctypedef" assigns a corresponding compile-time type to DTYPE_t. For -# every type in the numpy module there's a corresponding compile-time -# type with a _t-suffix. 
-ctypedef np.float_t DTYPE_t - - -cdef class AbstractForbiddenComponent(object): - - cdef public hyperparameter - cdef public int vector_id - cdef public value - cdef public DTYPE_t vector_value - - cdef int c_is_forbidden_vector(self, np.ndarray instantiated_hyperparameters, int strict) - cpdef get_descendant_literal_clauses(self) - cpdef set_vector_idx(self, hyperparameter_to_idx) - cpdef is_forbidden(self, instantiated_hyperparameters, strict) diff --git a/ConfigSpace/forbidden.pyx b/ConfigSpace/forbidden.pyx deleted file mode 100644 index 9ba0dd30..00000000 --- a/ConfigSpace/forbidden.pyx +++ /dev/null @@ -1,680 +0,0 @@ -# Copyright (c) 2014-2016, ConfigSpace developers -# Matthias Feurer -# Katharina Eggensperger -# and others (see commit history). -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the nor the -# names of itConfigurationSpaces contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import copy -import numpy as np -import io -from ConfigSpace.hyperparameters import Hyperparameter -from ConfigSpace.hyperparameters.hyperparameter cimport Hyperparameter -from typing import Dict, Any, Union - -from ConfigSpace.forbidden cimport AbstractForbiddenComponent - -from libc.stdlib cimport malloc, free -cimport numpy as np - - -cdef class AbstractForbiddenComponent(object): - - def __init__(self): - pass - - def __repr__(self): - pass - - def __eq__(self, other: Any) -> bool: - """ - This method implements a comparison between self and another - object. - - Additionally, it defines the __ne__() as stated in the - documentation from python: - By default, object implements __eq__() by using is, returning NotImplemented - in the case of a false comparison: True if x is y else NotImplemented. - For __ne__(), by default it delegates to __eq__() and inverts the result - unless it is NotImplemented. 
- - """ - if not isinstance(other, self.__class__): - return False - - if self.value is None: - self.value = self.values - if other.value is None: - other.value = other.values - - return (self.value == other.value and - self.hyperparameter.name == other.hyperparameter.name) - - def __hash__(self) -> int: - """Override the default hash behavior (that returns the id or the object)""" - return hash(tuple(sorted(self.__dict__.items()))) - - def __copy__(self): - raise NotImplementedError() - - cpdef get_descendant_literal_clauses(self): - pass - - cpdef set_vector_idx(self, hyperparameter_to_idx): - pass - - cpdef is_forbidden(self, instantiated_hyperparameters, strict): - pass - - def is_forbidden_vector(self, instantiated_hyperparameters, strict): - return bool(self.c_is_forbidden_vector(instantiated_hyperparameters, strict)) - - cdef int c_is_forbidden_vector(self, np.ndarray instantiated_hyperparameters, int strict): - pass - - -cdef class AbstractForbiddenClause(AbstractForbiddenComponent): - - def __init__(self, hyperparameter: Hyperparameter): - if not isinstance(hyperparameter, Hyperparameter): - raise TypeError("Argument 'hyperparameter' is not of type %s." 
% - Hyperparameter) - self.hyperparameter = hyperparameter - self.vector_id = -1 - - cpdef get_descendant_literal_clauses(self): - return (self, ) - - cpdef set_vector_idx(self, hyperparameter_to_idx): - self.vector_id = hyperparameter_to_idx[self.hyperparameter.name] - - -cdef class SingleValueForbiddenClause(AbstractForbiddenClause): - def __init__(self, hyperparameter: Hyperparameter, value: Any) -> None: - super(SingleValueForbiddenClause, self).__init__(hyperparameter) - if not self.hyperparameter.is_legal(value): - raise ValueError("Forbidden clause must be instantiated with a " - "legal hyperparameter value for '%s', but got " - "'%s'" % (self.hyperparameter, str(value))) - self.value = value - self.vector_value = self.hyperparameter._inverse_transform(self.value) - - def __copy__(self): - return self.__class__( - hyperparameter=copy.copy(self.hyperparameter), - value=self.value - ) - - cpdef is_forbidden(self, instantiated_hyperparameters, strict): - value = instantiated_hyperparameters.get(self.hyperparameter.name) - if value is None: - if strict: - raise ValueError("Is_forbidden must be called with the " - "instantiated hyperparameter in the " - "forbidden clause; you are missing " - "'%s'" % self.hyperparameter.name) - else: - return False - - return self._is_forbidden(value) - - cdef int c_is_forbidden_vector(self, np.ndarray instantiated_vector, int strict): - cdef DTYPE_t value = instantiated_vector[self.vector_id] - if value != value: - if strict: - raise ValueError("Is_forbidden must be called with the " - "instantiated vector id in the " - "forbidden clause; you are missing " - "'%s'" % self.vector_id) - else: - return False - - return self._is_forbidden_vector(value) - - cdef int _is_forbidden(self, value): - pass - - cdef int _is_forbidden_vector(self, DTYPE_t value): - pass - - -cdef class MultipleValueForbiddenClause(AbstractForbiddenClause): - cdef public values - cdef public vector_values - - def __init__(self, hyperparameter: Hyperparameter, 
values: Any) -> None: - super(MultipleValueForbiddenClause, self).__init__(hyperparameter) - - for value in values: - if not self.hyperparameter.is_legal(value): - raise ValueError("Forbidden clause must be instantiated with a " - "legal hyperparameter value for '%s', but got " - "'%s'" % (self.hyperparameter, str(value))) - self.values = values - self.vector_values = [self.hyperparameter._inverse_transform(value) - for value in self.values] - - def __copy__(self): - return self.__class__( - hyperparameter=copy.copy(self.hyperparameter), - values=copy.deepcopy(self.values) - ) - - cpdef is_forbidden(self, instantiated_hyperparameters, strict): - value = instantiated_hyperparameters.get(self.hyperparameter.name) - if value is None: - if strict: - raise ValueError("Is_forbidden must be called with the " - "instantiated hyperparameter in the " - "forbidden clause; you are missing " - "'%s'." % self.hyperparameter.name) - else: - return False - - return self._is_forbidden(value) - - cdef int c_is_forbidden_vector(self, np.ndarray instantiated_vector, int strict): - cdef DTYPE_t value = instantiated_vector[self.vector_id] - - if value != value: - if strict: - raise ValueError("Is_forbidden must be called with the " - "instantiated vector id in the " - "forbidden clause; you are missing " - "'%s'" % self.vector_id) - else: - return False - - return self._is_forbidden_vector(value) - - cdef int _is_forbidden(self, value): - pass - - cdef int _is_forbidden_vector(self, DTYPE_t value): - pass - - -cdef class ForbiddenEqualsClause(SingleValueForbiddenClause): - """A ForbiddenEqualsClause - - It forbids a value from the value range of a hyperparameter to be - *equal to* ``value``. 
- - Forbids the value 2 for the hyperparameter *a* - - >>> from ConfigSpace import ConfigurationSpace, ForbiddenEqualsClause - >>> - >>> cs = ConfigurationSpace({"a": [1, 2, 3]}) - >>> forbidden_clause_a = ForbiddenEqualsClause(cs["a"], 2) - >>> cs.add_forbidden_clause(forbidden_clause_a) - Forbidden: a == 2 - - Parameters - ---------- - hyperparameter : :ref:`Hyperparameters` - Methods on which a restriction will be made - value : Any - forbidden value - """ - - def __repr__(self): - return "Forbidden: %s == %s" % (self.hyperparameter.name, - repr(self.value)) - - cdef int _is_forbidden(self, value): - return value == self.value - - cdef int _is_forbidden_vector(self, DTYPE_t value): - return value == self.vector_value - - -cdef class ForbiddenInClause(MultipleValueForbiddenClause): - def __init__(self, hyperparameter: Dict[str, Union[None, str, float, int]], - values: Any) -> None: - """A ForbiddenInClause. - - It forbids a value from the value range of a hyperparameter to be - *in* a collection of ``values``. - - Forbids the values 2, 3 for the hyperparameter *a* - - >>> from ConfigSpace import ConfigurationSpace, ForbiddenInClause - >>> - >>> cs = ConfigurationSpace({"a": [1, 2, 3]}) - >>> forbidden_clause_a = ForbiddenInClause(cs['a'], [2, 3]) - >>> cs.add_forbidden_clause(forbidden_clause_a) - Forbidden: a in {2, 3} - - Note - ---- - The forbidden values have to be a subset of the hyperparameter's values. 
- - Parameters - ---------- - hyperparameter : (:ref:`Hyperparameters`, dict) - Hyperparameter on which a restriction will be made - - values : Any - Collection of forbidden values - """ - - super(ForbiddenInClause, self).__init__(hyperparameter, values) - self.values = set(self.values) - self.vector_values = set(self.vector_values) - - def __repr__(self) -> str: - return "Forbidden: %s in %s" % ( - self.hyperparameter.name, - "{" + ", ".join((repr(value) - for value in sorted(self.values))) + "}") - - cdef int _is_forbidden(self, value): - return value in self.values - - cdef int _is_forbidden_vector(self, DTYPE_t value): - return value in self.vector_values - - -cdef class AbstractForbiddenConjunction(AbstractForbiddenComponent): - cdef public tuple components - cdef tuple dlcs - cdef public int n_components - - def __init__(self, *args: AbstractForbiddenComponent) -> None: - super(AbstractForbiddenConjunction, self).__init__() - # Test the classes - for idx, component in enumerate(args): - if not isinstance(component, AbstractForbiddenComponent): - raise TypeError("Argument #%d is not an instance of %s, " - "but %s" % ( - idx, AbstractForbiddenComponent, - type(component))) - - self.components = args - self.n_components = len(self.components) - self.dlcs = self.get_descendant_literal_clauses() - - def __repr__(self): - pass - - def __copy__(self): - return self.__class__([copy(comp) for comp in self.components]) - - def __eq__(self, other: Any) -> bool: - """ - This method implements a comparison between self and another - object. - - Additionally, it defines the __ne__() as stated in the - documentation from python: - By default, object implements __eq__() by using is, returning NotImplemented - in the case of a false comparison: True if x is y else NotImplemented. - For __ne__(), by default it delegates to __eq__() and inverts the result - unless it is NotImplemented. 
- """ - - if not isinstance(other, self.__class__): - return False - - if self.n_components != other.n_components: - return False - - return all([self.components[i] == other.components[i] - for i in range(self.n_components)]) - - cpdef set_vector_idx(self, hyperparameter_to_idx): - for component in self.components: - component.set_vector_idx(hyperparameter_to_idx) - - cpdef get_descendant_literal_clauses(self): - children = [] - for component in self.components: - if isinstance(component, AbstractForbiddenConjunction): - children.extend(component.get_descendant_literal_clauses()) - else: - children.append(component) - return tuple(children) - - cpdef is_forbidden(self, instantiated_hyperparameters, strict): - ihp_names = list(instantiated_hyperparameters.keys()) - - for dlc in self.dlcs: - if dlc.hyperparameter.name not in ihp_names: - if strict: - raise ValueError("Is_forbidden must be called with all " - "instantiated hyperparameters in the " - "and conjunction of forbidden clauses; " - "you are (at least) missing " - "'%s'" % dlc.hyperparameter.name) - else: - return False - - cdef int* arrptr - arrptr = malloc(sizeof(int) * self.n_components) - - # Finally, call is_forbidden for all direct descendents and combine the - # outcomes - np_index = 0 - for component in self.components: - e = component.is_forbidden(instantiated_hyperparameters, - strict=strict) - arrptr[np_index] = e - np_index += 1 - - rval = self._is_forbidden(self.n_components, arrptr) - free(arrptr) - return rval - - cdef int c_is_forbidden_vector(self, np.ndarray instantiated_vector, int strict): - cdef int e = 0 - cdef int rval - cdef AbstractForbiddenComponent component - - cdef int* arrptr - arrptr = malloc(sizeof(int) * self.n_components) - - # Finally, call is_forbidden for all direct descendents and combine the - # outcomes. Check only as many forbidden clauses as the actual - # evaluation function queries for (e.g. 
and conditions are False - # if only one of the components evaluates to False). - - for i in range(self.n_components): - component = self.components[i] - e = component.c_is_forbidden_vector(instantiated_vector, strict) - arrptr[i] = e - - rval = self._is_forbidden(self.n_components, arrptr) - free(arrptr) - return rval - - cdef int _is_forbidden(self, int I, int* evaluations): - pass - - -cdef class ForbiddenAndConjunction(AbstractForbiddenConjunction): - """A ForbiddenAndConjunction. - - The ForbiddenAndConjunction combines forbidden-clauses, which allows to - build powerful constraints. - - >>> from ConfigSpace import ( - ... ConfigurationSpace, - ... ForbiddenEqualsClause, - ... ForbiddenInClause, - ... ForbiddenAndConjunction - ... ) - >>> - >>> cs = ConfigurationSpace({"a": [1, 2, 3], "b": [2, 5, 6]}) - >>> - >>> forbidden_clause_a = ForbiddenEqualsClause(cs["a"], 2) - >>> forbidden_clause_b = ForbiddenInClause(cs["b"], [2]) - >>> - >>> forbidden_clause = ForbiddenAndConjunction(forbidden_clause_a, forbidden_clause_b) - >>> - >>> cs.add_forbidden_clause(forbidden_clause) - (Forbidden: a == 2 && Forbidden: b in {2}) - - Parameters - ---------- - *args : list(:ref:`Forbidden clauses`) - forbidden clauses, which should be combined - """ - - def __repr__(self) -> str: - retval = io.StringIO() - retval.write("(") - for idx, component in enumerate(self.components): - retval.write(str(component)) - if idx < len(self.components) - 1: - retval.write(" && ") - retval.write(")") - return retval.getvalue() - - cdef int _is_forbidden(self, int I, int* evaluations): - # Return False if one of the components evaluates to False - - for i in range(I): - if evaluations[i] == 0: - return 0 - return 1 - - cdef int c_is_forbidden_vector(self, np.ndarray instantiated_vector, int strict): - # Copy from above to have early stopping of the evaluation of clauses - - # gave only very modest improvements of ~5%; should probably be reworked - # if adding more conjunctions in order to use 
better software design to - # avoid code duplication. - cdef int e = 0 - cdef AbstractForbiddenComponent component - - # Finally, call is_forbidden for all direct descendents and combine the - # outcomes. Check only as many forbidden clauses as the actual - # evaluation function queries for (e.g. and conditions are False - # if only one of the components evaluates to False). - - for i in range(self.n_components): - component = self.components[i] - e = component.c_is_forbidden_vector(instantiated_vector, strict) - if e == 0: - return 0 - - return 1 - - -cdef class ForbiddenRelation(AbstractForbiddenComponent): - - cdef public left - cdef public right - cdef public int[2] vector_ids - - def __init__(self, left: Hyperparameter, right : Hyperparameter): - if not isinstance(left, Hyperparameter): - raise TypeError("Argument 'left' is not of type %s." % Hyperparameter) - if not isinstance(right, Hyperparameter): - raise TypeError("Argument 'right' is not of type %s." % Hyperparameter) - - self.left = left - self.right = right - self.vector_ids = (-1, -1) - - def __eq__(self, other: Any) -> bool: - if not isinstance(other, self.__class__): - return False - return self.left == other.left and self.right == other.right - - def __copy__(self): - return self.__class__( - a=copy.copy(self.left), - b=copy.copy(self.right) - ) - - cpdef get_descendant_literal_clauses(self): - return (self,) - - cpdef set_vector_idx(self, hyperparameter_to_idx): - self.vector_ids = (hyperparameter_to_idx[self.left.name], hyperparameter_to_idx[self.right.name]) - - cpdef is_forbidden(self, instantiated_hyperparameters, strict): - left = instantiated_hyperparameters.get(self.left.name) - right = instantiated_hyperparameters.get(self.right.name) - if left is None: - if strict: - raise ValueError("Is_forbidden must be called with the " - "instantiated hyperparameters in the " - "forbidden clause; you are missing " - "'%s'" % self.left.name) - else: - return False - if right is None: - if strict: - 
raise ValueError("Is_forbidden must be called with the " - "instantiated hyperparameters in the " - "forbidden clause; you are missing " - "'%s'" % self.right.name) - else: - return False - - return self._is_forbidden(left, right) - - cdef int _is_forbidden(self, left, right) except -1: - pass - - cdef int c_is_forbidden_vector(self, np.ndarray instantiated_vector, int strict): - cdef DTYPE_t left = instantiated_vector[self.vector_ids[0]] - cdef DTYPE_t right = instantiated_vector[self.vector_ids[1]] - - if left != left: - if strict: - raise ValueError("Is_forbidden must be called with the " - "instantiated vector id in the " - "forbidden clause; you are missing " - "'%s'" % self.vector_ids[0]) - else: - return False - - if right != right: - if strict: - raise ValueError("Is_forbidden must be called with the " - "instantiated vector id in the " - "forbidden clause; you are missing " - "'%s'" % self.vector_ids[1]) - else: - return False - - # Relation is always evaluated against actual value and not vector representation - return self._is_forbidden(self.left._transform(left), self.right._transform(right)) - - cdef int _is_forbidden_vector(self, DTYPE_t left, DTYPE_t right) except -1: - pass - - -cdef class ForbiddenLessThanRelation(ForbiddenRelation): - """A ForbiddenLessThan relation between two hyperparameters. - - The ForbiddenLessThan compares the values of two hyperparameters. - - >>> from ConfigSpace import ConfigurationSpace, ForbiddenLessThanRelation - >>> - >>> cs = ConfigurationSpace({"a": [1, 2, 3], "b": [2, 5, 6]}) - >>> - >>> forbidden_clause = ForbiddenLessThanRelation(cs['a'], cs['b']) - >>> cs.add_forbidden_clause(forbidden_clause) - Forbidden: a < b - - Note - ---- - If the values of the both hyperparameters are not comparible - (e.g. comparing int and str), a TypeError is raised. For OrdinalHyperparameters - the actual values are used for comparison **not** their ordinal value. 
- - Parameters - ---------- - left : :ref:`Hyperparameters` - left side of the comparison - - right : :ref:`Hyperparameters` - right side of the comparison - """ - - def __repr__(self): - return "Forbidden: %s < %s" % (self.left.name, self.right.name) - - cdef int _is_forbidden(self, left, right) except -1: - return left < right - - cdef int _is_forbidden_vector(self, DTYPE_t left, DTYPE_t right) except -1: - return left < right - - -cdef class ForbiddenEqualsRelation(ForbiddenRelation): - """A ForbiddenEquals relation between two hyperparameters. - - The ForbiddenEquals compares the values of two hyperparameters. - - >>> from ConfigSpace import ConfigurationSpace, ForbiddenEqualsRelation - >>> - >>> cs = ConfigurationSpace({"a": [1, 2, 3], "b": [2, 5, 6]}) - >>> - >>> forbidden_clause = ForbiddenEqualsRelation(cs['a'], cs['b']) - >>> cs.add_forbidden_clause(forbidden_clause) - Forbidden: a == b - - Note - ---- - If the values of the both hyperparameters are not comparible - (e.g. comparing int and str), a TypeError is raised. For OrdinalHyperparameters - the actual values are used for comparison **not** their ordinal value. - - Parameters - ---------- - left : :ref:`Hyperparameters` - left side of the comparison - right : :ref:`Hyperparameters` - right side of the comparison - """ - - def __repr__(self): - return "Forbidden: %s == %s" % (self.left.name, self.right.name) - - cdef int _is_forbidden(self, left, right) except -1: - return left == right - - cdef int _is_forbidden_vector(self, DTYPE_t left, DTYPE_t right) except -1: - return left == right - - -cdef class ForbiddenGreaterThanRelation(ForbiddenRelation): - """A ForbiddenGreaterThan relation between two hyperparameters. - - The ForbiddenGreaterThan compares the values of two hyperparameters. 
- - >>> from ConfigSpace import ConfigurationSpace, ForbiddenGreaterThanRelation - >>> - >>> cs = ConfigurationSpace({"a": [1, 2, 3], "b": [2, 5, 6]}) - >>> forbidden_clause = ForbiddenGreaterThanRelation(cs['a'], cs['b']) - >>> - >>> cs.add_forbidden_clause(forbidden_clause) - Forbidden: a > b - - Note - ---- - If the values of the both hyperparameters are not comparible - (e.g. comparing int and str), a TypeError is raised. For OrdinalHyperparameters - the actual values are used for comparison **not** their ordinal value. - - Parameters - ---------- - left : :ref:`Hyperparameters` - left side of the comparison - right : :ref:`Hyperparameters` - right side of the comparison - """ - - def __repr__(self): - return "Forbidden: %s > %s" % (self.left.name, self.right.name) - - cdef int _is_forbidden(self, left, right) except -1: - return left > right - - cdef int _is_forbidden_vector(self, DTYPE_t left, DTYPE_t right) except -1: - return left > right diff --git a/ConfigSpace/functional.py b/ConfigSpace/functional.py deleted file mode 100644 index 4abeed3b..00000000 --- a/ConfigSpace/functional.py +++ /dev/null @@ -1,83 +0,0 @@ -from __future__ import annotations - -from typing import Iterator - -import numpy as np -from more_itertools import roundrobin - - -def center_range( - center: int, - low: int, - high: int, - step: int = 1, -) -> Iterator[int]: - """Get a range centered around a value. 
- - >>> list(center_range(5, 0, 10)) - [4, 6, 3, 7, 2, 8, 1, 9, 0, 10] - - Parameters - ---------- - center: int - The center of the range - - low: int - The low end of the range - - high: int - The high end of the range - - step: int = 1 - The step size - - Returns - ------- - Iterator[int] - """ - assert low <= center <= high - above_center = range(center + step, high + 1, step) - below_center = range(center - step, low - 1, -step) - yield from roundrobin(below_center, above_center) - - -def arange_chunked( - start: int, - stop: int, - step: int = 1, - *, - chunk_size: int, -) -> Iterator[np.ndarray]: - """Get np.arange in a chunked fashion. - - >>> list(arange_chunked(0, 10, 3)) - [array([0, 1, 2]), array([3, 4, 5]), array([6, 7, 8]), array([9])] - - Parameters - ---------- - start: int - The start of the range - - stop: int - The stop of the range - - chunk_size: int - The size of the chunks - - step: int = 1 - The step size - - Returns - ------- - Iterator[np.ndarray] - """ - assert step > 0 - assert chunk_size > 0 - assert start < stop - n_items = int(np.ceil((stop - start) / step)) - n_chunks = int(np.ceil(n_items / chunk_size)) - - for chunk in range(0, n_chunks): - chunk_start = start + (chunk * chunk_size) - chunk_stop = min(chunk_start + chunk_size, stop) - yield np.arange(chunk_start, chunk_stop, step) diff --git a/ConfigSpace/hyperparameters/__init__.py b/ConfigSpace/hyperparameters/__init__.py deleted file mode 100644 index e8410058..00000000 --- a/ConfigSpace/hyperparameters/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -from .beta_float import BetaFloatHyperparameter -from .beta_integer import BetaIntegerHyperparameter -from .categorical import CategoricalHyperparameter -from .constant import Constant, UnParametrizedHyperparameter -from .float_hyperparameter import FloatHyperparameter -from .hyperparameter import Hyperparameter -from .integer_hyperparameter import IntegerHyperparameter -from .normal_float import NormalFloatHyperparameter -from 
.normal_integer import NormalIntegerHyperparameter -from .numerical import NumericalHyperparameter -from .ordinal import OrdinalHyperparameter -from .uniform_float import UniformFloatHyperparameter -from .uniform_integer import UniformIntegerHyperparameter - -__all__ = [ - "Hyperparameter", - "Constant", - "UnParametrizedHyperparameter", - "OrdinalHyperparameter", - "CategoricalHyperparameter", - "NumericalHyperparameter", - "FloatHyperparameter", - "IntegerHyperparameter", - "UniformFloatHyperparameter", - "UniformIntegerHyperparameter", - "NormalFloatHyperparameter", - "NormalIntegerHyperparameter", - "BetaFloatHyperparameter", - "BetaIntegerHyperparameter", -] diff --git a/ConfigSpace/hyperparameters/beta_float.pxd b/ConfigSpace/hyperparameters/beta_float.pxd deleted file mode 100644 index 73f62770..00000000 --- a/ConfigSpace/hyperparameters/beta_float.pxd +++ /dev/null @@ -1,19 +0,0 @@ -import numpy as np -cimport numpy as np -np.import_array() - -# We now need to fix a datatype for our arrays. I've used the variable -# DTYPE for this, which is assigned to the usual NumPy runtime -# type info object. -DTYPE = float -# "ctypedef" assigns a corresponding compile-time type to DTYPE_t. For -# every type in the numpy module there's a corresponding compile-time -# type with a _t-suffix. 
-ctypedef np.float_t DTYPE_t - -from .uniform_float cimport UniformFloatHyperparameter - - -cdef class BetaFloatHyperparameter(UniformFloatHyperparameter): - cdef public alpha - cdef public beta diff --git a/ConfigSpace/hyperparameters/beta_float.pyx b/ConfigSpace/hyperparameters/beta_float.pyx deleted file mode 100644 index 3192cc2a..00000000 --- a/ConfigSpace/hyperparameters/beta_float.pyx +++ /dev/null @@ -1,231 +0,0 @@ -import io -import warnings -from typing import Any, Dict, Union, Optional - -from scipy.stats import beta as spbeta - -import numpy as np -cimport numpy as np -np.import_array() - -from ConfigSpace.hyperparameters.beta_integer cimport BetaIntegerHyperparameter - - -cdef class BetaFloatHyperparameter(UniformFloatHyperparameter): - - def __init__(self, name: str, alpha: Union[int, float], beta: Union[int, float], - lower: Union[float, int], - upper: Union[float, int], - default_value: Union[None, float] = None, - q: Union[int, float, None] = None, log: bool = False, - meta: Optional[Dict] = None) -> None: - r""" - A beta distributed float hyperparameter. The 'lower' and 'upper' parameters move the - distribution from the [0, 1]-range and scale it appropriately, but the shape of the - distribution is preserved as if it were in [0, 1]-range. - - Its values are sampled from a beta distribution - :math:`Beta(\alpha, \beta)`. - - >>> from ConfigSpace import BetaFloatHyperparameter - >>> - >>> BetaFloatHyperparameter('b', alpha=3, beta=2, lower=1, upper=4, log=False) - b, Type: BetaFloat, Alpha: 3.0 Beta: 2.0, Range: [1.0, 4.0], Default: 3.0 - - Parameters - ---------- - name : str - Name of the hyperparameter, with which it can be accessed - alpha : int, float - Alpha parameter of the normalized beta distribution - beta : int, float - Beta parameter of the normalized beta distribution - lower : int, float - Lower bound of a range of values from which the hyperparameter will be sampled. 
- The Beta disribution gets scaled by the total range of the hyperparameter. - upper : int, float - Upper bound of a range of values from which the hyperparameter will be sampled. - The Beta disribution gets scaled by the total range of the hyperparameter. - default_value : int, float, optional - Sets the default value of a hyperparameter to a given value - q : int, float, optional - Quantization factor - log : bool, optional - If ``True``, the values of the hyperparameter will be sampled - on a logarithmic scale. Default to ``False`` - meta : Dict, optional - Field for holding meta data provided by the user. - Not used by the configuration space. - """ - # TODO - we cannot use the check_default of UniformFloat (but everything else), - # but we still need to overwrite it. Thus, we first just need it not to raise an - # error, which we do by setting default_value = upper - lower / 2 to not raise an error, - # then actually call check_default once we have alpha and beta, and are not inside - # UniformFloatHP. - super(BetaFloatHyperparameter, self).__init__( - name, lower, upper, (upper + lower) / 2, q, log, meta) - self.alpha = float(alpha) - self.beta = float(beta) - if (alpha < 1) or (beta < 1): - raise ValueError("Please provide values of alpha and beta larger than or equal to\ - 1 so that the probability density is finite.") - - if (self.q is not None) and (self.log is not None) and (default_value is None): - warnings.warn("Logscale and quantization together results in incorrect default values. 
" - "We recommend specifying a default value manually for this specific case.") - - self.default_value = self.check_default(default_value) - self.normalized_default_value = self._inverse_transform(self.default_value) - - def __repr__(self) -> str: - repr_str = io.StringIO() - repr_str.write("%s, Type: BetaFloat, Alpha: %s Beta: %s, Range: [%s, %s], Default: %s" % (self.name, repr(self.alpha), repr(self.beta), repr(self.lower), repr(self.upper), repr(self.default_value))) - - if self.log: - repr_str.write(", on log-scale") - if self.q is not None: - repr_str.write(", Q: %s" % str(self.q)) - repr_str.seek(0) - return repr_str.getvalue() - - def __eq__(self, other: Any) -> bool: - """ - This method implements a comparison between self and another - object. - - Additionally, it defines the __ne__() as stated in the - documentation from python: - By default, object implements __eq__() by using is, returning NotImplemented - in the case of a false comparison: True if x is y else NotImplemented. - For __ne__(), by default it delegates to __eq__() and inverts the result - unless it is NotImplemented. 
- - """ - if not isinstance(other, self.__class__): - return False - - return ( - self.name == other.name and - self.default_value == other.default_value and - self.alpha == other.alpha and - self.beta == other.beta and - self.log == other.log and - self.q == other.q and - self.lower == other.lower and - self.upper == other.upper - ) - - def __copy__(self): - return BetaFloatHyperparameter( - name=self.name, - default_value=self.default_value, - alpha=self.alpha, - beta=self.beta, - log=self.log, - q=self.q, - lower=self.lower, - upper=self.upper, - meta=self.meta - ) - - def __hash__(self): - return hash((self.name, self.alpha, self.beta, self.lower, self.upper, self.log, self.q)) - - def to_uniform(self) -> "UniformFloatHyperparameter": - return UniformFloatHyperparameter(self.name, - self.lower, - self.upper, - default_value=self.default_value, - q=self.q, log=self.log, meta=self.meta) - - def check_default(self, default_value: Union[int, float, None]) -> Union[int, float]: - # return mode as default - # TODO - for log AND quantization together specifially, this does not give the exact right - # value, due to the bounds _lower and _upper being adjusted when quantizing in - # UniformFloat. - if default_value is None: - if (self.alpha > 1) or (self.beta > 1): - normalized_mode = (self.alpha - 1) / (self.alpha + self.beta - 2) - else: - # If both alpha and beta are 1, we have a uniform distribution. 
- normalized_mode = 0.5 - - ub = self._inverse_transform(self.upper) - lb = self._inverse_transform(self.lower) - scaled_mode = normalized_mode * (ub - lb) + lb - return self._transform_scalar(scaled_mode) - - elif self.is_legal(default_value): - return default_value - else: - raise ValueError("Illegal default value %s" % str(default_value)) - - def to_integer(self) -> "BetaIntegerHyperparameter": - if self.q is None: - q_int = None - else: - q_int = int(np.rint(self.q)) - - lower = int(np.ceil(self.lower)) - upper = int(np.floor(self.upper)) - default_value = int(np.rint(self.default_value)) - return BetaIntegerHyperparameter(self.name, lower=lower, upper=upper, alpha=self.alpha, beta=self.beta, - default_value=default_value, q=q_int, log=self.log) - - def is_legal(self, value: Union[float]) -> bool: - if isinstance(value, (float, int)): - return self.upper >= value >= self.lower - return False - - cpdef bint is_legal_vector(self, DTYPE_t value): - return self._upper >= value >= self._lower - - def _sample(self, rs: np.random.RandomState, size: Optional[int] = None - ) -> Union[np.ndarray, float]: - alpha = self.alpha - beta = self.beta - return spbeta(alpha, beta).rvs(size=size, random_state=rs) - - def _pdf(self, vector: np.ndarray) -> np.ndarray: - """ - Computes the probability density function of the parameter in - the transformed (and possibly normalized, depends on the parameter - type) space. As such, one never has to worry about log-normal - distributions, only normal distributions (as the inverse_transform - in the pdf method handles these). - - Parameters - ---------- - vector: np.ndarray - the (N, ) vector of inputs for which the probability density - function is to be computed. 
- - Returns - ---------- - np.ndarray(N, ) - Probability density values of the input vector - """ - ub = self._inverse_transform(self.upper) - lb = self._inverse_transform(self.lower) - alpha = self.alpha - beta = self.beta - return spbeta(alpha, beta, loc=lb, scale=ub-lb).pdf(vector) \ - * (ub-lb) / (self._upper - self._lower) - - def get_max_density(self) -> float: - if (self.alpha > 1) or (self.beta > 1): - normalized_mode = (self.alpha - 1) / (self.alpha + self.beta - 2) - elif self.alpha < self.beta: - normalized_mode = 0 - elif self.alpha > self.beta: - normalized_mode = 1 - else: - normalized_mode = 0.5 - - ub = self._inverse_transform(self.upper) - lb = self._inverse_transform(self.lower) - scaled_mode = normalized_mode * (ub - lb) + lb - - # Since _pdf takes only a numpy array, we have to create the array, - # and retrieve the element in the first (and only) spot in the array - return self._pdf(np.array([scaled_mode]))[0] diff --git a/ConfigSpace/hyperparameters/beta_integer.pxd b/ConfigSpace/hyperparameters/beta_integer.pxd deleted file mode 100644 index 11319cf0..00000000 --- a/ConfigSpace/hyperparameters/beta_integer.pxd +++ /dev/null @@ -1,21 +0,0 @@ -import numpy as np -cimport numpy as np -np.import_array() - -# We now need to fix a datatype for our arrays. I've used the variable -# DTYPE for this, which is assigned to the usual NumPy runtime -# type info object. -DTYPE = float -# "ctypedef" assigns a corresponding compile-time type to DTYPE_t. For -# every type in the numpy module there's a corresponding compile-time -# type with a _t-suffix. 
-ctypedef np.float_t DTYPE_t - -from ConfigSpace.hyperparameters.uniform_integer cimport UniformIntegerHyperparameter - - -cdef class BetaIntegerHyperparameter(UniformIntegerHyperparameter): - cdef public alpha - cdef public beta - cdef public bfhp - cdef public normalization_constant diff --git a/ConfigSpace/hyperparameters/beta_integer.pyx b/ConfigSpace/hyperparameters/beta_integer.pyx deleted file mode 100644 index ed31c8aa..00000000 --- a/ConfigSpace/hyperparameters/beta_integer.pyx +++ /dev/null @@ -1,220 +0,0 @@ -import io -from typing import Any, Dict, Optional, Union - -from scipy.stats import beta as spbeta - -import numpy as np -cimport numpy as np -np.import_array() - -from ConfigSpace.functional import arange_chunked -from ConfigSpace.hyperparameters.beta_float cimport BetaFloatHyperparameter - -# OPTIM: Some operations generate an arange which could blowup memory if -# done over the entire space of integers (int32/64). -# To combat this, `arange_chunked` is used in scenarios where reducion -# operations over all the elments could be done in partial steps independantly. -# For example, a sum over the pdf values could be done in chunks. -# This may add some small overhead for smaller ranges but is unlikely to -# be noticable. -ARANGE_CHUNKSIZE = 10_000_000 - - -cdef class BetaIntegerHyperparameter(UniformIntegerHyperparameter): - - def __init__(self, name: str, alpha: Union[int, float], beta: Union[int, float], - lower: Union[int, float], - upper: Union[int, float], - default_value: Union[int, None] = None, q: Union[None, int] = None, - log: bool = False, - meta: Optional[Dict] = None) -> None: - r""" - A beta distributed integer hyperparameter. The 'lower' and 'upper' parameters move the - distribution from the [0, 1]-range and scale it appropriately, but the shape of the - distribution is preserved as if it were in [0, 1]-range. - - Its values are sampled from a beta distribution - :math:`Beta(\alpha, \beta)`. 
- - >>> from ConfigSpace import BetaIntegerHyperparameter - >>> - >>> BetaIntegerHyperparameter('b', alpha=3, beta=2, lower=1, upper=4, log=False) - b, Type: BetaInteger, Alpha: 3.0 Beta: 2.0, Range: [1, 4], Default: 3 - - - Parameters - ---------- - name : str - Name of the hyperparameter with which it can be accessed - alpha : int, float - Alpha parameter of the distribution, from which hyperparameter is sampled - beta : int, float - Beta parameter of the distribution, from which - hyperparameter is sampled - lower : int, float - Lower bound of a range of values from which the hyperparameter will be sampled - upper : int, float - Upper bound of a range of values from which the hyperparameter will be sampled - default_value : int, optional - Sets the default value of a hyperparameter to a given value - q : int, optional - Quantization factor - log : bool, optional - If ``True``, the values of the hyperparameter will be sampled - on a logarithmic scale. Defaults to ``False`` - meta : Dict, optional - Field for holding meta data provided by the user. - Not used by the configuration space. 
- - """ - super(BetaIntegerHyperparameter, self).__init__( - name, lower, upper, np.round((upper + lower) / 2), q, log, meta) - self.alpha = float(alpha) - self.beta = float(beta) - if (alpha < 1) or (beta < 1): - raise ValueError("Please provide values of alpha and beta larger than or equal to\ - 1 so that the probability density is finite.") - if self.q is None: - q = 1 - else: - q = self.q - self.bfhp = BetaFloatHyperparameter(self.name, - self.alpha, - self.beta, - log=self.log, - q=q, - lower=self.lower, - upper=self.upper, - default_value=self.default_value) - - self.default_value = self.check_default(default_value) - self.normalized_default_value = self._inverse_transform(self.default_value) - self.normalization_constant = self._compute_normalization() - - def __repr__(self) -> str: - repr_str = io.StringIO() - repr_str.write("%s, Type: BetaInteger, Alpha: %s Beta: %s, Range: [%s, %s], Default: %s" % (self.name, repr(self.alpha), repr(self.beta), repr(self.lower), repr(self.upper), repr(self.default_value))) - - if self.log: - repr_str.write(", on log-scale") - if self.q is not None: - repr_str.write(", Q: %s" % str(self.q)) - repr_str.seek(0) - return repr_str.getvalue() - - def __eq__(self, other: Any) -> bool: - """ - This method implements a comparison between self and another - object. - - Additionally, it defines the __ne__() as stated in the - documentation from python: - By default, object implements __eq__() by using is, returning NotImplemented - in the case of a false comparison: True if x is y else NotImplemented. - For __ne__(), by default it delegates to __eq__() and inverts the result - unless it is NotImplemented. 
- - """ - if not isinstance(other, self.__class__): - return False - - return ( - self.name == other.name and - self.alpha == other.alpha and - self.beta == other.beta and - self.log == other.log and - self.q == other.q and - self.lower == other.lower and - self.upper == other.upper - ) - - def __hash__(self): - return hash((self.name, self.alpha, self.beta, self.lower, self.upper, self.log, self.q)) - - def __copy__(self): - return BetaIntegerHyperparameter( - name=self.name, - default_value=self.default_value, - alpha=self.alpha, - beta=self.beta, - log=self.log, - q=self.q, - lower=self.lower, - upper=self.upper, - meta=self.meta - ) - - def to_uniform(self) -> "UniformIntegerHyperparameter": - return UniformIntegerHyperparameter(self.name, - self.lower, - self.upper, - default_value=self.default_value, - q=self.q, log=self.log, meta=self.meta) - - def check_default(self, default_value: Union[int, float, None]) -> int: - if default_value is None: - # Here, we just let the BetaFloat take care of the default value - # computation, and just tansform it accordingly - value = self.bfhp.check_default(None) - value = self._inverse_transform(value) - value = self._transform(value) - return value - - if self.is_legal(default_value): - return default_value - else: - raise ValueError("Illegal default value {}".format(default_value)) - - def _sample(self, rs: np.random.RandomState, size: Optional[int] = None - ) -> Union[np.ndarray, float]: - value = self.bfhp._sample(rs, size=size) - # Map all floats which belong to the same integer value to the same - # float value by first transforming it to an integer and then - # transforming it back to a float between zero and one - value = self._transform(value) - value = self._inverse_transform(value) - return value - - def _compute_normalization(self): - if self.upper - self.lower > ARANGE_CHUNKSIZE: - a = self.bfhp._inverse_transform(self.lower) - b = self.bfhp._inverse_transform(self.upper) - confidence = 0.999999 - rv = 
spbeta(self.alpha, self.beta, loc=a, scale=b-a) - u, v = rv.ppf((1 - confidence) / 2), rv.ppf((1 + confidence) / 2) - lb = max(self.bfhp._transform(u), self.lower) - ub = min(self.bfhp._transform(v), self.upper + 1) - else: - lb = self.lower - ub = self.upper + 1 - - chunks = arange_chunked(lb, ub, chunk_size=ARANGE_CHUNKSIZE) - return sum(self.bfhp.pdf(chunk).sum() for chunk in chunks) - - def _pdf(self, vector: np.ndarray) -> np.ndarray: - """ - Computes the probability density function of the parameter in - the transformed (and possibly normalized, depends on the parameter - type) space. As such, one never has to worry about log-normal - distributions, only normal distributions (as the inverse_transform - in the pdf method handles these). Optimally, an IntegerHyperparameter - should have a corresponding float, which can be utlized for the calls - to the probability density function (see e.g. NormalIntegerHyperparameter) - - Parameters - ---------- - vector: np.ndarray - the (N, ) vector of inputs for which the probability density - function is to be computed. - - Returns - ---------- - np.ndarray(N, ) - Probability density values of the input vector - """ - return self.bfhp._pdf(vector) / self.normalization_constant - - def get_max_density(self): - chunks = arange_chunked(self.lower, self.upper + 1, chunk_size=ARANGE_CHUNKSIZE) - maximum = max(self.bfhp.pdf(chunk).max() for chunk in chunks) - return maximum / self.normalization_constant diff --git a/ConfigSpace/hyperparameters/categorical.pyx b/ConfigSpace/hyperparameters/categorical.pyx deleted file mode 100644 index 5974819e..00000000 --- a/ConfigSpace/hyperparameters/categorical.pyx +++ /dev/null @@ -1,400 +0,0 @@ -from collections import Counter -import copy -import io -from typing import Any, Dict, List, Optional, Sequence, Tuple, Union - -import numpy as np -cimport numpy as np -np.import_array() - -# We now need to fix a datatype for our arrays. 
I've used the variable -# DTYPE for this, which is assigned to the usual NumPy runtime -# type info object. -DTYPE = float -# "ctypedef" assigns a corresponding compile-time type to DTYPE_t. For -# every type in the numpy module there's a corresponding compile-time -# type with a _t-suffix. -ctypedef np.float_t DTYPE_t - -from ConfigSpace.hyperparameters.hyperparameter cimport Hyperparameter - - -cdef class CategoricalHyperparameter(Hyperparameter): - cdef public tuple choices - cdef public tuple weights - cdef public int num_choices - cdef public tuple probabilities - cdef list choices_vector - cdef set _choices_set - - # TODO add more magic for automated type recognition - # TODO move from list to tuple for choices argument - def __init__( - self, - name: str, - choices: Union[List[Union[str, float, int]], Tuple[Union[float, int, str]]], - default_value: Union[int, float, str, None] = None, - meta: Optional[Dict] = None, - weights: Optional[Sequence[Union[int, float]]] = None - ) -> None: - """ - A categorical hyperparameter. - - Its values are sampled from a set of ``values``. - - ``None`` is a forbidden value, please use a string constant instead and parse - it in your own code, see `here _` - for further details. - - >>> from ConfigSpace import CategoricalHyperparameter - >>> - >>> CategoricalHyperparameter('c', choices=['red', 'green', 'blue']) - c, Type: Categorical, Choices: {red, green, blue}, Default: red - - Parameters - ---------- - name : str - Name of the hyperparameter, with which it can be accessed - choices : list or tuple with str, float, int - Collection of values to sample hyperparameter from - default_value : int, float, str, optional - Sets the default value of the hyperparameter to a given value - meta : Dict, optional - Field for holding meta data provided by the user. - Not used by the configuration space. 
- weights: Sequence[int | float] | None = None - List of weights for the choices to be used (after normalization) as - probabilities during sampling, no negative values allowed - """ - - super(CategoricalHyperparameter, self).__init__(name, meta) - # TODO check that there is no bullshit in the choices! - counter = Counter(choices) - for choice in choices: - if counter[choice] > 1: - raise ValueError( - "Choices for categorical hyperparameters %s contain choice '%s' %d " - "times, while only a single oocurence is allowed." - % (name, choice, counter[choice]) - ) - if choice is None: - raise TypeError("Choice 'None' is not supported") - if isinstance(choices, set): - raise TypeError("Using a set of choices is prohibited as it can result in " - "non-deterministic behavior. Please use a list or a tuple.") - if isinstance(weights, set): - raise TypeError("Using a set of weights is prohibited as it can result in " - "non-deterministic behavior. Please use a list or a tuple.") - self.choices = tuple(choices) - if weights is not None: - self.weights = tuple(weights) - else: - self.weights = None - self.probabilities = self._get_probabilities(choices=self.choices, weights=weights) - self.num_choices = len(choices) - self.choices_vector = list(range(self.num_choices)) - self._choices_set = set(self.choices_vector) - self.default_value = self.check_default(default_value) - self.normalized_default_value = self._inverse_transform(self.default_value) - - def __repr__(self) -> str: - repr_str = io.StringIO() - repr_str.write("%s, Type: Categorical, Choices: {" % (self.name)) - for idx, choice in enumerate(self.choices): - repr_str.write(str(choice)) - if idx < len(self.choices) - 1: - repr_str.write(", ") - repr_str.write("}") - repr_str.write(", Default: ") - repr_str.write(str(self.default_value)) - # if the probability distribution is not uniform, write out the probabilities - if not np.all(self.probabilities == self.probabilities[0]): - repr_str.write(", Probabilities: %s" % 
str(self.probabilities)) - repr_str.seek(0) - return repr_str.getvalue() - - def __eq__(self, other: Any) -> bool: - """ - This method implements a comparison between self and another - object. - - Additionally, it defines the __ne__() as stated in the - documentation from python: - By default, object implements __eq__() by using is, returning NotImplemented - in the case of a false comparison: True if x is y else NotImplemented. - For __ne__(), by default it delegates to __eq__() and inverts the result - unless it is NotImplemented. - - """ - if not isinstance(other, self.__class__): - return False - - if self.probabilities is not None: - ordered_probabilities_self = { - choice: self.probabilities[i] for i, choice in enumerate(self.choices) - } - else: - ordered_probabilities_self = None - if other.probabilities is not None: - ordered_probabilities_other = { - choice: ( - other.probabilities[other.choices.index(choice)] - if choice in other.choices else - None - ) - for choice in self.choices - } - else: - ordered_probabilities_other = None - - return ( - self.name == other.name and - set(self.choices) == set(other.choices) and - self.default_value == other.default_value and - ( - (ordered_probabilities_self is None and ordered_probabilities_other is None) or - ordered_probabilities_self == ordered_probabilities_other or - ( - ordered_probabilities_self is None - and len(np.unique(list(ordered_probabilities_other.values()))) == 1 - ) or - ( - ordered_probabilities_other is None - and len(np.unique(list(ordered_probabilities_self.values()))) == 1 - ) - ) - ) - - def __hash__(self): - return hash((self.name, self.choices)) - - def __copy__(self): - return CategoricalHyperparameter( - name=self.name, - choices=copy.deepcopy(self.choices), - default_value=self.default_value, - weights=copy.deepcopy(self.weights), - meta=self.meta - ) - - def to_uniform(self) -> "CategoricalHyperparameter": - """ - Creates a categorical parameter with equal weights for all choices - 
This is used for the uniform configspace when sampling configurations in the local search - in PiBO: https://openreview.net/forum?id=MMAeCXIa89 - - Returns - ---------- - CategoricalHyperparameter - An identical parameter as the original, except that all weights are uniform. - """ - return CategoricalHyperparameter( - name=self.name, - choices=copy.deepcopy(self.choices), - default_value=self.default_value, - meta=self.meta - ) - - cpdef int compare(self, value: Union[int, float, str], value2: Union[int, float, str]): - if value == value2: - return 0 - else: - return 1 - - cpdef int compare_vector(self, DTYPE_t value, DTYPE_t value2): - if value == value2: - return 0 - else: - return 1 - - def is_legal(self, value: Union[None, str, float, int]) -> bool: - if value in self.choices: - return True - else: - return False - - cpdef bint is_legal_vector(self, DTYPE_t value): - return value in self._choices_set - - def _get_probabilities(self, choices: Tuple[Union[None, str, float, int]], - weights: Union[None, List[float]]) -> Union[None, List[float]]: - if weights is None: - return tuple(np.ones(len(choices)) / len(choices)) - - if len(weights) != len(choices): - raise ValueError( - "The list of weights and the list of choices are required to be of same length.") - - weights = np.array(weights) - - if np.all(weights == 0): - raise ValueError("At least one weight has to be strictly positive.") - - if np.any(weights < 0): - raise ValueError("Negative weights are not allowed.") - - return tuple(weights / np.sum(weights)) - - def check_default(self, default_value: Union[None, str, float, int] - ) -> Union[str, float, int]: - if default_value is None: - return self.choices[np.argmax(self.weights) if self.weights is not None else 0] - elif self.is_legal(default_value): - return default_value - else: - raise ValueError("Illegal default value %s" % str(default_value)) - - def _sample(self, rs: np.random.RandomState, size: Optional[int] = None - ) -> Union[int, np.ndarray]: - 
return rs.choice(a=self.num_choices, size=size, replace=True, p=self.probabilities) - - cpdef np.ndarray _transform_vector(self, np.ndarray vector): - if np.isnan(vector).any(): - raise ValueError('Vector %s contains NaN\'s' % vector) - - if np.equal(np.mod(vector, 1), 0): - return self.choices[vector.astype(int)] - - raise ValueError("Can only index the choices of the ordinal " - "hyperparameter %s with an integer, but provided " - "the following float: %f" % (self, vector)) - - def _transform_scalar(self, scalar: Union[float, int]) -> Union[float, int, str]: - if scalar != scalar: - raise ValueError("Number %s is NaN" % scalar) - - if scalar % 1 == 0: - return self.choices[int(scalar)] - - raise ValueError("Can only index the choices of the ordinal " - "hyperparameter %s with an integer, but provided " - "the following float: %f" % (self, scalar)) - - def _transform(self, vector: Union[np.ndarray, float, int, str] - ) -> Optional[Union[np.ndarray, float, int]]: - try: - if isinstance(vector, np.ndarray): - return self._transform_vector(vector) - return self._transform_scalar(vector) - except ValueError: - return None - - def _inverse_transform(self, vector: Union[None, str, float, int]) -> Union[int, float]: - if vector is None: - return np.NaN - return self.choices.index(vector) - - def has_neighbors(self) -> bool: - return len(self.choices) > 1 - - def get_num_neighbors(self, value = None) -> int: - return len(self.choices) - 1 - - def get_neighbors(self, value: int, rs: np.random.RandomState, - number: Union[int, float] = np.inf, transform: bool = False - ) -> List[Union[float, int, str]]: - neighbors = [] # type: List[Union[float, int, str]] - if number < len(self.choices): - while len(neighbors) < number: - rejected = True - index = int(value) - while rejected: - neighbor_idx = rs.randint(0, self.num_choices) - if neighbor_idx != index: - rejected = False - - if transform: - candidate = self._transform(neighbor_idx) - else: - candidate = float(neighbor_idx) 
- - if candidate in neighbors: - continue - else: - neighbors.append(candidate) - else: - for candidate_idx, candidate_value in enumerate(self.choices): - if int(value) == candidate_idx: - continue - else: - if transform: - candidate = self._transform(candidate_idx) - else: - candidate = float(candidate_idx) - - neighbors.append(candidate) - - return neighbors - - def allow_greater_less_comparison(self) -> bool: - raise ValueError("Parent hyperparameter in a > or < " - "condition must be a subclass of " - "NumericalHyperparameter or " - "OrdinalHyperparameter, but is " - "") - - def pdf(self, vector: np.ndarray) -> np.ndarray: - """ - Computes the probability density function of the parameter in - the original parameter space (the one specified by the user). - For each parameter type, there is also a method _pdf which - operates on the transformed (and possibly normalized) parameter - space. Only legal values return a positive probability density, - otherwise zero. - - Parameters - ---------- - vector: np.ndarray - the (N, ) vector of inputs for which the probability density - function is to be computed. - - Returns - ---------- - np.ndarray(N, ) - Probability density values of the input vector - """ - # this check is to ensure shape is right (and np.shape does not work in cython) - if vector.ndim != 1: - raise ValueError("Method pdf expects a one-dimensional numpy array") - vector = np.array(self._inverse_transform(vector)) - return self._pdf(vector) - - def _pdf(self, vector: np.ndarray) -> np.ndarray: - """ - Computes the probability density function of the parameter in - the transformed (and possibly normalized, depends on the parameter - type) space. As such, one never has to worry about log-normal - distributions, only normal distributions (as the inverse_transform - in the pdf method handles these). For categoricals, each vector gets - transformed to its corresponding index (but in float form). 
To be - able to retrieve the element corresponding to the index, the float - must be cast to int. - - Parameters - ---------- - vector: np.ndarray - the (N, ) vector of inputs for which the probability density - function is to be computed. - - Returns - ---------- - np.ndarray(N, ) - Probability density values of the input vector - """ - probs = np.array(self.probabilities) - nan = np.isnan(vector) - if np.any(nan): - # Temporarily pick any valid index to use `vector` as an index for `probs` - vector[nan] = 0 - res = np.array(probs[vector.astype(int)]) - if np.any(nan): - res[nan] = 0 - if res.ndim == 0: - return res.reshape(-1) - return res - - def get_max_density(self) -> float: - return np.max(self.probabilities) - - def get_size(self) -> float: - return len(self.choices) diff --git a/ConfigSpace/hyperparameters/constant.pyx b/ConfigSpace/hyperparameters/constant.pyx deleted file mode 100644 index 18c32c5d..00000000 --- a/ConfigSpace/hyperparameters/constant.pyx +++ /dev/null @@ -1,178 +0,0 @@ -from typing import Any, Dict, List, Optional, Union - -import numpy as np -cimport numpy as np -np.import_array() - -# We now need to fix a datatype for our arrays. I've used the variable -# DTYPE for this, which is assigned to the usual NumPy runtime -# type info object. -DTYPE = float -# "ctypedef" assigns a corresponding compile-time type to DTYPE_t. For -# every type in the numpy module there's a corresponding compile-time -# type with a _t-suffix. -ctypedef np.float_t DTYPE_t - -from ConfigSpace.hyperparameters.hyperparameter cimport Hyperparameter - - -cdef class Constant(Hyperparameter): - cdef public value - cdef DTYPE_t value_vector - - def __init__(self, name: str, value: Union[str, int, float], meta: Optional[Dict] = None - ) -> None: - """ - Representing a constant hyperparameter in the configuration space. - - By sampling from the configuration space each time only a single, - constant ``value`` will be drawn from this hyperparameter. 
- - Parameters - ---------- - name : str - Name of the hyperparameter, with which it can be accessed - value : str, int, float - value to sample hyperparameter from - meta : Dict, optional - Field for holding meta data provided by the user. - Not used by the configuration space. - """ - super(Constant, self).__init__(name, meta) - allowed_types = (int, float, str) - - if not isinstance(value, allowed_types) or \ - isinstance(value, bool): - raise TypeError("Constant value is of type %s, but only the " - "following types are allowed: %s" % - (type(value), allowed_types)) # type: ignore - - self.value = value - self.value_vector = 0. - self.default_value = value - self.normalized_default_value = 0. - - def __repr__(self) -> str: - repr_str = ["%s" % self.name, - "Type: Constant", - "Value: %s" % self.value] - return ", ".join(repr_str) - - def __eq__(self, other: Any) -> bool: - """ - This method implements a comparison between self and another - object. - - Additionally, it defines the __ne__() as stated in the - documentation from python: - By default, object implements __eq__() by using is, returning NotImplemented - in the case of a false comparison: True if x is y else NotImplemented. - For __ne__(), by default it delegates to __eq__() and inverts the result - unless it is NotImplemented. 
- - """ - if not isinstance(other, self.__class__): - return False - - return ( - self.value == other.value and - self.name == other.name and - self.default_value == other.default_value - ) - - def __copy__(self): - return Constant(self.name, self.value, meta=self.meta) - - def __hash__(self): - return hash((self.name, self.value)) - - def is_legal(self, value: Union[str, int, float]) -> bool: - return value == self.value - - cpdef bint is_legal_vector(self, DTYPE_t value): - return value == self.value_vector - - def _sample(self, rs: None, size: Optional[int] = None) -> Union[int, np.ndarray]: - return 0 if size == 1 else np.zeros((size,)) - - def _transform(self, vector: Optional[Union[np.ndarray, float, int]]) \ - -> Optional[Union[np.ndarray, float, int]]: - return self.value - - def _transform_vector(self, vector: Optional[np.ndarray]) \ - -> Optional[Union[np.ndarray, float, int]]: - return self.value - - def _transform_scalar(self, vector: Optional[Union[float, int]]) \ - -> Optional[Union[np.ndarray, float, int]]: - return self.value - - def _inverse_transform(self, vector: Union[np.ndarray, float, int] - ) -> Union[np.ndarray, int, float]: - if vector != self.value: - return np.NaN - return 0 - - def has_neighbors(self) -> bool: - return False - - def get_num_neighbors(self, value = None) -> int: - return 0 - - def get_neighbors(self, value: Any, rs: np.random.RandomState, number: int, - transform: bool = False) -> List: - return [] - - def pdf(self, vector: np.ndarray) -> np.ndarray: - """ - Computes the probability density function of the parameter in - the original parameter space (the one specified by the user). - For each hyperparameter type, there is also a method _pdf which - operates on the transformed (and possibly normalized) parameter - space. Only legal values return a positive probability density, - otherwise zero. 
- - Parameters - ---------- - vector: np.ndarray - the (N, ) vector of inputs for which the probability density - function is to be computed. - - Returns - ---------- - np.ndarray(N, ) - Probability density values of the input vector - """ - if vector.ndim != 1: - raise ValueError("Method pdf expects a one-dimensional numpy array") - return self._pdf(vector) - - def _pdf(self, vector: np.ndarray) -> np.ndarray: - """ - Computes the probability density function of the parameter in - the transformed (and possibly normalized, depends on the parameter - type) space. As such, one never has to worry about log-normal - distributions, only normal distributions (as the inverse_transform - in the pdf method handles these). - - Parameters - ---------- - vector: np.ndarray - the (N, ) vector of inputs for which the probability density - function is to be computed. - - Returns - ---------- - np.ndarray(N, ) - Probability density values of the input vector - """ - return (vector == self.value).astype(float) - - def get_max_density(self): - return 1.0 - - def get_size(self) -> float: - return 1.0 - -cdef class UnParametrizedHyperparameter(Constant): - pass diff --git a/ConfigSpace/hyperparameters/float_hyperparameter.pxd b/ConfigSpace/hyperparameters/float_hyperparameter.pxd deleted file mode 100644 index 66d39b1e..00000000 --- a/ConfigSpace/hyperparameters/float_hyperparameter.pxd +++ /dev/null @@ -1,19 +0,0 @@ -import numpy as np -cimport numpy as np -np.import_array() - -# We now need to fix a datatype for our arrays. I've used the variable -# DTYPE for this, which is assigned to the usual NumPy runtime -# type info object. -DTYPE = float -# "ctypedef" assigns a corresponding compile-time type to DTYPE_t. For -# every type in the numpy module there's a corresponding compile-time -# type with a _t-suffix. 
-ctypedef np.float_t DTYPE_t - -from .numerical cimport NumericalHyperparameter - - -cdef class FloatHyperparameter(NumericalHyperparameter): - cpdef double _transform_scalar(self, double scalar) - cpdef np.ndarray _transform_vector(self, np.ndarray vector) diff --git a/ConfigSpace/hyperparameters/float_hyperparameter.pyx b/ConfigSpace/hyperparameters/float_hyperparameter.pyx deleted file mode 100644 index 06b30dcf..00000000 --- a/ConfigSpace/hyperparameters/float_hyperparameter.pyx +++ /dev/null @@ -1,88 +0,0 @@ -from typing import Dict, Optional, Union - -import numpy as np -cimport numpy as np -np.import_array() - - -cdef class FloatHyperparameter(NumericalHyperparameter): - def __init__(self, name: str, default_value: Union[int, float], meta: Optional[Dict] = None - ) -> None: - super(FloatHyperparameter, self).__init__(name, default_value, meta) - - def is_legal(self, value: Union[int, float]) -> bool: - raise NotImplementedError() - - cpdef bint is_legal_vector(self, DTYPE_t value): - raise NotImplementedError() - - def check_default(self, default_value: Union[int, float]) -> float: - raise NotImplementedError() - - def _transform(self, vector: Union[np.ndarray, float, int] - ) -> Optional[Union[np.ndarray, float, int]]: - try: - if isinstance(vector, np.ndarray): - return self._transform_vector(vector) - return self._transform_scalar(vector) - except ValueError: - return None - - cpdef double _transform_scalar(self, double scalar): - raise NotImplementedError() - - cpdef np.ndarray _transform_vector(self, np.ndarray vector): - raise NotImplementedError() - - def pdf(self, vector: np.ndarray) -> np.ndarray: - """ - Computes the probability density function of the parameter in - the original parameter space (the one specified by the user). - For each parameter type, there is also a method _pdf which - operates on the transformed (and possibly normalized) parameter - space. Only legal values return a positive probability density, - otherwise zero. 
- - Parameters - ---------- - vector: np.ndarray - the (N, ) vector of inputs for which the probability density - function is to be computed. - - Returns - ---------- - np.ndarray(N, ) - Probability density values of the input vector - """ - if vector.ndim != 1: - raise ValueError("Method pdf expects a one-dimensional numpy array") - vector = self._inverse_transform(vector) - return self._pdf(vector) - - def _pdf(self, vector: np.ndarray) -> np.ndarray: - """ - Computes the probability density function of the parameter in - the transformed (and possibly normalized, depends on the parameter - type) space. As such, one never has to worry about log-normal - distributions, only normal distributions (as the inverse_transform - in the pdf method handles these). - - Parameters - ---------- - vector: np.ndarray - the (N, ) vector of inputs for which the probability density - function is to be computed. - - Returns - ---------- - np.ndarray(N, ) - Probability density values of the input vector - """ - raise NotImplementedError() - - def get_max_density(self) -> float: - """ - Returns the maximal density on the pdf for the parameter (so not - the mode, but the value of the pdf on the mode). - """ - raise NotImplementedError() diff --git a/ConfigSpace/hyperparameters/hyperparameter.pxd b/ConfigSpace/hyperparameters/hyperparameter.pxd deleted file mode 100644 index c51e0016..00000000 --- a/ConfigSpace/hyperparameters/hyperparameter.pxd +++ /dev/null @@ -1,22 +0,0 @@ -import numpy as np -cimport numpy as np -np.import_array() - -# We now need to fix a datatype for our arrays. I've used the variable -# DTYPE for this, which is assigned to the usual NumPy runtime -# type info object. -DTYPE = float -# "ctypedef" assigns a corresponding compile-time type to DTYPE_t. For -# every type in the numpy module there's a corresponding compile-time -# type with a _t-suffix. 
-ctypedef np.float_t DTYPE_t - - -cdef class Hyperparameter(object): - cdef public str name - cdef public default_value - cdef public DTYPE_t normalized_default_value - cdef public dict meta - - cpdef int compare_vector(self, DTYPE_t value, DTYPE_t value2) - cpdef bint is_legal_vector(self, DTYPE_t value) diff --git a/ConfigSpace/hyperparameters/hyperparameter.pyx b/ConfigSpace/hyperparameters/hyperparameter.pyx deleted file mode 100644 index e569fe80..00000000 --- a/ConfigSpace/hyperparameters/hyperparameter.pyx +++ /dev/null @@ -1,164 +0,0 @@ -from typing import Dict, Optional, Union - -import numpy as np -cimport numpy as np -np.import_array() - - -cdef class Hyperparameter(object): - - def __init__(self, name: str, meta: Optional[Dict]) -> None: - if not isinstance(name, str): - raise TypeError( - "The name of a hyperparameter must be an instance of" - " %s, but is %s." % (str(str), type(name))) - self.name: str = name - self.meta = meta - - def __repr__(self): - raise NotImplementedError() - - def is_legal(self, value): - raise NotImplementedError() - - cpdef bint is_legal_vector(self, DTYPE_t value): - """ - Check whether the given value is a legal value for the vector - representation of this hyperparameter. - - Parameters - ---------- - value - the vector value to check - - Returns - ------- - bool - True if the given value is a legal vector value, otherwise False - - """ - raise NotImplementedError() - - def sample(self, rs): - vector = self._sample(rs) - return self._transform(vector) - - def rvs( - self, - size: Optional[int] = None, - random_state: Optional[Union[int, np.random, np.random.RandomState]] = None - ) -> Union[float, np.ndarray]: - """ - scipy compatibility wrapper for ``_sample``, - allowing the hyperparameter to be used in sklearn API - hyperparameter searchers, eg. GridSearchCV. 
- - """ - - # copy-pasted from scikit-learn utils/validation.py - def check_random_state(seed): - """ - Turn seed into a np.random.RandomState instance - If seed is None (or np.random), return the RandomState singleton used - by np.random. - If seed is an int, return a new RandomState instance seeded with seed. - If seed is already a RandomState instance, return it. - If seed is a new-style np.random.Generator, return it. - Otherwise, raise ValueError. - - """ - if seed is None or seed is np.random: - return np.random.mtrand._rand - if isinstance(seed, (int, np.integer)): - return np.random.RandomState(seed) - if isinstance(seed, np.random.RandomState): - return seed - try: - # Generator is only available in numpy >= 1.17 - if isinstance(seed, np.random.Generator): - return seed - except AttributeError: - pass - raise ValueError("%r cannot be used to seed a numpy.random.RandomState" - " instance" % seed) - - # if size=None, return a value, but if size=1, return a 1-element array - - vector = self._sample( - rs=check_random_state(random_state), - size=size if size is not None else 1 - ) - if size is None: - vector = vector[0] - - return self._transform(vector) - - def _sample(self, rs, size): - raise NotImplementedError() - - def _transform( - self, - vector: Union[np.ndarray, float, int] - ) -> Optional[Union[np.ndarray, float, int]]: - raise NotImplementedError() - - def _inverse_transform(self, vector): - raise NotImplementedError() - - def has_neighbors(self): - raise NotImplementedError() - - def get_neighbors(self, value, rs, number, transform = False): - raise NotImplementedError() - - def get_num_neighbors(self, value): - raise NotImplementedError() - - cpdef int compare_vector(self, DTYPE_t value, DTYPE_t value2): - raise NotImplementedError() - - def pdf(self, vector: np.ndarray) -> np.ndarray: - """ - Computes the probability density function of the hyperparameter in - the hyperparameter space (the one specified by the user). 
- For each hyperparameter type, there is also a method _pdf which - operates on the transformed (and possibly normalized) hyperparameter - space. Only legal values return a positive probability density, - otherwise zero. - - Parameters - ---------- - vector: np.ndarray - the (N, ) vector of inputs for which the probability density - function is to be computed. - - Returns - ---------- - np.ndarray(N, ) - Probability density values of the input vector - """ - raise NotImplementedError() - - def _pdf(self, vector: np.ndarray) -> np.ndarray: - """ - Computes the probability density function of the hyperparameter in - the transformed (and possibly normalized, depends on the parameter - type) space. As such, one never has to worry about log-normal - distributions, only normal distributions (as the inverse_transform - in the pdf method handles these). - - Parameters - ---------- - vector: np.ndarray - the (N, ) vector of inputs for which the probability density - function is to be computed. - - Returns - ---------- - np.ndarray(N, ) - Probability density values of the input vector - """ - raise NotImplementedError() - - def get_size(self) -> float: - raise NotImplementedError() diff --git a/ConfigSpace/hyperparameters/integer_hyperparameter.pxd b/ConfigSpace/hyperparameters/integer_hyperparameter.pxd deleted file mode 100644 index 6e90dae9..00000000 --- a/ConfigSpace/hyperparameters/integer_hyperparameter.pxd +++ /dev/null @@ -1,20 +0,0 @@ -import numpy as np -cimport numpy as np -np.import_array() - -# We now need to fix a datatype for our arrays. I've used the variable -# DTYPE for this, which is assigned to the usual NumPy runtime -# type info object. -DTYPE = float -# "ctypedef" assigns a corresponding compile-time type to DTYPE_t. For -# every type in the numpy module there's a corresponding compile-time -# type with a _t-suffix. 
-ctypedef np.float_t DTYPE_t - -from .numerical cimport NumericalHyperparameter - - -cdef class IntegerHyperparameter(NumericalHyperparameter): - cdef ufhp - cpdef long long _transform_scalar(self, double scalar) - cpdef np.ndarray _transform_vector(self, np.ndarray vector) diff --git a/ConfigSpace/hyperparameters/integer_hyperparameter.pyx b/ConfigSpace/hyperparameters/integer_hyperparameter.pyx deleted file mode 100644 index 3777c782..00000000 --- a/ConfigSpace/hyperparameters/integer_hyperparameter.pyx +++ /dev/null @@ -1,98 +0,0 @@ -from typing import Dict, Optional, Union - -import numpy as np -cimport numpy as np -np.import_array() - - -cdef class IntegerHyperparameter(NumericalHyperparameter): - def __init__(self, name: str, default_value: int, meta: Optional[Dict] = None) -> None: - super(IntegerHyperparameter, self).__init__(name, default_value, meta) - - def is_legal(self, value: int) -> bool: - raise NotImplemented - - cpdef bint is_legal_vector(self, DTYPE_t value): - raise NotImplemented - - def check_default(self, default_value) -> int: - raise NotImplemented - - def check_int(self, parameter: int, name: str) -> int: - if abs(int(parameter) - parameter) > 0.00000001 and \ - type(parameter) is not int: - raise ValueError("For the Integer parameter %s, the value must be " - "an Integer, too. Right now it is a %s with value" - " %s." 
% (name, type(parameter), str(parameter))) - return int(parameter) - - def _transform(self, vector: Union[np.ndarray, float, int] - ) -> Optional[Union[np.ndarray, float, int]]: - try: - if isinstance(vector, np.ndarray): - return self._transform_vector(vector) - return self._transform_scalar(vector) - except ValueError: - return None - - cpdef long long _transform_scalar(self, double scalar): - raise NotImplementedError() - - cpdef np.ndarray _transform_vector(self, np.ndarray vector): - raise NotImplementedError() - - def pdf(self, vector: np.ndarray) -> np.ndarray: - """ - Computes the probability density function of the hyperparameter in - the hyperparameter space (the one specified by the user). - For each hyperparameter type, there is also a method _pdf which - operates on the transformed (and possibly normalized) hyperparameter - space. Only legal values return a positive probability density, - otherwise zero. - - Parameters - ---------- - vector: np.ndarray - the (N, ) vector of inputs for which the probability density - function is to be computed. - - Returns - ---------- - np.ndarray(N, ) - Probability density values of the input vector - """ - if vector.ndim != 1: - raise ValueError("Method pdf expects a one-dimensional numpy array") - is_integer = (np.round(vector) == vector).astype(int) - vector = self._inverse_transform(vector) - return self._pdf(vector) * is_integer - - def _pdf(self, vector: np.ndarray) -> np.ndarray: - """ - Computes the probability density function of the parameter in - the transformed (and possibly normalized, depends on the parameter - type) space. As such, one never has to worry about log-normal - distributions, only normal distributions (as the inverse_transform - in the pdf method handles these). Optimally, an IntegerHyperparameter - should have a corresponding float, which can be utlized for the calls - to the probability density function (see e.g. 
NormalIntegerHyperparameter) - - Parameters - ---------- - vector: np.ndarray - the (N, ) vector of inputs for which the probability density - function is to be computed. - - Returns - ---------- - np.ndarray(N, ) - Probability density values of the input vector - """ - raise NotImplementedError() - - def get_max_density(self) -> float: - """ - Returns the maximal density on the pdf for the parameter (so not - the mode, but the value of the pdf on the mode). - """ - raise NotImplementedError() diff --git a/ConfigSpace/hyperparameters/normal_float.pxd b/ConfigSpace/hyperparameters/normal_float.pxd deleted file mode 100644 index b6f5d1eb..00000000 --- a/ConfigSpace/hyperparameters/normal_float.pxd +++ /dev/null @@ -1,19 +0,0 @@ -import numpy as np -cimport numpy as np -np.import_array() - -# We now need to fix a datatype for our arrays. I've used the variable -# DTYPE for this, which is assigned to the usual NumPy runtime -# type info object. -DTYPE = float -# "ctypedef" assigns a corresponding compile-time type to DTYPE_t. For -# every type in the numpy module there's a corresponding compile-time -# type with a _t-suffix. 
-ctypedef np.float_t DTYPE_t - -from .float_hyperparameter cimport FloatHyperparameter - - -cdef class NormalFloatHyperparameter(FloatHyperparameter): - cdef public mu - cdef public sigma diff --git a/ConfigSpace/hyperparameters/normal_float.pyx b/ConfigSpace/hyperparameters/normal_float.pyx deleted file mode 100644 index b7716702..00000000 --- a/ConfigSpace/hyperparameters/normal_float.pyx +++ /dev/null @@ -1,322 +0,0 @@ -import io -import math -from typing import Any, Dict, List, Optional, Union - -from scipy.stats import truncnorm, norm -import numpy as np -cimport numpy as np -np.import_array() - -from ConfigSpace.hyperparameters.uniform_float cimport UniformFloatHyperparameter -from ConfigSpace.hyperparameters.normal_integer cimport NormalIntegerHyperparameter - - -cdef class NormalFloatHyperparameter(FloatHyperparameter): - - def __init__(self, name: str, mu: Union[int, float], sigma: Union[int, float], - default_value: Union[None, float] = None, - q: Union[int, float, None] = None, log: bool = False, - lower: Optional[Union[float, int]] = None, - upper: Optional[Union[float, int]] = None, - meta: Optional[Dict] = None) -> None: - r""" - A normally distributed float hyperparameter. - - Its values are sampled from a normal distribution - :math:`\mathcal{N}(\mu, \sigma^2)`. - - >>> from ConfigSpace import NormalFloatHyperparameter - >>> - >>> NormalFloatHyperparameter('n', mu=0, sigma=1, log=False) - n, Type: NormalFloat, Mu: 0.0 Sigma: 1.0, Default: 0.0 - - Parameters - ---------- - name : str - Name of the hyperparameter, with which it can be accessed - mu : int, float - Mean of the distribution - sigma : int, float - Standard deviation of the distribution - default_value : int, float, optional - Sets the default value of a hyperparameter to a given value - q : int, float, optional - Quantization factor - log : bool, optional - If ``True``, the values of the hyperparameter will be sampled - on a logarithmic scale. 
Default to ``False`` - lower : int, float, optional - Lower bound of a range of values from which the hyperparameter will be sampled - upper : int, float, optional - Upper bound of a range of values from which the hyperparameter will be sampled - meta : Dict, optional - Field for holding meta data provided by the user. - Not used by the configuration space. - """ - super(NormalFloatHyperparameter, self).__init__(name, default_value, meta) - self.mu = float(mu) - self.sigma = float(sigma) - self.q = float(q) if q is not None else None - self.log = bool(log) - self.default_value = self.check_default(default_value) - self.normalized_default_value = self._inverse_transform(self.default_value) - - if (lower is not None) ^ (upper is not None): - raise ValueError("Only one bound was provided when both lower and upper bounds must be provided.") - - if lower is not None and upper is not None: - self.lower = float(lower) - self.upper = float(upper) - - if self.lower >= self.upper: - raise ValueError("Upper bound %f must be larger than lower bound " - "%f for hyperparameter %s" % - (self.upper, self.lower, name)) - elif log and self.lower <= 0: - raise ValueError("Negative lower bound (%f) for log-scale " - "hyperparameter %s is forbidden." % - (self.lower, name)) - - self.default_value = self.check_default(default_value) - - if self.log: - if self.q is not None: - lower = self.lower - (np.float64(self.q) / 2. - 0.0001) - upper = self.upper + (np.float64(self.q) / 2. - 0.0001) - else: - lower = self.lower - upper = self.upper - self._lower = np.log(lower) - self._upper = np.log(upper) - else: - if self.q is not None: - self._lower = self.lower - (self.q / 2. - 0.0001) - self._upper = self.upper + (self.q / 2. 
- 0.0001) - else: - self._lower = self.lower - self._upper = self.upper - if self.q is not None: - # There can be weird rounding errors, so we compare the result against self.q, see - # In [13]: 2.4 % 0.2 - # Out[13]: 0.1999999999999998 - if np.round((self.upper - self.lower) % self.q, 10) not in (0, self.q): - raise ValueError( - "Upper bound (%f) - lower bound (%f) must be a multiple of q (%f)" - % (self.upper, self.lower, self.q) - ) - - def __repr__(self) -> str: - repr_str = io.StringIO() - - if self.lower is None or self.upper is None: - repr_str.write("%s, Type: NormalFloat, Mu: %s Sigma: %s, Default: %s" % (self.name, repr(self.mu), repr(self.sigma), repr(self.default_value))) - else: - repr_str.write("%s, Type: NormalFloat, Mu: %s Sigma: %s, Range: [%s, %s], Default: %s" % (self.name, repr(self.mu), repr(self.sigma), repr(self.lower), repr(self.upper), repr(self.default_value))) - - if self.log: - repr_str.write(", on log-scale") - if self.q is not None: - repr_str.write(", Q: %s" % str(self.q)) - repr_str.seek(0) - return repr_str.getvalue() - - def __eq__(self, other: Any) -> bool: - """ - This method implements a comparison between self and another - object. - - Additionally, it defines the __ne__() as stated in the - documentation from python: - By default, object implements __eq__() by using is, returning NotImplemented - in the case of a false comparison: True if x is y else NotImplemented. - For __ne__(), by default it delegates to __eq__() and inverts the result - unless it is NotImplemented. 
- - """ - if not isinstance(other, self.__class__): - return False - - return ( - self.name == other.name and - self.default_value == other.default_value and - self.mu == other.mu and - self.sigma == other.sigma and - self.log == other.log and - self.q == other.q and - self.lower == other.lower and - self.upper == other.upper - ) - - def __copy__(self): - return NormalFloatHyperparameter( - name=self.name, - default_value=self.default_value, - mu=self.mu, - sigma=self.sigma, - log=self.log, - q=self.q, - lower=self.lower, - upper=self.upper, - meta=self.meta - ) - - def __hash__(self): - return hash((self.name, self.mu, self.sigma, self.log, self.q, self.lower, self.upper)) - - def to_uniform(self, z: int = 3) -> "UniformFloatHyperparameter": - if self.lower is None or self.upper is None: - lb = self.mu - (z * self.sigma) - ub = self.mu + (z * self.sigma) - else: - lb = self.lower - ub = self.upper - - return UniformFloatHyperparameter(self.name, - lb, - ub, - default_value=self.default_value, - q=self.q, log=self.log, meta=self.meta) - - def check_default(self, default_value: Union[int, float]) -> Union[int, float]: - if default_value is None: - if self.log: - return self._transform_scalar(self.mu) - else: - return self.mu - - elif self.is_legal(default_value): - return default_value - else: - raise ValueError("Illegal default value %s" % str(default_value)) - - def to_integer(self) -> "NormalIntegerHyperparameter": - if self.q is None: - q_int = None - else: - q_int = int(np.rint(self.q)) - if self.lower is None: - lower = None - upper = None - else: - lower=np.ceil(self.lower) - upper=np.floor(self.upper) - - return NormalIntegerHyperparameter(self.name, int(np.rint(self.mu)), self.sigma, - lower=lower, upper=upper, - default_value=int(np.rint(self.default_value)), - q=q_int, log=self.log) - - def is_legal(self, value: Union[float]) -> bool: - return (isinstance(value, (float, int, np.number))) and \ - (self.lower is None or value >= self.lower) and \ - 
(self.upper is None or value <= self.upper) - - cpdef bint is_legal_vector(self, DTYPE_t value): - return isinstance(value, float) or isinstance(value, int) - - def _sample(self, rs: np.random.RandomState, size: Optional[int] = None - ) -> Union[np.ndarray, float]: - - if self.lower is None: - mu = self.mu - sigma = self.sigma - return rs.normal(mu, sigma, size=size) - else: - mu = self.mu - sigma = self.sigma - lower = self._lower - upper = self._upper - a = (lower - mu) / sigma - b = (upper - mu) / sigma - - return truncnorm.rvs(a, b, loc=mu, scale=sigma, size=size, random_state=rs) - - cpdef np.ndarray _transform_vector(self, np.ndarray vector): - if np.isnan(vector).any(): - raise ValueError('Vector %s contains NaN\'s' % vector) - if self.log: - vector = np.exp(vector) - if self.q is not None: - vector = np.rint(vector / self.q) * self.q - return vector - - cpdef double _transform_scalar(self, double scalar): - if scalar != scalar: - raise ValueError("Number %s is NaN" % scalar) - if self.log: - scalar = math.exp(scalar) - if self.q is not None: - scalar = np.round(scalar / self.q) * self.q - return scalar - - def _inverse_transform(self, vector: Optional[np.ndarray]) -> Union[float, np.ndarray]: - if vector is None: - return np.NaN - - if self.log: - vector = np.log(vector) - return vector - - def get_neighbors(self, value: float, rs: np.random.RandomState, number: int = 4, - transform: bool = False) -> List[float]: - neighbors = [] - for i in range(number): - new_value = rs.normal(value, self.sigma) - - if self.lower is not None and self.upper is not None: - new_value = min(max(new_value, self.lower), self.upper) - - neighbors.append(new_value) - return neighbors - - def get_size(self) -> float: - if self.q is None: - return np.inf - elif self.lower is None: - return np.inf - else: - return np.rint((self.upper - self.lower) / self.q) + 1 - - def _pdf(self, vector: np.ndarray) -> np.ndarray: - """ - Computes the probability density function of the parameter in 
- the transformed (and possibly normalized, depends on the parameter - type) space. As such, one never has to worry about log-normal - distributions, only normal distributions (as the inverse_transform - in the pdf method handles these). - - Parameters - ---------- - vector: np.ndarray - the (N, ) vector of inputs for which the probability density - function is to be computed. - - Returns - ---------- - np.ndarray(N, ) - Probability density values of the input vector - """ - mu = self.mu - sigma = self.sigma - if self.lower is None: - return norm(loc=mu, scale=sigma).pdf(vector) - else: - mu = self.mu - sigma = self.sigma - lower = self._lower - upper = self._upper - a = (lower - mu) / sigma - b = (upper - mu) / sigma - - return truncnorm(a, b, loc=mu, scale=sigma).pdf(vector) - - def get_max_density(self) -> float: - if self.lower is None: - return self._pdf(np.array([self.mu]))[0] - - if self.mu < self._lower: - return self._pdf(np.array([self._lower]))[0] - elif self.mu > self._upper: - return self._pdf(np.array([self._upper]))[0] - else: - return self._pdf(np.array([self.mu]))[0] diff --git a/ConfigSpace/hyperparameters/normal_integer.pxd b/ConfigSpace/hyperparameters/normal_integer.pxd deleted file mode 100644 index d5e8e1a7..00000000 --- a/ConfigSpace/hyperparameters/normal_integer.pxd +++ /dev/null @@ -1,21 +0,0 @@ -import numpy as np -cimport numpy as np -np.import_array() - -# We now need to fix a datatype for our arrays. I've used the variable -# DTYPE for this, which is assigned to the usual NumPy runtime -# type info object. -DTYPE = float -# "ctypedef" assigns a corresponding compile-time type to DTYPE_t. For -# every type in the numpy module there's a corresponding compile-time -# type with a _t-suffix. 
-ctypedef np.float_t DTYPE_t - -from .integer_hyperparameter cimport IntegerHyperparameter - - -cdef class NormalIntegerHyperparameter(IntegerHyperparameter): - cdef public mu - cdef public sigma - cdef public nfhp - cdef public normalization_constant diff --git a/ConfigSpace/hyperparameters/normal_integer.pyx b/ConfigSpace/hyperparameters/normal_integer.pyx deleted file mode 100644 index ea10d1d1..00000000 --- a/ConfigSpace/hyperparameters/normal_integer.pyx +++ /dev/null @@ -1,369 +0,0 @@ -from itertools import count -import io -from more_itertools import roundrobin -from typing import List, Any, Dict, Union, Optional -import warnings - -from scipy.stats import truncnorm, norm -import numpy as np -cimport numpy as np -np.import_array() - -from ConfigSpace.functional import center_range, arange_chunked -from ConfigSpace.hyperparameters.uniform_integer cimport UniformIntegerHyperparameter -from ConfigSpace.hyperparameters.normal_float cimport NormalFloatHyperparameter - -# OPTIM: Some operations generate an arange which could blowup memory if -# done over the entire space of integers (int32/64). -# To combat this, `arange_chunked` is used in scenarios where reducion -# operations over all the elments could be done in partial steps independantly. -# For example, a sum over the pdf values could be done in chunks. -# This may add some small overhead for smaller ranges but is unlikely to -# be noticable. -ARANGE_CHUNKSIZE = 10_000_000 - - -cdef class NormalIntegerHyperparameter(IntegerHyperparameter): - - def __init__(self, name: str, mu: int, sigma: Union[int, float], - default_value: Union[int, None] = None, q: Union[None, int] = None, - log: bool = False, - lower: Optional[int] = None, - upper: Optional[int] = None, - meta: Optional[Dict] = None) -> None: - r""" - A normally distributed integer hyperparameter. - - Its values are sampled from a normal distribution - :math:`\mathcal{N}(\mu, \sigma^2)`. 
- - >>> from ConfigSpace import NormalIntegerHyperparameter - >>> - >>> NormalIntegerHyperparameter(name='n', mu=0, sigma=1, log=False) - n, Type: NormalInteger, Mu: 0 Sigma: 1, Default: 0 - - Parameters - ---------- - name : str - Name of the hyperparameter with which it can be accessed - mu : int - Mean of the distribution, from which hyperparameter is sampled - sigma : int, float - Standard deviation of the distribution, from which - hyperparameter is sampled - default_value : int, optional - Sets the default value of a hyperparameter to a given value - q : int, optional - Quantization factor - log : bool, optional - If ``True``, the values of the hyperparameter will be sampled - on a logarithmic scale. Defaults to ``False`` - lower : int, float, optional - Lower bound of a range of values from which the hyperparameter will be sampled - upper : int, float, optional - Upper bound of a range of values from which the hyperparameter will be sampled - meta : Dict, optional - Field for holding meta data provided by the user. - Not used by the configuration space. - - """ - super(NormalIntegerHyperparameter, self).__init__(name, default_value, meta) - - self.mu = mu - self.sigma = sigma - - if default_value is not None: - default_value = self.check_int(default_value, self.name) - - if q is not None: - if q < 1: - warnings.warn("Setting quantization < 1 for Integer " - "Hyperparameter '%s' has no effect." 
% - name) - self.q = None - else: - self.q = self.check_int(q, "q") - else: - self.q = None - self.log = bool(log) - - if (lower is not None) ^ (upper is not None): - raise ValueError("Only one bound was provided when both lower and upper bounds must be provided.") - - if lower is not None and upper is not None: - self.upper = self.check_int(upper, "upper") - self.lower = self.check_int(lower, "lower") - if self.lower >= self.upper: - raise ValueError("Upper bound %d must be larger than lower bound " - "%d for hyperparameter %s" % - (self.lower, self.upper, name)) - elif log and self.lower <= 0: - raise ValueError("Negative lower bound (%d) for log-scale " - "hyperparameter %s is forbidden." % - (self.lower, name)) - self.lower = lower - self.upper = upper - - self.nfhp = NormalFloatHyperparameter(self.name, - self.mu, - self.sigma, - log=self.log, - q=self.q, - lower=self.lower, - upper=self.upper, - default_value=default_value) - - self.default_value = self.check_default(default_value) - self.normalized_default_value = self._inverse_transform(self.default_value) - - if (self.lower is None) or (self.upper is None): - # Since a bound is missing, the pdf cannot be normalized. 
Working with the unnormalized variant) - self.normalization_constant = 1 - else: - self.normalization_constant = self._compute_normalization() - - def __repr__(self) -> str: - repr_str = io.StringIO() - - if self.lower is None or self.upper is None: - repr_str.write("%s, Type: NormalInteger, Mu: %s Sigma: %s, Default: %s" % (self.name, repr(self.mu), repr(self.sigma), repr(self.default_value))) - else: - repr_str.write("%s, Type: NormalInteger, Mu: %s Sigma: %s, Range: [%s, %s], Default: %s" % (self.name, repr(self.mu), repr(self.sigma), repr(self.lower), repr(self.upper), repr(self.default_value))) - - if self.log: - repr_str.write(", on log-scale") - if self.q is not None: - repr_str.write(", Q: %s" % str(self.q)) - repr_str.seek(0) - return repr_str.getvalue() - - def __eq__(self, other: Any) -> bool: - """ - This method implements a comparison between self and another - object. - - Additionally, it defines the __ne__() as stated in the - documentation from python: - By default, object implements __eq__() by using is, returning NotImplemented - in the case of a false comparison: True if x is y else NotImplemented. - For __ne__(), by default it delegates to __eq__() and inverts the result - unless it is NotImplemented. 
- - """ - if not isinstance(other, self.__class__): - return False - - return ( - self.name == other.name and - self.mu == other.mu and - self.sigma == other.sigma and - self.log == other.log and - self.q == other.q and - self.lower == other.lower and - self.upper == other.upper and - self.default_value == other.default_value - ) - - def __hash__(self): - return hash((self.name, self.mu, self.sigma, self.log, self.q, self.lower, self.upper)) - - def __copy__(self): - return NormalIntegerHyperparameter( - name=self.name, - default_value=self.default_value, - mu=self.mu, - sigma=self.sigma, - log=self.log, - q=self.q, - lower=self.lower, - upper=self.upper, - meta=self.meta - ) - - def to_uniform(self, z: int = 3) -> "UniformIntegerHyperparameter": - if self.lower is None or self.upper is None: - lb = np.round(int(self.mu - (z * self.sigma))) - ub = np.round(int(self.mu + (z * self.sigma))) - else: - lb = self.lower - ub = self.upper - - return UniformIntegerHyperparameter(self.name, - lb, - ub, - default_value=self.default_value, - q=self.q, log=self.log, meta=self.meta) - - def is_legal(self, value: int) -> bool: - return (isinstance(value, (int, np.integer))) and \ - (self.lower is None or value >= self.lower) and \ - (self.upper is None or value <= self.upper) - - cpdef bint is_legal_vector(self, DTYPE_t value): - return isinstance(value, float) or isinstance(value, int) - - def check_default(self, default_value: int) -> int: - if default_value is None: - if self.log: - return self._transform_scalar(self.mu) - else: - return self.mu - - elif self.is_legal(default_value): - return default_value - else: - raise ValueError("Illegal default value %s" % str(default_value)) - - def _sample(self, rs: np.random.RandomState, size: Optional[int] = None - ) -> Union[np.ndarray, float]: - value = self.nfhp._sample(rs, size=size) - # Map all floats which belong to the same integer value to the same - # float value by first transforming it to an integer and then - # 
transforming it back to a float between zero and one - value = self._transform(value) - value = self._inverse_transform(value) - return value - - cpdef np.ndarray _transform_vector(self, np.ndarray vector): - vector = self.nfhp._transform_vector(vector) - return np.rint(vector) - - cpdef long long _transform_scalar(self, double scalar): - scalar = self.nfhp._transform_scalar(scalar) - return int(np.round(scalar)) - - def _inverse_transform(self, vector: Union[np.ndarray, float, int] - ) -> Union[np.ndarray, float]: - return self.nfhp._inverse_transform(vector) - - def has_neighbors(self) -> bool: - return True - - def get_neighbors( - self, - value: Union[int, float], - rs: np.random.RandomState, - number: int = 4, - transform: bool = False, - ) -> List[int]: - stepsize = self.q if self.q is not None else 1 - bounded = self.lower is not None - mu = self.mu - sigma = self.sigma - - neighbors: set[int] = set() - center = self._transform(value) - - if not bounded: - float_indices = norm.rvs( - loc=mu, - scale=sigma, - size=number, - random_state=rs, - ) - else: - dist = truncnorm( - a = (self.lower - mu) / sigma, - b = (self.upper - mu) / sigma, - loc=center, - scale=sigma, - ) - - float_indices = dist.rvs( - size=number, - random_state=rs, - ) - - possible_neighbors = self._transform_vector(float_indices).astype(np.longlong) - - for possible_neighbor in possible_neighbors: - # If we already happen to have this neighbor, pick the closest - # number around it that is not arelady included - if possible_neighbor in neighbors or possible_neighbor == center: - - if bounded: - numbers_around = center_range(possible_neighbor, self.lower, self.upper, stepsize) - else: - decrement_count = count(possible_neighbor - stepsize, step=-stepsize) - increment_count = count(possible_neighbor + stepsize, step=stepsize) - numbers_around = roundrobin(decrement_count, increment_count) - - valid_numbers_around = ( - n for n in numbers_around - if (n not in neighbors and n != center) - ) - 
possible_neighbor = next(valid_numbers_around, None) - - if possible_neighbor is None: - raise ValueError( - f"Found no more eligble neighbors for value {center}" - f"\nfound {neighbors}" - ) - - # We now have a valid sample, add it to the list of neighbors - neighbors.add(possible_neighbor) - - if transform: - return [self._transform(neighbor) for neighbor in neighbors] - else: - return list(neighbors) - - def _compute_normalization(self): - if self.lower is None: - warnings.warn("Cannot normalize the pdf exactly for a NormalIntegerHyperparameter" - f" {self.name} without bounds. Skipping normalization for that hyperparameter.") - return 1 - - else: - if self.upper - self.lower > ARANGE_CHUNKSIZE: - a = (self.lower - self.mu) / self.sigma - b = (self.upper - self.mu) / self.sigma - confidence = 0.999999 - rv = truncnorm(a=a, b=b, loc=self.mu, scale=self.sigma) - u, v = rv.ppf((1 - confidence) / 2), rv.ppf((1 + confidence) / 2) - lb = max(u, self.lower) - ub = min(v, self.upper + 1) - else: - lb = self.lower - ub = self.upper + 1 - - chunks = arange_chunked(lb, ub, chunk_size=ARANGE_CHUNKSIZE) - return sum(self.nfhp.pdf(chunk).sum() for chunk in chunks) - - def _pdf(self, vector: np.ndarray) -> np.ndarray: - """ - Computes the probability density function of the parameter in - the transformed (and possibly normalized, depends on the parameter - type) space. As such, one never has to worry about log-normal - distributions, only normal distributions (as the inverse_transform - in the pdf method handles these). Optimally, an IntegerHyperparameter - should have a corresponding float, which can be utlized for the calls - to the probability density function (see e.g. NormalIntegerHyperparameter) - - Parameters - ---------- - vector: np.ndarray - the (N, ) vector of inputs for which the probability density - function is to be computed. 
- - Returns - ---------- - np.ndarray(N, ) - Probability density values of the input vector - """ - return self.nfhp._pdf(vector) / self.normalization_constant - - def get_max_density(self): - chunks = arange_chunked(self.lower, self.upper + 1, chunk_size=ARANGE_CHUNKSIZE) - maximum = max(self.nfhp.pdf(chunk).max() for chunk in chunks) - return maximum / self.normalization_constant - - def get_size(self) -> float: - if self.lower is None: - return np.inf - else: - if self.q is None: - q = 1 - else: - q = self.q - return np.rint((self.upper - self.lower) / self.q) + 1 diff --git a/ConfigSpace/hyperparameters/numerical.pxd b/ConfigSpace/hyperparameters/numerical.pxd deleted file mode 100644 index 7984d06c..00000000 --- a/ConfigSpace/hyperparameters/numerical.pxd +++ /dev/null @@ -1,25 +0,0 @@ -import numpy as np -cimport numpy as np -np.import_array() - -# We now need to fix a datatype for our arrays. I've used the variable -# DTYPE for this, which is assigned to the usual NumPy runtime -# type info object. -DTYPE = float -# "ctypedef" assigns a corresponding compile-time type to DTYPE_t. For -# every type in the numpy module there's a corresponding compile-time -# type with a _t-suffix. 
-ctypedef np.float_t DTYPE_t - -from .hyperparameter cimport Hyperparameter - - -cdef class NumericalHyperparameter(Hyperparameter): - cdef public lower - cdef public upper - cdef public q - cdef public log - cdef public _lower - cdef public _upper - cpdef int compare(self, value: Union[int, float, str], value2: Union[int, float, str]) - cpdef int compare_vector(self, DTYPE_t value, DTYPE_t value2) diff --git a/ConfigSpace/hyperparameters/numerical.pyx b/ConfigSpace/hyperparameters/numerical.pyx deleted file mode 100644 index de8f74c0..00000000 --- a/ConfigSpace/hyperparameters/numerical.pyx +++ /dev/null @@ -1,85 +0,0 @@ -from typing import Any, Dict, Optional, Union - -import numpy as np -cimport numpy as np -np.import_array() - - -cdef class NumericalHyperparameter(Hyperparameter): - - def __init__(self, name: str, default_value: Any, meta: Optional[Dict]) -> None: - super(NumericalHyperparameter, self).__init__(name, meta) - self.default_value = default_value - - def has_neighbors(self) -> bool: - return True - - def get_num_neighbors(self, value = None) -> float: - - return np.inf - - cpdef int compare(self, value: Union[int, float, str], value2: Union[int, float, str]): - if value < value2: - return -1 - elif value > value2: - return 1 - elif value == value2: - return 0 - - cpdef int compare_vector(self, DTYPE_t value, DTYPE_t value2): - if value < value2: - return -1 - elif value > value2: - return 1 - elif value == value2: - return 0 - - def allow_greater_less_comparison(self) -> bool: - return True - - def __eq__(self, other: Any) -> bool: - """ - This method implements a comparison between self and another - object. - - Additionally, it defines the __ne__() as stated in the - documentation from python: - By default, object implements __eq__() by using is, returning NotImplemented - in the case of a false comparison: True if x is y else NotImplemented. - For __ne__(), by default it delegates to __eq__() and inverts the result - unless it is NotImplemented. 
- - """ - if not isinstance(other, self.__class__): - return False - - return ( - self.name == other.name and - self.default_value == other.default_value and - self.lower == other.lower and - self.upper == other.upper and - self.log == other.log and - self.q == other.q - ) - - def __hash__(self): - return hash( - ( - self.name, - self.lower, - self.upper, - self.log, - self.q - ) - ) - - def __copy__(self): - return self.__class__( - name=self.name, - default_value=self.default_value, - lower=self.lower, - upper=self.upper, - log=self.log, - q=self.q, - meta=self.meta - ) diff --git a/ConfigSpace/hyperparameters/ordinal.pyx b/ConfigSpace/hyperparameters/ordinal.pyx deleted file mode 100644 index ea90df40..00000000 --- a/ConfigSpace/hyperparameters/ordinal.pyx +++ /dev/null @@ -1,356 +0,0 @@ -from collections import OrderedDict -import copy -import io -from typing import Any, Dict, List, Optional, Tuple, Union - -import numpy as np -cimport numpy as np -np.import_array() - -# We now need to fix a datatype for our arrays. I've used the variable -# DTYPE for this, which is assigned to the usual NumPy runtime -# type info object. -DTYPE = float -# "ctypedef" assigns a corresponding compile-time type to DTYPE_t. For -# every type in the numpy module there's a corresponding compile-time -# type with a _t-suffix. -ctypedef np.float_t DTYPE_t - -from ConfigSpace.hyperparameters.hyperparameter cimport Hyperparameter - - -cdef class OrdinalHyperparameter(Hyperparameter): - cdef public tuple sequence - cdef public int num_elements - cdef sequence_vector - cdef value_dict - - def __init__( - self, - name: str, - sequence: Union[List[Union[float, int, str]], Tuple[Union[float, int, str]]], - default_value: Union[str, int, float, None] = None, - meta: Optional[Dict] = None - ) -> None: - """ - An ordinal hyperparameter. - - Its values are sampled form a ``sequence`` of values. - The sequence of values from a ordinal hyperparameter is ordered. 
- - ``None`` is a forbidden value, please use a string constant instead and parse - it in your own code, see `here `_ - for further details. - - >>> from ConfigSpace import OrdinalHyperparameter - >>> - >>> OrdinalHyperparameter('o', sequence=['10', '20', '30']) - o, Type: Ordinal, Sequence: {10, 20, 30}, Default: 10 - - Parameters - ---------- - name : str - Name of the hyperparameter, with which it can be accessed. - sequence : list or tuple with (str, float, int) - ordered collection of values to sample hyperparameter from. - default_value : int, float, str, optional - Sets the default value of a hyperparameter to a given value. - meta : Dict, optional - Field for holding meta data provided by the user. - Not used by the configuration space. - """ - - # Remark - # Since the sequence can consist of elements from different types, - # they are stored into a dictionary in order to handle them as a - # numeric sequence according to their order/position. - super(OrdinalHyperparameter, self).__init__(name, meta) - if len(sequence) > len(set(sequence)): - raise ValueError( - "Ordinal Hyperparameter Sequence %s contain duplicate values." 
% sequence) - self.sequence = tuple(sequence) - self.num_elements = len(sequence) - self.sequence_vector = list(range(self.num_elements)) - self.default_value = self.check_default(default_value) - self.normalized_default_value = self._inverse_transform(self.default_value) - self.value_dict = OrderedDict() # type: OrderedDict[Union[int, float, str], int] - counter = 0 - for element in self.sequence: - self.value_dict[element] = counter - counter += 1 - - def __hash__(self): - return hash((self.name, self.sequence)) - - def __repr__(self) -> str: - """ - write out the parameter definition - """ - repr_str = io.StringIO() - repr_str.write("%s, Type: Ordinal, Sequence: {" % (self.name)) - for idx, seq in enumerate(self.sequence): - repr_str.write(str(seq)) - if idx < len(self.sequence) - 1: - repr_str.write(", ") - repr_str.write("}") - repr_str.write(", Default: ") - repr_str.write(str(self.default_value)) - repr_str.seek(0) - return repr_str.getvalue() - - def __eq__(self, other: Any) -> bool: - """ - This method implements a comparison between self and another - object. - - Additionally, it defines the __ne__() as stated in the - documentation from python: - By default, object implements __eq__() by using is, returning NotImplemented - in the case of a false comparison: True if x is y else NotImplemented. - For __ne__(), by default it delegates to __eq__() and inverts the result - unless it is NotImplemented. 
- - """ - if not isinstance(other, self.__class__): - return False - - return ( - self.name == other.name and - self.sequence == other.sequence and - self.default_value == other.default_value - ) - - def __copy__(self): - return OrdinalHyperparameter( - name=self.name, - sequence=copy.deepcopy(self.sequence), - default_value=self.default_value, - meta=self.meta - ) - - cpdef int compare(self, value: Union[int, float, str], value2: Union[int, float, str]): - if self.value_dict[value] < self.value_dict[value2]: - return -1 - elif self.value_dict[value] > self.value_dict[value2]: - return 1 - elif self.value_dict[value] == self.value_dict[value2]: - return 0 - - cpdef int compare_vector(self, DTYPE_t value, DTYPE_t value2): - if value < value2: - return -1 - elif value > value2: - return 1 - elif value == value2: - return 0 - - def is_legal(self, value: Union[int, float, str]) -> bool: - """ - check if a certain value is represented in the sequence - """ - return value in self.sequence - - cpdef bint is_legal_vector(self, DTYPE_t value): - return value in self.sequence_vector - - def check_default(self, default_value: Optional[Union[int, float, str]] - ) -> Union[int, float, str]: - """ - check if given default value is represented in the sequence. - If there's no default value we simply choose the - first element in our sequence as default. 
- """ - if default_value is None: - return self.sequence[0] - elif self.is_legal(default_value): - return default_value - else: - raise ValueError("Illegal default value %s" % str(default_value)) - - cpdef np.ndarray _transform_vector(self, np.ndarray vector): - if np.isnan(vector).any(): - raise ValueError('Vector %s contains NaN\'s' % vector) - - if np.equal(np.mod(vector, 1), 0): - return self.sequence[vector.astype(int)] - - raise ValueError("Can only index the choices of the ordinal " - "hyperparameter %s with an integer, but provided " - "the following float: %f" % (self, vector)) - - def _transform_scalar(self, scalar: Union[float, int]) -> Union[float, int, str]: - if scalar != scalar: - raise ValueError("Number %s is NaN" % scalar) - - if scalar % 1 == 0: - return self.sequence[int(scalar)] - - raise ValueError("Can only index the choices of the ordinal " - "hyperparameter %s with an integer, but provided " - "the following float: %f" % (self, scalar)) - - def _transform(self, vector: Union[np.ndarray, float, int] - ) -> Optional[Union[np.ndarray, float, int]]: - try: - if isinstance(vector, np.ndarray): - return self._transform_vector(vector) - return self._transform_scalar(vector) - except ValueError: - return None - - def _inverse_transform(self, vector: Optional[Union[np.ndarray, List, int, str, float]] - ) -> Union[float, List[int], List[str], List[float]]: - if vector is None: - return np.NaN - return self.sequence.index(vector) - - def get_seq_order(self) -> np.ndarray: - """ - return the ordinal sequence as numeric sequence - (according to the the ordering) from 1 to length of our sequence. 
- """ - return np.arange(0, self.num_elements) - - def get_order(self, value: Optional[Union[int, str, float]]) -> int: - """ - return the seuence position/order of a certain value from the sequence - """ - return self.value_dict[value] - - def get_value(self, idx: int) -> Union[int, str, float]: - """ - return the sequence value of a given order/position - """ - return list(self.value_dict.keys())[list(self.value_dict.values()).index(idx)] - - def check_order(self, val1: Union[int, str, float], val2: Union[int, str, float]) -> bool: - """ - check whether value1 is smaller than value2. - """ - idx1 = self.get_order(val1) - idx2 = self.get_order(val2) - if idx1 < idx2: - return True - else: - return False - - def _sample(self, rs: np.random.RandomState, size: Optional[int] = None) -> int: - """ - return a random sample from our sequence as order/position index - """ - return rs.randint(0, self.num_elements, size=size) - - def has_neighbors(self) -> bool: - """ - check if there are neighbors or we're only dealing with an - one-element sequence - """ - return len(self.sequence) > 1 - - def get_num_neighbors(self, value: Union[int, float, str]) -> int: - """ - return the number of existing neighbors in the sequence - """ - max_idx = len(self.sequence) - 1 - # check if there is only one value - if value == self.sequence[0] and value == self.sequence[max_idx]: - return 0 - elif value == self.sequence[0] or value == self.sequence[max_idx]: - return 1 - else: - return 2 - - def get_neighbors(self, value: Union[int, str, float], rs: None, number: int = 0, - transform: bool = False) -> List[Union[str, float, int]]: - """ - Return the neighbors of a given value. - Value must be in vector form. Ordinal name will not work. 
- """ - neighbors = [] - if transform: - if self.get_num_neighbors(value) < len(self.sequence): - index = self.get_order(value) - neighbor_idx1 = index - 1 - neighbor_idx2 = index + 1 - seq = self.get_seq_order() - - if neighbor_idx1 >= seq[0]: - candidate1 = self.get_value(neighbor_idx1) - if self.check_order(candidate1, value): - neighbors.append(candidate1) - if neighbor_idx2 < self.num_elements: - candidate2 = self.get_value(neighbor_idx2) - if self.check_order(value, candidate2): - neighbors.append(candidate2) - - else: - if self.get_num_neighbors(self.get_value(value)) < len(self.sequence): - index = value - neighbor_idx1 = index - 1 - neighbor_idx2 = index + 1 - seq = self.get_seq_order() - - if neighbor_idx1 < index and neighbor_idx1 >= seq[0]: - neighbors.append(neighbor_idx1) - if neighbor_idx2 > index and neighbor_idx2 < self.num_elements: - neighbors.append(neighbor_idx2) - - return neighbors - - def allow_greater_less_comparison(self) -> bool: - return True - - def pdf(self, vector: np.ndarray) -> np.ndarray: - """ - Computes the probability density function of the hyperparameter in - the original hyperparameter space (the one specified by the user). - For each parameter type, there is also a method _pdf which - operates on the transformed (and possibly normalized) hyperparameter - space. Only legal values return a positive probability density, - otherwise zero. The OrdinalHyperparameter is treated - as a UniformHyperparameter with regard to its probability density. - - Parameters - ---------- - vector: np.ndarray - the (N, ) vector of inputs for which the probability density - function is to be computed. 
- - Returns - ---------- - np.ndarray(N, ) - Probability density values of the input vector - """ - if vector.ndim != 1: - raise ValueError("Method pdf expects a one-dimensional numpy array") - return self._pdf(vector) - - def _pdf(self, vector: np.ndarray) -> np.ndarray: - """ - Computes the probability density function of the hyperparameter in - the transformed (and possibly normalized, depends on the hyperparameter - type) space. As such, one never has to worry about log-normal - distributions, only normal distributions (as the inverse_transform - in the pdf method handles these). The OrdinalHyperparameter is treated - as a UniformHyperparameter with regard to its probability density. - - Parameters - ---------- - vector: np.ndarray - the (N, ) vector of inputs for which the probability density - function is to be computed. - - Returns - ---------- - np.ndarray(N, ) - Probability density values of the input vector - """ - if not np.all(np.isin(vector, self.sequence)): - raise ValueError(f"Some element in the vector {vector} is not in the sequence {self.sequence}.") - return np.ones_like(vector, dtype=np.float64) / self.num_elements - - def get_max_density(self) -> float: - return 1 / self.num_elements - - def get_size(self) -> float: - return len(self.sequence) diff --git a/ConfigSpace/hyperparameters/uniform_float.pxd b/ConfigSpace/hyperparameters/uniform_float.pxd deleted file mode 100644 index 65017c64..00000000 --- a/ConfigSpace/hyperparameters/uniform_float.pxd +++ /dev/null @@ -1,18 +0,0 @@ -import numpy as np -cimport numpy as np -np.import_array() - -# We now need to fix a datatype for our arrays. I've used the variable -# DTYPE for this, which is assigned to the usual NumPy runtime -# type info object. -DTYPE = float -# "ctypedef" assigns a corresponding compile-time type to DTYPE_t. For -# every type in the numpy module there's a corresponding compile-time -# type with a _t-suffix. 
-ctypedef np.float_t DTYPE_t - -from ConfigSpace.hyperparameters.float_hyperparameter cimport FloatHyperparameter - - -cdef class UniformFloatHyperparameter(FloatHyperparameter): - pass diff --git a/ConfigSpace/hyperparameters/uniform_float.pyx b/ConfigSpace/hyperparameters/uniform_float.pyx deleted file mode 100644 index e073662a..00000000 --- a/ConfigSpace/hyperparameters/uniform_float.pyx +++ /dev/null @@ -1,236 +0,0 @@ -import io -import math -from typing import Any, Dict, List, Optional, Union - -import numpy as np -cimport numpy as np -np.import_array() - -from ConfigSpace.hyperparameters.uniform_integer cimport UniformIntegerHyperparameter - - -cdef class UniformFloatHyperparameter(FloatHyperparameter): - def __init__(self, name: str, lower: Union[int, float], upper: Union[int, float], - default_value: Union[int, float, None] = None, - q: Union[int, float, None] = None, log: bool = False, - meta: Optional[Dict] = None) -> None: - """ - A uniformly distributed float hyperparameter. - - Its values are sampled from a uniform distribution with values - from ``lower`` to ``upper``. - - >>> from ConfigSpace import UniformFloatHyperparameter - >>> - >>> UniformFloatHyperparameter('u', lower=10, upper=100, log = False) - u, Type: UniformFloat, Range: [10.0, 100.0], Default: 55.0 - - Parameters - ---------- - name : str - Name of the hyperparameter, with which it can be accessed - lower : int, float - Lower bound of a range of values from which the hyperparameter will be sampled - upper : int, float - Upper bound - default_value : int, float, optional - Sets the default value of a hyperparameter to a given value - q : int, float, optional - Quantization factor - log : bool, optional - If ``True``, the values of the hyperparameter will be sampled - on a logarithmic scale. Default to ``False`` - meta : Dict, optional - Field for holding meta data provided by the user. - Not used by the configuration space. 
- """ - super(UniformFloatHyperparameter, self).__init__(name, default_value, meta) - self.lower = float(lower) - self.upper = float(upper) - self.q = float(q) if q is not None else None - self.log = bool(log) - - if self.lower >= self.upper: - raise ValueError("Upper bound %f must be larger than lower bound " - "%f for hyperparameter %s" % - (self.upper, self.lower, name)) - elif log and self.lower <= 0: - raise ValueError("Negative lower bound (%f) for log-scale " - "hyperparameter %s is forbidden." % - (self.lower, name)) - - self.default_value = self.check_default(default_value) - - if self.log: - if self.q is not None: - lower = self.lower - (np.float64(self.q) / 2. - 0.0001) - upper = self.upper + (np.float64(self.q) / 2. - 0.0001) - else: - lower = self.lower - upper = self.upper - self._lower = np.log(lower) - self._upper = np.log(upper) - else: - if self.q is not None: - self._lower = self.lower - (self.q / 2. - 0.0001) - self._upper = self.upper + (self.q / 2. - 0.0001) - else: - self._lower = self.lower - self._upper = self.upper - if self.q is not None: - # There can be weird rounding errors, so we compare the result against self.q, see - # In [13]: 2.4 % 0.2 - # Out[13]: 0.1999999999999998 - if np.round((self.upper - self.lower) % self.q, 10) not in (0, self.q): - raise ValueError( - "Upper bound (%f) - lower bound (%f) must be a multiple of q (%f)" - % (self.upper, self.lower, self.q) - ) - - self.normalized_default_value = self._inverse_transform(self.default_value) - - def __repr__(self) -> str: - repr_str = io.StringIO() - repr_str.write("%s, Type: UniformFloat, Range: [%s, %s], Default: %s" % - (self.name, repr(self.lower), repr(self.upper), - repr(self.default_value))) - if self.log: - repr_str.write(", on log-scale") - if self.q is not None: - repr_str.write(", Q: %s" % str(self.q)) - repr_str.seek(0) - return repr_str.getvalue() - - def is_legal(self, value: Union[float]) -> bool: - if not (isinstance(value, float) or isinstance(value, int)): - 
return False - elif self.upper >= value >= self.lower: - return True - else: - return False - - cpdef bint is_legal_vector(self, DTYPE_t value): - if 1.0 >= value >= 0.0: - return True - else: - return False - - def check_default(self, default_value: Optional[float]) -> float: - if default_value is None: - if self.log: - default_value = np.exp((np.log(self.lower) + np.log(self.upper)) / 2.) - else: - default_value = (self.lower + self.upper) / 2. - default_value = np.round(float(default_value), 10) - - if self.is_legal(default_value): - return default_value - else: - raise ValueError("Illegal default value %s" % str(default_value)) - - def to_integer(self) -> "UniformIntegerHyperparameter": - # TODO check if conversion makes sense at all (at least two integer values possible!) - # todo check if params should be converted to int while class initialization - # or inside class itself - return UniformIntegerHyperparameter( - name=self.name, - lower=int(np.ceil(self.lower)), - upper=int(np.floor(self.upper)), - default_value=int(np.rint(self.default_value)), - q=int(np.rint(self.q)), - log=self.log, - ) - - def _sample(self, rs: np.random, size: Optional[int] = None) -> Union[float, np.ndarray]: - return rs.uniform(size=size) - - cpdef np.ndarray _transform_vector(self, np.ndarray vector): - if np.isnan(vector).any(): - raise ValueError('Vector %s contains NaN\'s' % vector) - vector = vector * (self._upper - self._lower) + self._lower - if self.log: - vector = np.exp(vector) - if self.q is not None: - vector = np.rint((vector - self.lower) / self.q) * self.q + self.lower - vector = np.minimum(vector, self.upper) - vector = np.maximum(vector, self.lower) - return np.maximum(self.lower, np.minimum(self.upper, vector)) - - cpdef double _transform_scalar(self, double scalar): - if scalar != scalar: - raise ValueError("Number %s is NaN" % scalar) - scalar = scalar * (self._upper - self._lower) + self._lower - if self.log: - scalar = math.exp(scalar) - if self.q is not None: 
- scalar = np.round((scalar - self.lower) / self.q) * self.q + self.lower - scalar = min(scalar, self.upper) - scalar = max(scalar, self.lower) - scalar = min(self.upper, max(self.lower, scalar)) - return scalar - - def _inverse_transform(self, vector: Union[np.ndarray, None] - ) -> Union[np.ndarray, float, int]: - if vector is None: - return np.NaN - if self.log: - vector = np.log(vector) - vector = (vector - self._lower) / (self._upper - self._lower) - vector = np.minimum(1.0, vector) - vector = np.maximum(0.0, vector) - return vector - - def get_neighbors( - self, - value: Any, - rs: np.random.RandomState, - number: int = 4, - transform: bool = False, - std: float = 0.2 - ) -> List[float]: - neighbors = [] # type: List[float] - while len(neighbors) < number: - neighbor = rs.normal(value, std) # type: float - if neighbor < 0 or neighbor > 1: - continue - if transform: - neighbors.append(self._transform(neighbor)) - else: - neighbors.append(neighbor) - return neighbors - - def _pdf(self, vector: np.ndarray) -> np.ndarray: - """ - Computes the probability density function of the parameter in - the transformed (and possibly normalized, depends on the parameter - type) space. As such, one never has to worry about log-normal - distributions, only normal distributions (as the inverse_transform - in the pdf method handles these). - - Parameters - ---------- - vector: np.ndarray - the (N, ) vector of inputs for which the probability density - function is to be computed. - - Returns - ---------- - np.ndarray(N, ) - Probability density values of the input vector - """ - # everything that comes into _pdf for a uniform variable should - # already be in [0, 1]-range, and if not, it's outside the upper - # or lower bound. 
- ub = 1 - lb = 0 - inside_range = ((lb <= vector) & (vector <= ub)).astype(int) - return inside_range / (self.upper - self.lower) - - def get_max_density(self) -> float: - return 1 / (self.upper - self.lower) - - def get_size(self) -> float: - if self.q is None: - return np.inf - else: - return np.rint((self.upper - self.lower) / self.q) + 1 diff --git a/ConfigSpace/hyperparameters/uniform_integer.pxd b/ConfigSpace/hyperparameters/uniform_integer.pxd deleted file mode 100644 index 3285a180..00000000 --- a/ConfigSpace/hyperparameters/uniform_integer.pxd +++ /dev/null @@ -1,18 +0,0 @@ -import numpy as np -cimport numpy as np -np.import_array() - -# We now need to fix a datatype for our arrays. I've used the variable -# DTYPE for this, which is assigned to the usual NumPy runtime -# type info object. -DTYPE = float -# "ctypedef" assigns a corresponding compile-time type to DTYPE_t. For -# every type in the numpy module there's a corresponding compile-time -# type with a _t-suffix. -ctypedef np.float_t DTYPE_t - -from ConfigSpace.hyperparameters.integer_hyperparameter cimport IntegerHyperparameter - - -cdef class UniformIntegerHyperparameter(IntegerHyperparameter): - pass diff --git a/ConfigSpace/hyperparameters/uniform_integer.pyx b/ConfigSpace/hyperparameters/uniform_integer.pyx deleted file mode 100644 index ce217363..00000000 --- a/ConfigSpace/hyperparameters/uniform_integer.pyx +++ /dev/null @@ -1,336 +0,0 @@ -import io -from typing import Dict, List, Optional, Union -import warnings - -import numpy as np -cimport numpy as np -np.import_array() - -from ConfigSpace.functional import center_range -from ConfigSpace.hyperparameters.uniform_float cimport UniformFloatHyperparameter - - -cdef class UniformIntegerHyperparameter(IntegerHyperparameter): - def __init__(self, name: str, lower: int, upper: int, default_value: Union[int, None] = None, - q: Union[int, None] = None, log: bool = False, - meta: Optional[Dict] = None) -> None: - """ - A uniformly distributed 
integer hyperparameter. - - Its values are sampled from a uniform distribution - with bounds ``lower`` and ``upper``. - - >>> from ConfigSpace import UniformIntegerHyperparameter - >>> - >>> UniformIntegerHyperparameter(name='u', lower=10, upper=100, log=False) - u, Type: UniformInteger, Range: [10, 100], Default: 55 - - Parameters - ---------- - name : str - Name of the hyperparameter with which it can be accessed - lower : int - Lower bound of a range of values from which the hyperparameter will be sampled - upper : int - upper bound - default_value : int, optional - Sets the default value of a hyperparameter to a given value - q : int, optional - Quantization factor - log : bool, optional - If ``True``, the values of the hyperparameter will be sampled - on a logarithmic scale. Defaults to ``False`` - meta : Dict, optional - Field for holding meta data provided by the user. - Not used by the configuration space. - """ - - super(UniformIntegerHyperparameter, self).__init__(name, default_value, meta) - self.lower = self.check_int(lower, "lower") - self.upper = self.check_int(upper, "upper") - if default_value is not None: - default_value = self.check_int(default_value, name) - - if q is not None: - if q < 1: - warnings.warn("Setting quantization < 1 for Integer " - "Hyperparameter '%s' has no effect." % - name) - self.q = None - else: - self.q = self.check_int(q, "q") - if (self.upper - self.lower) % self.q != 0: - raise ValueError( - "Upper bound (%d) - lower bound (%d) must be a multiple of q (%d)" - % (self.upper, self.lower, self.q) - ) - else: - self.q = None - self.log = bool(log) - - if self.lower >= self.upper: - raise ValueError("Upper bound %d must be larger than lower bound " - "%d for hyperparameter %s" % - (self.lower, self.upper, name)) - elif log and self.lower <= 0: - raise ValueError("Negative lower bound (%d) for log-scale " - "hyperparameter %s is forbidden." 
% - (self.lower, name)) - - self.default_value = self.check_default(default_value) - - self.ufhp = UniformFloatHyperparameter(self.name, - self.lower - 0.49999, - self.upper + 0.49999, - log=self.log, - default_value=self.default_value) - - self.normalized_default_value = self._inverse_transform(self.default_value) - - def __repr__(self) -> str: - repr_str = io.StringIO() - repr_str.write("%s, Type: UniformInteger, Range: [%s, %s], Default: %s" - % (self.name, repr(self.lower), - repr(self.upper), repr(self.default_value))) - if self.log: - repr_str.write(", on log-scale") - if self.q is not None: - repr_str.write(", Q: %s" % repr(self.q)) - repr_str.seek(0) - return repr_str.getvalue() - - def _sample(self, rs: np.random.RandomState, size: Optional[int] = None - ) -> Union[np.ndarray, float]: - value = self.ufhp._sample(rs, size=size) - # Map all floats which belong to the same integer value to the same - # float value by first transforming it to an integer and then - # transforming it back to a float between zero and one - value = self._transform(value) - value = self._inverse_transform(value) - return value - - cpdef np.ndarray _transform_vector(self, np.ndarray vector): - vector = self.ufhp._transform_vector(vector) - if self.q is not None: - vector = np.rint((vector - self.lower) / self.q) * self.q + self.lower - vector = np.minimum(vector, self.upper) - vector = np.maximum(vector, self.lower) - - return np.rint(vector) - - cpdef long long _transform_scalar(self, double scalar): - scalar = self.ufhp._transform_scalar(scalar) - if self.q is not None: - scalar = np.round((scalar - self.lower) / self.q) * self.q + self.lower - scalar = min(scalar, self.upper) - scalar = max(scalar, self.lower) - return int(np.round(scalar)) - - def _inverse_transform(self, vector: Union[np.ndarray, float, int] - ) -> Union[np.ndarray, float, int]: - return self.ufhp._inverse_transform(vector) - - def is_legal(self, value: int) -> bool: - if not (isinstance(value, (int, np.int32, 
np.int64))): - return False - elif self.upper >= value >= self.lower: - return True - else: - return False - - cpdef bint is_legal_vector(self, DTYPE_t value): - if 1.0 >= value >= 0.0: - return True - else: - return False - - def check_default(self, default_value: Union[int, float]) -> int: - if default_value is None: - if self.log: - default_value = np.exp((np.log(self.lower) + np.log(self.upper)) / 2.) - else: - default_value = (self.lower + self.upper) / 2. - default_value = int(np.round(default_value, 0)) - - if self.is_legal(default_value): - return default_value - else: - raise ValueError("Illegal default value %s" % str(default_value)) - - def has_neighbors(self) -> bool: - if self.log: - upper = np.exp(self.ufhp._upper) - lower = np.exp(self.ufhp._lower) - else: - upper = self.ufhp._upper - lower = self.ufhp._lower - - # If there is only one active value, this is not enough - if upper - lower >= 1: - return True - else: - return False - - def get_num_neighbors(self, value = None) -> int: - # If there is a value in the range, then that value is not a neighbor of itself - # so we need to remove one - if value is not None and self.lower <= value <= self.upper: - return self.upper - self.lower - 1 - else: - return self.upper - self.lower - - def get_neighbors( - self, - value: float, - rs: np.random.RandomState, - number: int = 4, - transform: bool = False, - std: float = 0.2, - ) -> List[int]: - """Get the neighbors of a value - - NOTE - ---- - **This assumes the value is in the unit-hypercube [0, 1]** - - Parameters - ---------- - value: float - The value to get neighbors around. This assume the ``value`` has been - converted to the [0, 1] range which can be done with ``_inverse_transform``. - - rs: RandomState - The random state to use - - number: int = 4 - How many neighbors to get - - transform: bool = False - Whether to transform this value from the unit cube, back to the - hyperparameter's specified range of values. - - std: float = 0.2 - The std. dev. 
to use in the [0, 1] hypercube space while sampling - for neighbors. - - Returns - ------- - List[int] - Some ``number`` of neighbors centered around ``value``. - """ - assert 0 <= value <= 1, ( - "For get neighbors of UniformIntegerHyperparameter, the value" - " if assumed to be in the unit-hypercube [0, 1]. If this was not" - " the behaviour assumed, please raise a ticket on github." - ) - assert number < 1000000, ( - "Can only generate less than 1 million neighbors." - ) - # Convert python values to cython ones - cdef long long center = self._transform(value) - cdef long long lower = self.lower - cdef long long upper = self.upper - cdef unsigned int n_requested = number - cdef unsigned long long n_neighbors = upper - lower - 1 - cdef long long stepsize = self.q if self.q is not None else 1 - - neighbors = [] - - cdef long long v # A value that's possible to return - if n_neighbors < n_requested: - - for v in range(lower, center): - neighbors.append(v) - - for v in range(center + 1, upper + 1): - neighbors.append(v) - - if transform: - return neighbors - else: - return self._inverse_transform(np.asarray(neighbors)).tolist() - - # A truncated normal between 0 and 1, centered on the value with a scale of std. 
- # This will be sampled from and converted to the corresponding int value - # However, this is too slow - we use the "poor man's truncnorm below" - # cdef np.ndarray float_indices = truncnorm.rvs( - # a=(0 - value) / std, - # b=(1 - value) / std, - # loc=value, - # scale=std, - # size=number, - # random_state=rs - # ) - # We sample five times as many values as needed and weed them out below - # (perform rejection sampling and make sure we don't sample any neighbor twice) - # This increases our chances of not having to fill the neighbors list by calling - # `center_range` - # Five is an arbitrary number and can probably be tuned to reduce overhead - cdef np.ndarray float_indices = rs.normal(value, std, size=number * 5) - cdef np.ndarray mask = (float_indices >= 0) & (float_indices <= 1) - float_indices = float_indices[mask] - - cdef np.ndarray possible_neighbors_as_array = self._transform_vector(float_indices).astype(np.longlong) - cdef long long [:] possible_neighbors = possible_neighbors_as_array - - cdef unsigned int n_neighbors_generated = 0 - cdef unsigned int n_candidates = len(float_indices) - cdef unsigned int candidate_index = 0 - cdef set seen = {center} - while n_neighbors_generated < n_requested and candidate_index < n_candidates: - v = possible_neighbors[candidate_index] - if v not in seen: - seen.add(v) - n_neighbors_generated += 1 - candidate_index += 1 - - if n_neighbors_generated < n_requested: - numbers_around = center_range(center, lower, upper, stepsize) - - while n_neighbors_generated < n_requested: - v = next(numbers_around) - if v not in seen: - seen.add(v) - n_neighbors_generated += 1 - - seen.remove(center) - neighbors = list(seen) - if transform: - return neighbors - else: - return self._inverse_transform(np.array(neighbors)).tolist() - - def _pdf(self, vector: np.ndarray) -> np.ndarray: - """ - Computes the probability density function of the parameter in - the transformed (and possibly normalized, depends on the parameter - type) space. 
As such, one never has to worry about log-normal - distributions, only normal distributions (as the inverse_transform - in the pdf method handles these). Optimally, an IntegerHyperparameter - should have a corresponding float, which can be utlized for the calls - to the probability density function (see e.g. NormalIntegerHyperparameter) - - Parameters - ---------- - vector: np.ndarray - the (N, ) vector of inputs for which the probability density - function is to be computed. - - Returns - ---------- - np.ndarray(N, ) - Probability density values of the input vector - """ - return self.ufhp._pdf(vector) - - def get_max_density(self) -> float: - lb = self.lower - ub = self.upper - return 1 / (ub - lb + 1) - - def get_size(self) -> float: - if self.q is None: - q = 1 - else: - q = self.q - return np.rint((self.upper - self.lower) / q) + 1 diff --git a/ConfigSpace/nx/__init__.py b/ConfigSpace/nx/__init__.py deleted file mode 100644 index 380aec8e..00000000 --- a/ConfigSpace/nx/__init__.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (C) 2004-2010 by -# Aric Hagberg -# Dan Schult -# Pieter Swart -# All rights reserved. -# BSD license. 
-# -# Add platform dependent shared library path to sys.path -# -# Modified by Matthias Feurer for the package HPOlibConfigSpace - - -# Release data -from ConfigSpace.nx.release import authors, date, license, version - -__author__ = "%s <%s>\n%s <%s>\n%s <%s>" % ( - authors["Hagberg"] + authors["Schult"] + authors["Swart"] -) -__license__ = license - -__date__ = date -__version__ = version - -from ConfigSpace.nx.algorithms import ( - ancestors, - descendants, - is_aperiodic, - is_directed_acyclic_graph, - simple_cycles, - strongly_connected_components, - topological_sort, - topological_sort_recursive, -) -from ConfigSpace.nx.classes import DiGraph, Graph -from ConfigSpace.nx.exception import ( - NetworkXAlgorithmError, - NetworkXError, - NetworkXException, - NetworkXNoPath, - NetworkXNotImplemented, - NetworkXPointlessConcept, - NetworkXUnbounded, - NetworkXUnfeasible, -) - -__all__ = [ - "NetworkXException", - "NetworkXError", - "NetworkXPointlessConcept", - "NetworkXAlgorithmError", - "NetworkXUnfeasible", - "NetworkXNoPath", - "NetworkXUnbounded", - "NetworkXNotImplemented", - "Graph", - "DiGraph", - "descendants", - "ancestors", - "topological_sort", - "topological_sort_recursive", - "is_directed_acyclic_graph", - "is_aperiodic", - "simple_cycles", - "strongly_connected_components", -] diff --git a/ConfigSpace/nx/algorithms/__init__.py b/ConfigSpace/nx/algorithms/__init__.py deleted file mode 100644 index 35b0c488..00000000 --- a/ConfigSpace/nx/algorithms/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from ConfigSpace.nx.algorithms.components import strongly_connected_components -from ConfigSpace.nx.algorithms.cycles import simple_cycles -from ConfigSpace.nx.algorithms.dag import ( - ancestors, - descendants, - is_aperiodic, - is_directed_acyclic_graph, - topological_sort, - topological_sort_recursive, -) - -__all__ = [ - "descendants", - "ancestors", - "topological_sort", - "topological_sort_recursive", - "is_directed_acyclic_graph", - "is_aperiodic", - 
"simple_cycles", - "strongly_connected_components", -] diff --git a/ConfigSpace/nx/algorithms/components/__init__.py b/ConfigSpace/nx/algorithms/components/__init__.py deleted file mode 100644 index 48c8934e..00000000 --- a/ConfigSpace/nx/algorithms/components/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from ConfigSpace.nx.algorithms.components.strongly_connected import strongly_connected_components - -__all__ = ["strongly_connected_components"] diff --git a/ConfigSpace/nx/algorithms/components/strongly_connected.py b/ConfigSpace/nx/algorithms/components/strongly_connected.py deleted file mode 100644 index 60dcc984..00000000 --- a/ConfigSpace/nx/algorithms/components/strongly_connected.py +++ /dev/null @@ -1,105 +0,0 @@ -"""Strongly connected components.""" -# Copyright (C) 2004-2011 by -# Aric Hagberg -# Dan Schult -# Pieter Swart -# All rights reserved. -# BSD license. -from __future__ import annotations - -import ConfigSpace.nx - -__authors__ = "\n".join( - [ - "Eben Kenah", - "Aric Hagberg (hagberg@lanl.gov)" "Christopher Ellison", - "Ben Edwards (bedwards@cs.unm.edu)", - ], -) - -__all__ = ["strongly_connected_components"] - - -def strongly_connected_components(G): - """Return nodes in strongly connected components of graph. - - Parameters - ---------- - G : NetworkX Graph - An directed graph. - - Returns - ------- - comp : list of lists - A list of nodes for each component of G. - The list is ordered from largest connected component to smallest. - - Raises - ------ - NetworkXError: If G is undirected. - - See Also - -------- - connected_components, weakly_connected_components - - Notes - ----- - Uses Tarjan's algorithm with Nuutila's modifications. - Nonrecursive version of algorithm. - - References - ---------- - .. [1] Depth-first search and linear graph algorithms, R. Tarjan - SIAM Journal of Computing 1(2):146-160, (1972). - - .. [2] On finding the strongly connected components in a directed graph. - E. Nuutila and E. 
Soisalon-Soinen - Information Processing Letters 49(1): 9-14, (1994).. - """ - if not G.is_directed(): - raise ConfigSpace.nx.NetworkXError( - """Not allowed for undirected graph G. - Use connected_components() """, - ) - preorder = {} - lowlink = {} - scc_found = {} - scc_queue = [] - scc_list = [] - i = 0 # Preorder counter - for source in G: - if source not in scc_found: - queue = [source] - while queue: - v = queue[-1] - if v not in preorder: - i = i + 1 - preorder[v] = i - done = 1 - v_nbrs = G[v] - for w in v_nbrs: - if w not in preorder: - queue.append(w) - done = 0 - break - if done == 1: - lowlink[v] = preorder[v] - for w in v_nbrs: - if w not in scc_found: - if preorder[w] > preorder[v]: - lowlink[v] = min([lowlink[v], lowlink[w]]) - else: - lowlink[v] = min([lowlink[v], preorder[w]]) - queue.pop() - if lowlink[v] == preorder[v]: - scc_found[v] = True - scc = [v] - while scc_queue and preorder[scc_queue[-1]] > preorder[v]: - k = scc_queue.pop() - scc_found[k] = True - scc.append(k) - scc_list.append(scc) - else: - scc_queue.append(v) - scc_list.sort(key=len, reverse=True) - return scc_list diff --git a/ConfigSpace/nx/algorithms/cycles.py b/ConfigSpace/nx/algorithms/cycles.py deleted file mode 100644 index 945316c2..00000000 --- a/ConfigSpace/nx/algorithms/cycles.py +++ /dev/null @@ -1,139 +0,0 @@ -""" -======================== -Cycle finding algorithms -========================. -""" -# Copyright (C) 2010-2012 by -# Aric Hagberg -# Dan Schult -# Pieter Swart -# All rights reserved. -# BSD license. -from __future__ import annotations - -from collections import defaultdict - -import ConfigSpace.nx - -__all__ = ["simple_cycles"] -__author__ = "\n".join( - [ - "Jon Olav Vik ", - "Dan Schult ", - "Aric Hagberg ", - ], -) - - -def simple_cycles(G): - """Find simple cycles (elementary circuits) of a directed graph. - - An simple cycle, or elementary circuit, is a closed path where no - node appears twice, except that the first and last node are the same. 
- Two elementary circuits are distinct if they are not cyclic permutations - of each other. - - This is a nonrecursive, iterator/generator version of Johnson's - algorithm [1]_. There may be better algorithms for some cases [2]_ [3]_. - - Parameters - ---------- - G : NetworkX DiGraph - A directed graph - - Returns - ------- - cycle_generator: generator - A generator that produces elementary cycles of the graph. Each cycle is - a list of nodes with the first and last nodes being the same. - - Examples - -------- - >>> G = nx.DiGraph([(0, 0), (0, 1), (0, 2), (1, 2), (2, 0), (2, 1), (2, 2)]) - >>> list(nx.simple_cycles(G)) - [[2], [2, 1], [2, 0], [2, 0, 1], [0]] - - Notes - ----- - The implementation follows pp. 79-80 in [1]_. - - The time complexity is O((n+e)(c+1)) for n nodes, e edges and c - elementary circuits. - - To filter the cycles so that they don't include certain nodes or edges, - copy your graph and eliminate those nodes or edges before calling. - >>> copyG = G.copy() - >>> copyG.remove_nodes_from([1]) - >>> copyG.remove_edges_from([(0,1)]) - >>> list(nx.simple_cycles(copyG)) - [[2], [2, 0], [0]] - - References - ---------- - .. [1] Finding all the elementary circuits of a directed graph. - D. B. Johnson, SIAM Journal on Computing 4, no. 1, 77-84, 1975. - http://dx.doi.org/10.1137/0204007 - - .. [2] Enumerating the cycles of a digraph: a new preprocessing strategy. - G. Loizou and P. Thanish, Information Sciences, v. 27, 163-182, 1982. - - .. [3] A search strategy for the elementary cycles of a directed graph. - J.L. Szwarcfiter and P.E. Lauer, BIT NUMERICAL MATHEMATICS, - v. 16, no. 2, 192-204, 1976. - - See Also - -------- - cycle_basis - """ - - def _unblock(thisnode, blocked, B): - stack = {thisnode} - while stack: - node = stack.pop() - if node in blocked: - blocked.remove(node) - stack.update(B[node]) - B[node].clear() - - # Johnson's algorithm requires some ordering of the nodes. 
- # We assign the arbitrary ordering given by the strongly connected comps - # There is no need to track the ordering as each node removed as processed. - subG = G.copy() # save the actual graph so we can mutate it here - sccs = ConfigSpace.nx.strongly_connected_components(subG) - while sccs: - scc = sccs.pop() - # order of scc determines ordering of nodes - startnode = scc.pop() - # Processing node runs "circuit" routine from recursive version - path = [startnode] - blocked = set() # vertex: blocked from search? - closed = set() # nodes involved in a cycle - blocked.add(startnode) - B = defaultdict(set) # graph portions that yield no elementary circuit - stack = [(startnode, list(subG[startnode]))] # subG gives component nbrs - while stack: - thisnode, nbrs = stack[-1] - if nbrs: - nextnode = nbrs.pop() - if nextnode == startnode: - yield path[:] - closed.update(path) - elif nextnode not in blocked: - path.append(nextnode) - stack.append((nextnode, list(subG[nextnode]))) - blocked.add(nextnode) - continue - # done with nextnode... 
look for more neighbors - if not nbrs: # no more nbrs - if thisnode in closed: - _unblock(thisnode, blocked, B) - else: - for nbr in G[thisnode]: - if thisnode not in B[nbr]: - B[nbr].add(thisnode) - stack.pop() - path.pop() - # done processing this node - subG.remove_node(startnode) - H = subG.subgraph(scc) # make smaller to avoid work in SCC routine - sccs.extend(ConfigSpace.nx.strongly_connected_components(H)) diff --git a/ConfigSpace/nx/algorithms/dag.py b/ConfigSpace/nx/algorithms/dag.py deleted file mode 100644 index 2fed3405..00000000 --- a/ConfigSpace/nx/algorithms/dag.py +++ /dev/null @@ -1,293 +0,0 @@ -from __future__ import annotations - -try: - # >= Python 3.9 - from math import gcd # type: ignore -except ImportError: - # < Python 3.9 - from fractions import gcd # type: ignore - -import ConfigSpace.nx - -"""Algorithms for directed acyclic graphs (DAGs).""" -# Copyright (C) 2006-2011 by -# Aric Hagberg -# Dan Schult -# Pieter Swart -# All rights reserved. -# BSD license. -__author__ = """\n""".join( - [ - "Aric Hagberg ", - "Dan Schult (dschult@colgate.edu)", - "Ben Edwards (bedwards@cs.unm.edu)", - ], -) -__all__ = [ - "descendants", - "ancestors", - "topological_sort", - "topological_sort_recursive", - "is_directed_acyclic_graph", - "is_aperiodic", -] - - -def descendants(G, source): - """Return all nodes reachable from `source` in G. - - Parameters - ---------- - G : NetworkX DiGraph - source : node in G - - Returns - ------- - des : set() - The descendants of source in G - """ - if not G.has_node(source): - raise ConfigSpace.nx.NetworkXError("The node %s is not in the graph." % source) - des = set(ConfigSpace.nx.shortest_path_length(G, source=source).keys()) - {source} - return des - - -def ancestors(G, source): - """Return all nodes having a path to `source` in G. 
- - Parameters - ---------- - G : NetworkX DiGraph - source : node in G - - Returns - ------- - ancestors : set() - The ancestors of source in G - """ - if not G.has_node(source): - raise ConfigSpace.nx.NetworkXError("The node %s is not in the graph." % source) - anc = set(ConfigSpace.nx.shortest_path_length(G, target=source).keys()) - {source} - return anc - - -def is_directed_acyclic_graph(G): - """Return True if the graph G is a directed acyclic graph (DAG) or - False if not. - - Parameters - ---------- - G : NetworkX graph - A graph - - Returns - ------- - is_dag : bool - True if G is a DAG, false otherwise - """ - if not G.is_directed(): - return False - try: - topological_sort(G) - return True - except ConfigSpace.nx.NetworkXUnfeasible: - return False - - -def topological_sort(G, nbunch=None): - """Return a list of nodes in topological sort order. - - A topological sort is a nonunique permutation of the nodes - such that an edge from u to v implies that u appears before v in the - topological sort order. - - Parameters - ---------- - G : NetworkX digraph - A directed graph - - nbunch : container of nodes (optional) - Explore graph in specified order given in nbunch - - Raises - ------ - NetworkXError - Topological sort is defined for directed graphs only. If the - graph G is undirected, a NetworkXError is raised. - - NetworkXUnfeasible - If G is not a directed acyclic graph (DAG) no topological sort - exists and a NetworkXUnfeasible exception is raised. - - Notes - ----- - This algorithm is based on a description and proof in - The Algorithm Design Manual [1]_ . - - See Also - -------- - is_directed_acyclic_graph - - References - ---------- - .. [1] Skiena, S. S. The Algorithm Design Manual (Springer-Verlag, 1998). 
- http://www.amazon.com/exec/obidos/ASIN/0387948600/ref=ase_thealgorithmrepo/ - """ - if not G.is_directed(): - raise ConfigSpace.nx.NetworkXError("Topological sort not defined on undirected graphs.") - - # nonrecursive version - seen = set() - order = [] - explored = set() - - if nbunch is None: - nbunch = G.nodes_iter() - for v in nbunch: # process all vertices in G - if v in explored: - continue - fringe = [v] # nodes yet to look at - while fringe: - w = fringe[-1] # depth first search - if w in explored: # already looked down this branch - fringe.pop() - continue - seen.add(w) # mark as seen - # Check successors for cycles and for new nodes - new_nodes = [] - for n in G[w]: - if n not in explored: - if n in seen: # CYCLE !! - raise ConfigSpace.nx.NetworkXUnfeasible("Graph contains a cycle.") - new_nodes.append(n) - if new_nodes: # Add new_nodes to fringe - fringe.extend(new_nodes) - else: # No new nodes so w is fully explored - explored.add(w) - order.append(w) - fringe.pop() # done considering this node - return list(reversed(order)) - - -def topological_sort_recursive(G, nbunch=None): - """Return a list of nodes in topological sort order. - - A topological sort is a nonunique permutation of the nodes such - that an edge from u to v implies that u appears before v in the - topological sort order. - - Parameters - ---------- - G : NetworkX digraph - - nbunch : container of nodes (optional) - Explore graph in specified order given in nbunch - - Raises - ------ - NetworkXError - Topological sort is defined for directed graphs only. If the - graph G is undirected, a NetworkXError is raised. - - NetworkXUnfeasible - If G is not a directed acyclic graph (DAG) no topological sort - exists and a NetworkXUnfeasible exception is raised. - - Notes - ----- - This is a recursive version of topological sort. 
- - See Also - -------- - topological_sort - is_directed_acyclic_graph - - """ - if not G.is_directed(): - raise ConfigSpace.nx.NetworkXError("Topological sort not defined on undirected graphs.") - - def _dfs(v): - ancestors.add(v) - - for w in G[v]: - if w in ancestors: - raise ConfigSpace.nx.NetworkXUnfeasible("Graph contains a cycle.") - - if w not in explored: - _dfs(w) - - ancestors.remove(v) - explored.add(v) - order.append(v) - - ancestors = set() - explored = set() - order = [] - - if nbunch is None: - nbunch = G.nodes_iter() - - for v in nbunch: - if v not in explored: - _dfs(v) - - return list(reversed(order)) - - -def is_aperiodic(G): - """Return True if G is aperiodic. - - A directed graph is aperiodic if there is no integer k > 1 that - divides the length of every cycle in the graph. - - Parameters - ---------- - G : NetworkX DiGraph - Graph - - Returns - ------- - aperiodic : boolean - True if the graph is aperiodic False otherwise - - Raises - ------ - NetworkXError - If G is not directed - - Notes - ----- - This uses the method outlined in [1]_, which runs in O(m) time - given m edges in G. Note that a graph is not aperiodic if it is - acyclic as every integer trivial divides length 0 cycles. - - References - ---------- - .. [1] Jarvis, J. P.; Shier, D. R. (1996), - Graph-theoretic analysis of finite Markov chains, - in Shier, D. R.; Wallenius, K. T., Applied Mathematical Modeling: - A Multidisciplinary Approach, CRC Press. 
- """ - if not G.is_directed(): - raise ConfigSpace.nx.NetworkXError("is_aperiodic not defined for undirected graphs") - - s = next(G.nodes_iter()) - levels = {s: 0} - this_level = [s] - g = 0 - level = 1 - while this_level: - next_level = [] - for u in this_level: - for v in G[u]: - if v in levels: # Non-Tree Edge - g = gcd(g, levels[u] - levels[v] + 1) - else: # Tree Edge - next_level.append(v) - levels[v] = level - this_level = next_level - level += 1 - if len(levels) == len(G): # All nodes in tree - return g == 1 - else: - return g == 1 and ConfigSpace.nx.is_aperiodic(G.subgraph(set(G) - set(levels))) diff --git a/ConfigSpace/nx/classes/__init__.py b/ConfigSpace/nx/classes/__init__.py deleted file mode 100644 index bdd0332a..00000000 --- a/ConfigSpace/nx/classes/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from ConfigSpace.nx.classes.digraph import DiGraph -from ConfigSpace.nx.classes.graph import Graph - -__all__ = ["Graph", "DiGraph"] diff --git a/ConfigSpace/nx/classes/digraph.py b/ConfigSpace/nx/classes/digraph.py deleted file mode 100644 index 277fffcf..00000000 --- a/ConfigSpace/nx/classes/digraph.py +++ /dev/null @@ -1,1156 +0,0 @@ -"""Base class for directed graphs.""" -# Copyright (C) 2004-2011 by -# Aric Hagberg -# Dan Schult -# Pieter Swart -# All rights reserved. -# BSD license. -from __future__ import annotations - -import collections -from copy import deepcopy - -from ConfigSpace.nx.classes.graph import Graph -from ConfigSpace.nx.exception import NetworkXError - -__author__ = """\n""".join( - [ - "Aric Hagberg (hagberg@lanl.gov)", - "Pieter Swart (swart@lanl.gov)", - "Dan Schult(dschult@colgate.edu)", - ], -) - - -class DiGraph(Graph): - """ - Base class for directed graphs. - - A DiGraph stores nodes and edges with optional data, or attributes. - - DiGraphs hold directed edges. Self loops are allowed but multiple - (parallel) edges are not. - - Nodes can be arbitrary (hashable) Python objects with optional - key/value attributes. 
- - Edges are represented as links between nodes with optional - key/value attributes. - - Parameters - ---------- - data : input graph - Data to initialize graph. If data=None (default) an empty - graph is created. The data can be an edge list, or any - NetworkX graph object. If the corresponding optional Python - packages are installed the data can also be a NumPy matrix - or 2d ndarray, a SciPy sparse matrix, or a PyGraphviz graph. - attr : keyword arguments, optional (default= no attributes) - Attributes to add to graph as key=value pairs. - - See Also - -------- - Graph - MultiGraph - MultiDiGraph - - Examples - -------- - Create an empty graph structure (a "null graph") with no nodes and - no edges. - - >>> G = nx.DiGraph() - - G can be grown in several ways. - - **Nodes:** - - Add one node at a time: - - >>> G.add_node(1) - - Add the nodes from any container (a list, dict, set or - even the lines from a file or the nodes from another graph). - - >>> G.add_nodes_from([2,3]) - >>> G.add_nodes_from(range(100,110)) - >>> H=nx.Graph() - >>> H.add_path([0,1,2,3,4,5,6,7,8,9]) - >>> G.add_nodes_from(H) - - In addition to strings and integers any hashable Python object - (except None) can represent a node, e.g. a customized node object, - or even another Graph. - - >>> G.add_node(H) - - **Edges:** - - G can also be grown by adding edges. - - Add one edge, - - >>> G.add_edge(1, 2) - - a list of edges, - - >>> G.add_edges_from([(1,2),(1,3)]) - - or a collection of edges, - - >>> G.add_edges_from(H.edges()) - - If some edges connect nodes not yet in the graph, the nodes - are added automatically. There are no errors when adding - nodes or edges that already exist. - - **Attributes:** - - Each graph, node, and edge can hold key/value attribute pairs - in an associated attribute dictionary (the keys must be hashable). 
- By default these are empty, but can be added or changed using - add_edge, add_node or direct manipulation of the attribute - dictionaries named graph, node and edge respectively. - - >>> G = nx.DiGraph(day="Friday") - >>> G.graph - {'day': 'Friday'} - - Add node attributes using add_node(), add_nodes_from() or G.node - - >>> G.add_node(1, time='5pm') - >>> G.add_nodes_from([3], time='2pm') - >>> G.node[1] - {'time': '5pm'} - >>> G.node[1]['room'] = 714 - >>> del G.node[1]['room'] # remove attribute - >>> G.nodes(data=True) - [(1, {'time': '5pm'}), (3, {'time': '2pm'})] - - Warning: adding a node to G.node does not add it to the graph. - - Add edge attributes using add_edge(), add_edges_from(), subscript - notation, or G.edge. - - >>> G.add_edge(1, 2, weight=4.7 ) - >>> G.add_edges_from([(3,4),(4,5)], color='red') - >>> G.add_edges_from([(1,2,{'color':'blue'}), (2,3,{'weight':8})]) - >>> G[1][2]['weight'] = 4.7 - >>> G.edge[1][2]['weight'] = 4 - - **Shortcuts:** - - Many common graph features allow python syntax to speed reporting. - - >>> 1 in G # check if node in graph - True - >>> [n for n in G if n<3] # iterate through nodes - [1, 2] - >>> len(G) # number of nodes in graph - 5 - - The fastest way to traverse all edges of a graph is via - adjacency_iter(), but the edges() method is often more convenient. - - >>> for n,nbrsdict in G.adjacency_iter(): - ... for nbr,eattr in nbrsdict.items(): - ... if 'weight' in eattr: - ... (n,nbr,eattr['weight']) - (1, 2, 4) - (2, 3, 8) - >>> [ (u,v,edata['weight']) for u,v,edata in G.edges(data=True) if 'weight' in edata ] - [(1, 2, 4), (2, 3, 8)] - - **Reporting:** - - Simple graph information is obtained using methods. - Iterator versions of many reporting methods exist for efficiency. - Methods exist for reporting nodes(), edges(), neighbors() and degree() - as well as the number of nodes and edges. - - For details on these and other miscellaneous methods, see below. 
- """ - - def __init__(self, data=None, **attr): - """Initialize a graph with edges, name, graph attributes. - - Parameters - ---------- - data : input graph - Data to initialize graph. If data=None (default) an empty - graph is created. The data can be an edge list, or any - NetworkX graph object. If the corresponding optional Python - packages are installed the data can also be a NumPy matrix - or 2d ndarray, a SciPy sparse matrix, or a PyGraphviz graph. - name : string, optional (default='') - An optional name for the graph. - attr : keyword arguments, optional (default= no attributes) - Attributes to add to graph as key=value pairs. - - See Also - -------- - convert - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G = nx.Graph(name='my graph') - >>> e = [(1,2),(2,3),(3,4)] # list of edges - >>> G = nx.Graph(e) - - Arbitrary graph attribute pairs (key=value) may be assigned - - >>> G=nx.Graph(e, day="Friday") - >>> G.graph - {'day': 'Friday'} - - """ - self.graph = collections.OrderedDict() # dictionary for graph attributes - self.node = collections.OrderedDict() # dictionary for node attributes - # We store two adjacency lists: - # the predecessors of node n are stored in the dict self.pred - # the successors of node n are stored in the dict self.succ=self.adj - self.adj = collections.OrderedDict() # empty adjacency dictionary - self.pred = collections.OrderedDict() # predecessor - self.succ = self.adj # successor - - # attempt to load graph with data - # if data is not None: - # load graph attributes (must be after convert) - self.graph.update(attr) - self.edge = self.adj - - def add_node(self, n, attr_dict=None, **attr): - """Add a single node n and update node attributes. - - Parameters - ---------- - n : node - A node can be any hashable Python object except None. - attr_dict : dictionary, optional (default= no attributes) - Dictionary of node attributes. 
Key/value pairs will - update existing data associated with the node. - attr : keyword arguments, optional - Set or change attributes using key=value. - - See Also - -------- - add_nodes_from - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_node(1) - >>> G.add_node('Hello') - >>> K3 = nx.Graph([(0,1),(1,2),(2,0)]) - >>> G.add_node(K3) - >>> G.number_of_nodes() - 3 - - Use keywords set/change node attributes: - - >>> G.add_node(1,size=10) - >>> G.add_node(3,weight=0.4,UTM=('13S',382871,3972649)) - - Notes - ----- - A hashable object is one that can be used as a key in a Python - dictionary. This includes strings, numbers, tuples of strings - and numbers, etc. - - On many platforms hashable items also include mutables such as - NetworkX Graphs, though one should be careful that the hash - doesn't change on mutables. - """ - # set up attribute dict - if attr_dict is None: - attr_dict = attr - else: - try: - attr_dict.update(attr) - except AttributeError: - raise NetworkXError("The attr_dict argument must be a dictionary.") - if n not in self.succ: - self.succ[n] = collections.OrderedDict() - self.pred[n] = collections.OrderedDict() - self.node[n] = attr_dict - else: # update attr even if node already exists - self.node[n].update(attr_dict) - - def add_nodes_from(self, nodes, **attr): - """Add multiple nodes. - - Parameters - ---------- - nodes : iterable container - A container of nodes (list, dict, set, etc.). - OR - A container of (node, attribute dict) tuples. - Node attributes are updated using the attribute dict. - attr : keyword arguments, optional (default= no attributes) - Update attributes for all nodes in nodes. - Node attributes specified in nodes as a tuple - take precedence over attributes specified generally. 
- - See Also - -------- - add_node - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_nodes_from('Hello') - >>> K3 = nx.Graph([(0,1),(1,2),(2,0)]) - >>> G.add_nodes_from(K3) - >>> sorted(G.nodes(),key=str) - [0, 1, 2, 'H', 'e', 'l', 'o'] - - Use keywords to update specific node attributes for every node. - - >>> G.add_nodes_from([1,2], size=10) - >>> G.add_nodes_from([3,4], weight=0.4) - - Use (node, attrdict) tuples to update attributes for specific - nodes. - - >>> G.add_nodes_from([(1,dict(size=11)), (2,{'color':'blue'})]) - >>> G.node[1]['size'] - 11 - >>> H = nx.Graph() - >>> H.add_nodes_from(G.nodes(data=True)) - >>> H.node[1]['size'] - 11 - - """ - for n in nodes: - try: - newnode = n not in self.succ - except TypeError: - nn, ndict = n - if nn not in self.succ: - self.succ[nn] = collections.OrderedDict() - self.pred[nn] = collections.OrderedDict() - newdict = attr.copy() - newdict.update(ndict) - self.node[nn] = newdict - else: - olddict = self.node[nn] - olddict.update(attr) - olddict.update(ndict) - continue - if newnode: - self.succ[n] = collections.OrderedDict() - self.pred[n] = collections.OrderedDict() - self.node[n] = attr.copy() - else: - self.node[n].update(attr) - - def remove_node(self, n): - """Remove node n. - - Removes the node n and all adjacent edges. - Attempting to remove a non-existent node will raise an exception. - - Parameters - ---------- - n : node - A node in the graph - - Raises - ------ - NetworkXError - If n is not in the graph. 
- - See Also - -------- - remove_nodes_from - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2]) - >>> G.edges() - [(0, 1), (1, 2)] - >>> G.remove_node(1) - >>> G.edges() - [] - - """ - try: - nbrs = self.succ[n] - del self.node[n] - except KeyError: # NetworkXError if n not in self - raise NetworkXError(f"The node {n} is not in the digraph.") - for u in nbrs: - del self.pred[u][n] # remove all edges n-u in digraph - del self.succ[n] # remove node from succ - for u in self.pred[n]: - del self.succ[u][n] # remove all edges n-u in digraph - del self.pred[n] # remove node from pred - - def remove_nodes_from(self, nbunch): - """Remove multiple nodes. - - Parameters - ---------- - nodes : iterable container - A container of nodes (list, dict, set, etc.). If a node - in the container is not in the graph it is silently - ignored. - - See Also - -------- - remove_node - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2]) - >>> e = G.nodes() - >>> e - [0, 1, 2] - >>> G.remove_nodes_from(e) - >>> G.nodes() - [] - - """ - for n in nbunch: - try: - succs = self.succ[n] - del self.node[n] - for u in succs: - del self.pred[u][n] # remove all edges n-u in digraph - del self.succ[n] # now remove node - for u in self.pred[n]: - del self.succ[u][n] # remove all edges n-u in digraph - del self.pred[n] # now remove node - except KeyError: - pass # silent failure on remove - - def add_edge(self, u, v, attr_dict=None, **attr): - """Add an edge between u and v. - - The nodes u and v will be automatically added if they are - not already in the graph. - - Edge attributes can be specified with keywords or by providing - a dictionary with key/value pairs. See examples below. - - Parameters - ---------- - u,v : nodes - Nodes can be, for example, strings or numbers. - Nodes must be hashable (and not None) Python objects. 
- attr_dict : dictionary, optional (default= no attributes) - Dictionary of edge attributes. Key/value pairs will - update existing data associated with the edge. - attr : keyword arguments, optional - Edge data (or labels or objects) can be assigned using - keyword arguments. - - See Also - -------- - add_edges_from : add a collection of edges - - Notes - ----- - Adding an edge that already exists updates the edge data. - - Many NetworkX algorithms designed for weighted graphs use as - the edge weight a numerical value assigned to a keyword - which by default is 'weight'. - - Examples - -------- - The following all add the edge e=(1,2) to graph G: - - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> e = (1,2) - >>> G.add_edge(1, 2) # explicit two-node form - >>> G.add_edge(*e) # single edge as tuple of two nodes - # >>> G.add_edges_from( [(1,2)] ) # add edges from iterable container - - Associate data to edges using keywords: - - >>> G.add_edge(1, 2, weight=3) - >>> G.add_edge(1, 3, weight=7, capacity=15, length=342.7) - """ - # set up attribute dict - if attr_dict is None: - attr_dict = attr - else: - try: - attr_dict.update(attr) - except AttributeError: - raise NetworkXError("The attr_dict argument must be a dictionary.") - # add nodes - if u not in self.succ: - self.succ[u] = collections.OrderedDict() - self.pred[u] = collections.OrderedDict() - self.node[u] = collections.OrderedDict() - if v not in self.succ: - self.succ[v] = collections.OrderedDict() - self.pred[v] = collections.OrderedDict() - self.node[v] = collections.OrderedDict() - # add the edge - datadict = self.adj[u].get(v, collections.OrderedDict()) - datadict.update(attr_dict) - self.succ[u][v] = datadict - self.pred[v][u] = datadict - - def remove_edge(self, u, v): - """Remove the edge between u and v. - - Parameters - ---------- - u,v: nodes - Remove the edge between nodes u and v. - - Raises - ------ - NetworkXError - If there is not an edge between u and v. 
- - See Also - -------- - remove_edges_from : remove a collection of edges - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> G.remove_edge(0,1) - >>> e = (1,2) - >>> G.remove_edge(*e) # unpacks e from an edge tuple - >>> e = (2,3,{'weight':7}) # an edge with attribute data - >>> G.remove_edge(*e[:2]) # select first part of edge tuple - """ - try: - del self.succ[u][v] - del self.pred[v][u] - except KeyError: - raise NetworkXError(f"The edge {u}-{v} not in graph.") - - def remove_edges_from(self, ebunch): - """Remove all edges specified in ebunch. - - Parameters - ---------- - ebunch: list or container of edge tuples - Each edge given in the list or container will be removed - from the graph. The edges can be: - - - 2-tuples (u,v) edge between u and v. - - 3-tuples (u,v,k) where k is ignored. - - See Also - -------- - remove_edge : remove a single edge - - Notes - ----- - Will fail silently if an edge in ebunch is not in the graph. - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> ebunch=[(1,2),(2,3)] - >>> G.remove_edges_from(ebunch) - """ - for e in ebunch: - (u, v) = e[:2] # ignore edge data - if u in self.succ and v in self.succ[u]: - del self.succ[u][v] - del self.pred[v][u] - - def has_successor(self, u, v): - """Return True if node u has successor v. - - This is true if graph has the edge u->v. - """ - return u in self.succ and v in self.succ[u] - - def has_predecessor(self, u, v): - """Return True if node u has predecessor v. - - This is true if graph has the edge u<-v. - """ - return u in self.pred and v in self.pred[u] - - def successors_iter(self, n): - """Return an iterator over successor nodes of n. - - neighbors_iter() and successors_iter() are the same. 
- """ - try: - return iter(self.succ[n]) - except KeyError: - raise NetworkXError(f"The node {n} is not in the digraph.") - - def predecessors_iter(self, n): - """Return an iterator over predecessor nodes of n.""" - try: - return iter(self.pred[n]) - except KeyError: - raise NetworkXError(f"The node {n} is not in the digraph.") - - def successors(self, n): - """Return a list of successor nodes of n. - - neighbors() and successors() are the same function. - """ - return list(self.successors_iter(n)) - - def predecessors(self, n): - """Return a list of predecessor nodes of n.""" - return list(self.predecessors_iter(n)) - - # digraph definitions - neighbors = successors - neighbors_iter = successors_iter - - def edges_iter(self, nbunch=None, data=False): - """Return an iterator over the edges. - - Edges are returned as tuples with optional data - in the order (node, neighbor, data). - - Parameters - ---------- - nbunch : iterable container, optional (default= all nodes) - A container of nodes. The container will be iterated - through once. - data : bool, optional (default=False) - If True, return edge attribute dict in 3-tuple (u,v,data). - - Returns - ------- - edge_iter : iterator - An iterator of (u,v) or (u,v,d) tuples of edges. - - See Also - -------- - edges : return a list of edges - - Notes - ----- - Nodes in nbunch that are not in the graph will be (quietly) ignored. - For directed graphs this returns the out-edges. 
- - Examples - -------- - >>> G = nx.DiGraph() # or MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> [e for e in G.edges_iter()] - [(0, 1), (1, 2), (2, 3)] - >>> list(G.edges_iter(data=True)) # default data is {} (empty dict) - [(0, 1, {}), (1, 2, {}), (2, 3, {})] - >>> list(G.edges_iter([0,2])) - [(0, 1), (2, 3)] - >>> list(G.edges_iter(0)) - [(0, 1)] - - """ - if nbunch is None: - nodes_nbrs = self.adj.items() - else: - nodes_nbrs = ((n, self.adj[n]) for n in self.nbunch_iter(nbunch)) - if data: - for n, nbrs in nodes_nbrs: - for nbr, data in nbrs.items(): - yield (n, nbr, data) - else: - for n, nbrs in nodes_nbrs: - for nbr in nbrs: - yield (n, nbr) - - # alias out_edges to edges - out_edges_iter = edges_iter - out_edges = Graph.edges - - def in_edges_iter(self, nbunch=None, data=False): - """Return an iterator over the incoming edges. - - Parameters - ---------- - nbunch : iterable container, optional (default= all nodes) - A container of nodes. The container will be iterated - through once. - data : bool, optional (default=False) - If True, return edge attribute dict in 3-tuple (u,v,data). - - Returns - ------- - in_edge_iter : iterator - An iterator of (u,v) or (u,v,d) tuples of incoming edges. - - See Also - -------- - edges_iter : return an iterator of edges - """ - if nbunch is None: - nodes_nbrs = self.pred.items() - else: - nodes_nbrs = ((n, self.pred[n]) for n in self.nbunch_iter(nbunch)) - if data: - for n, nbrs in nodes_nbrs: - for nbr, data in nbrs.items(): - yield (nbr, n, data) - else: - for n, nbrs in nodes_nbrs: - for nbr in nbrs: - yield (nbr, n) - - def in_edges(self, nbunch=None, data=False): - """Return a list of the incoming edges. - - See Also - -------- - edges : return a list of edges - """ - return list(self.in_edges_iter(nbunch, data)) - - def degree_iter(self, nbunch=None, weight=None): - """Return an iterator for (node, degree). - - The node degree is the number of edges adjacent to the node. 
- - Parameters - ---------- - nbunch : iterable container, optional (default=all nodes) - A container of nodes. The container will be iterated - through once. - - weight : string or None, optional (default=None) - The edge attribute that holds the numerical value used - as a weight. If None, then each edge has weight 1. - The degree is the sum of the edge weights adjacent to the node. - - Returns - ------- - nd_iter : an iterator - The iterator returns two-tuples of (node, degree). - - See Also - -------- - degree, in_degree, out_degree, in_degree_iter, out_degree_iter - - Examples - -------- - >>> G = nx.DiGraph() # or MultiDiGraph - >>> G.add_path([0,1,2,3]) - >>> list(G.degree_iter(0)) # node 0 with degree 1 - [(0, 1)] - >>> list(G.degree_iter([0,1])) - [(0, 1), (1, 2)] - - """ - if nbunch is None: - nodes_nbrs = zip(iter(self.succ.items()), iter(self.pred.items())) - else: - nodes_nbrs = zip( - ((n, self.succ[n]) for n in self.nbunch_iter(nbunch)), - ((n, self.pred[n]) for n in self.nbunch_iter(nbunch)), - ) - - if weight is None: - for (n, succ), (_n2, pred) in nodes_nbrs: - yield (n, len(succ) + len(pred)) - else: - # edge weighted graph - degree is sum of edge weights - for (n, succ), (_n2, pred) in nodes_nbrs: - yield ( - n, - sum(succ[nbr].get(weight, 1) for nbr in succ) - + sum(pred[nbr].get(weight, 1) for nbr in pred), - ) - - def in_degree_iter(self, nbunch=None, weight=None): - """Return an iterator for (node, in-degree). - - The node in-degree is the number of edges pointing in to the node. - - Parameters - ---------- - nbunch : iterable container, optional (default=all nodes) - A container of nodes. The container will be iterated - through once. - - weight : string or None, optional (default=None) - The edge attribute that holds the numerical value used - as a weight. If None, then each edge has weight 1. - The degree is the sum of the edge weights adjacent to the node. 
- - Returns - ------- - nd_iter : an iterator - The iterator returns two-tuples of (node, in-degree). - - See Also - -------- - degree, in_degree, out_degree, out_degree_iter - - Examples - -------- - >>> G = nx.DiGraph() - >>> G.add_path([0,1,2,3]) - >>> list(G.in_degree_iter(0)) # node 0 with degree 0 - [(0, 0)] - >>> list(G.in_degree_iter([0,1])) - [(0, 0), (1, 1)] - - """ - if nbunch is None: - nodes_nbrs = self.pred.items() - else: - nodes_nbrs = ((n, self.pred[n]) for n in self.nbunch_iter(nbunch)) - - if weight is None: - for n, nbrs in nodes_nbrs: - yield (n, len(nbrs)) - else: - # edge weighted graph - degree is sum of edge weights - for n, nbrs in nodes_nbrs: - yield (n, sum(data.get(weight, 1) for data in nbrs.values())) - - def out_degree_iter(self, nbunch=None, weight=None): - """Return an iterator for (node, out-degree). - - The node out-degree is the number of edges pointing out of the node. - - Parameters - ---------- - nbunch : iterable container, optional (default=all nodes) - A container of nodes. The container will be iterated - through once. - - weight : string or None, optional (default=None) - The edge attribute that holds the numerical value used - as a weight. If None, then each edge has weight 1. - The degree is the sum of the edge weights adjacent to the node. - - Returns - ------- - nd_iter : an iterator - The iterator returns two-tuples of (node, out-degree). 
- - See Also - -------- - degree, in_degree, out_degree, in_degree_iter - - Examples - -------- - >>> G = nx.DiGraph() - >>> G.add_path([0,1,2,3]) - >>> list(G.out_degree_iter(0)) # node 0 with degree 1 - [(0, 1)] - >>> list(G.out_degree_iter([0,1])) - [(0, 1), (1, 1)] - - """ - if nbunch is None: - nodes_nbrs = self.succ.items() - else: - nodes_nbrs = ((n, self.succ[n]) for n in self.nbunch_iter(nbunch)) - - if weight is None: - for n, nbrs in nodes_nbrs: - yield (n, len(nbrs)) - else: - # edge weighted graph - degree is sum of edge weights - for n, nbrs in nodes_nbrs: - yield (n, sum(data.get(weight, 1) for data in nbrs.values())) - - def in_degree(self, nbunch=None, weight=None): - """Return the in-degree of a node or nodes. - - The node in-degree is the number of edges pointing in to the node. - - Parameters - ---------- - nbunch : iterable container, optional (default=all nodes) - A container of nodes. The container will be iterated - through once. - - weight : string or None, optional (default=None) - The edge attribute that holds the numerical value used - as a weight. If None, then each edge has weight 1. - The degree is the sum of the edge weights adjacent to the node. - - Returns - ------- - nd : dictionary, or number - A dictionary with nodes as keys and in-degree as values or - a number if a single node is specified. - - See Also - -------- - degree, out_degree, in_degree_iter - - Examples - -------- - >>> G = nx.DiGraph() # or MultiDiGraph - >>> G.add_path([0,1,2,3]) - >>> G.in_degree(0) - 0 - >>> G.in_degree([0,1]) - {0: 0, 1: 1} - >>> list(G.in_degree([0,1]).values()) - [0, 1] - """ - if nbunch in self: # return a single node - return next(self.in_degree_iter(nbunch, weight))[1] - else: # return a dict - return dict(self.in_degree_iter(nbunch, weight)) - - def out_degree(self, nbunch=None, weight=None): - """Return the out-degree of a node or nodes. - - The node out-degree is the number of edges pointing out of the node. 
- - Parameters - ---------- - nbunch : iterable container, optional (default=all nodes) - A container of nodes. The container will be iterated - through once. - - weight : string or None, optional (default=None) - The edge attribute that holds the numerical value used - as a weight. If None, then each edge has weight 1. - The degree is the sum of the edge weights adjacent to the node. - - Returns - ------- - nd : dictionary, or number - A dictionary with nodes as keys and out-degree as values or - a number if a single node is specified. - - Examples - -------- - >>> G = nx.DiGraph() # or MultiDiGraph - >>> G.add_path([0,1,2,3]) - >>> G.out_degree(0) - 1 - >>> G.out_degree([0,1]) - {0: 1, 1: 1} - >>> list(G.out_degree([0,1]).values()) - [1, 1] - - - """ - if nbunch in self: # return a single node - return next(self.out_degree_iter(nbunch, weight))[1] - else: # return a dict - return dict(self.out_degree_iter(nbunch, weight)) - - def clear(self): - """Remove all nodes and edges from the graph. - - This also removes the name, and all graph, node, and edge attributes. - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> G.clear() - >>> G.nodes() - [] - >>> G.edges() - [] - - """ - self.succ.clear() - self.pred.clear() - self.node.clear() - self.graph.clear() - - def is_multigraph(self): - """Return True if graph is a multigraph, False otherwise.""" - return False - - def is_directed(self): - """Return True if graph is directed, False otherwise.""" - return True - - def to_directed(self): - """Return a directed copy of the graph. - - Returns - ------- - G : DiGraph - A deepcopy of the graph. - - Notes - ----- - This returns a "deepcopy" of the edge, node, and - graph attributes which attempts to completely copy - all of the data and references. - - This is in contrast to the similar D=DiGraph(G) which returns a - shallow copy of the data. 
- - See the Python copy module for more information on shallow - and deep copies, http://docs.python.org/library/copy.html. - - Examples - -------- - >>> G = nx.Graph() # or MultiGraph, etc - >>> G.add_path([0,1]) - >>> H = G.to_directed() - >>> H.edges() - [(0, 1), (1, 0)] - - If already directed, return a (deep) copy - - >>> G = nx.DiGraph() # or MultiDiGraph, etc - >>> G.add_path([0,1]) - >>> H = G.to_directed() - >>> H.edges() - [(0, 1)] - """ - return deepcopy(self) - - def to_undirected(self, reciprocal=False): - """Return an undirected representation of the digraph. - - Parameters - ---------- - reciprocal : bool (optional) - If True only keep edges that appear in both directions - in the original digraph. - - Returns - ------- - G : Graph - An undirected graph with the same name and nodes and - with edge (u,v,data) if either (u,v,data) or (v,u,data) - is in the digraph. If both edges exist in digraph and - their edge data is different, only one edge is created - with an arbitrary choice of which edge data to use. - You must check and correct for this manually if desired. - - Notes - ----- - If edges in both directions (u,v) and (v,u) exist in the - graph, attributes for the new undirected edge will be a combination of - the attributes of the directed edges. The edge data is updated - in the (arbitrary) order that the edges are encountered. For - more customized control of the edge attributes use add_edge(). - - This returns a "deepcopy" of the edge, node, and - graph attributes which attempts to completely copy - all of the data and references. - - This is in contrast to the similar G=DiGraph(D) which returns a - shallow copy of the data. - - See the Python copy module for more information on shallow - and deep copies, http://docs.python.org/library/copy.html. 
- """ - H = Graph() - H.name = self.name - H.add_nodes_from(self) - if reciprocal is True: - H.add_edges_from( - (u, v, deepcopy(d)) - for u, nbrs in self.adjacency_iter() - for v, d in nbrs.items() - if v in self.pred[u] - ) - else: - H.add_edges_from( - (u, v, deepcopy(d)) for u, nbrs in self.adjacency_iter() for v, d in nbrs.items() - ) - H.graph = deepcopy(self.graph) - H.node = deepcopy(self.node) - return H - - def reverse(self, copy=True): - """Return the reverse of the graph. - - The reverse is a graph with the same nodes and edges - but with the directions of the edges reversed. - - Parameters - ---------- - copy : bool optional (default=True) - If True, return a new DiGraph holding the reversed edges. - If False, reverse the reverse graph is created using - the original graph (this changes the original graph). - """ - if copy: - H = self.__class__(name="Reverse of (%s)" % self.name) - H.add_nodes_from(self) - H.add_edges_from((v, u, deepcopy(d)) for u, v, d in self.edges(data=True)) - H.graph = deepcopy(self.graph) - H.node = deepcopy(self.node) - else: - self.pred, self.succ = self.succ, self.pred - self.adj = self.succ - H = self - return H - - def subgraph(self, nbunch): - """Return the subgraph induced on nodes in nbunch. - - The induced subgraph of the graph contains the nodes in nbunch - and the edges between those nodes. - - Parameters - ---------- - nbunch : list, iterable - A container of nodes which will be iterated through once. - - Returns - ------- - G : Graph - A subgraph of the graph with the same edge attributes. - - Notes - ----- - The graph, edge or node attributes just point to the original graph. - So changes to the node or edge structure will not be reflected in - the original graph while changes to the attributes will. 
- - To create a subgraph with its own copy of the edge/node attributes use: - nx.Graph(G.subgraph(nbunch)) - - If edge attributes are containers, a deep copy can be obtained using: - G.subgraph(nbunch).copy() - - For an inplace reduction of a graph to a subgraph you can remove nodes: - G.remove_nodes_from([ n in G if n not in set(nbunch)]) - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> H = G.subgraph([0,1,2]) - >>> H.edges() - [(0, 1), (1, 2)] - """ - bunch = self.nbunch_iter(nbunch) - # create new graph and copy subgraph into it - H = self.__class__() - # copy node and attribute dictionaries - for n in bunch: - H.node[n] = self.node[n] - # namespace shortcuts for speed - H_succ = H.succ - H_pred = H.pred - self_succ = self.succ - # add nodes - for n in H: - H_succ[n] = collections.OrderedDict() - H_pred[n] = collections.OrderedDict() - # add edges - for u in H_succ: - Hnbrs = H_succ[u] - for v, datadict in self_succ[u].items(): - if v in H_succ: - # add both representations of edge: u-v and v-u - Hnbrs[v] = datadict - H_pred[v][u] = datadict - H.graph = self.graph - return H diff --git a/ConfigSpace/nx/classes/graph.py b/ConfigSpace/nx/classes/graph.py deleted file mode 100644 index 701aa02c..00000000 --- a/ConfigSpace/nx/classes/graph.py +++ /dev/null @@ -1,1812 +0,0 @@ -"""Base class for undirected graphs. - -The Graph class allows any hashable object as a node -and can associate key/value attribute pairs with each undirected edge. - -Self-loops are allowed but multiple edges are not (see MultiGraph). - -For directed graphs see DiGraph and MultiDiGraph. -""" -# Copyright (C) 2004-2011 by -# Aric Hagberg -# Dan Schult -# Pieter Swart -# All rights reserved. -# BSD license. 
-from __future__ import annotations - -import collections -from copy import deepcopy - -from ConfigSpace.nx.exception import NetworkXError - -__author__ = """\n""".join( - [ - "Aric Hagberg (hagberg@lanl.gov)", - "Pieter Swart (swart@lanl.gov)", - "Dan Schult(dschult@colgate.edu)", - ], -) - - -class Graph: - """ - Base class for undirected graphs. - - A Graph stores nodes and edges with optional data, or attributes. - - Graphs hold undirected edges. Self loops are allowed but multiple - (parallel) edges are not. - - Nodes can be arbitrary (hashable) Python objects with optional - key/value attributes. - - Edges are represented as links between nodes with optional - key/value attributes. - - Parameters - ---------- - data : input graph - Data to initialize graph. If data=None (default) an empty - graph is created. The data can be an edge list, or any - NetworkX graph object. If the corresponding optional Python - packages are installed the data can also be a NumPy matrix - or 2d ndarray, a SciPy sparse matrix, or a PyGraphviz graph. - attr : keyword arguments, optional (default= no attributes) - Attributes to add to graph as key=value pairs. - - See Also - -------- - DiGraph - MultiGraph - MultiDiGraph - - Examples - -------- - Create an empty graph structure (a "null graph") with no nodes and - no edges. - - >>> G = nx.Graph() - - G can be grown in several ways. - - **Nodes:** - - Add one node at a time: - - >>> G.add_node(1) - - Add the nodes from any container (a list, dict, set or - even the lines from a file or the nodes from another graph). - - >>> G.add_nodes_from([2,3]) - >>> G.add_nodes_from(range(100,110)) - >>> H=nx.Graph() - >>> H.add_path([0,1,2,3,4,5,6,7,8,9]) - >>> G.add_nodes_from(H) - - In addition to strings and integers any hashable Python object - (except None) can represent a node, e.g. a customized node object, - or even another Graph. - - >>> G.add_node(H) - - **Edges:** - - G can also be grown by adding edges. 
- - Add one edge, - - >>> G.add_edge(1, 2) - - a list of edges, - - >>> G.add_edges_from([(1,2),(1,3)]) - - or a collection of edges, - - >>> G.add_edges_from(H.edges()) - - If some edges connect nodes not yet in the graph, the nodes - are added automatically. There are no errors when adding - nodes or edges that already exist. - - **Attributes:** - - Each graph, node, and edge can hold key/value attribute pairs - in an associated attribute dictionary (the keys must be hashable). - By default these are empty, but can be added or changed using - add_edge, add_node or direct manipulation of the attribute - dictionaries named graph, node and edge respectively. - - >>> G = nx.Graph(day="Friday") - >>> G.graph - {'day': 'Friday'} - - Add node attributes using add_node(), add_nodes_from() or G.node - - >>> G.add_node(1, time='5pm') - >>> G.add_nodes_from([3], time='2pm') - >>> G.node[1] - {'time': '5pm'} - >>> G.node[1]['room'] = 714 - >>> del G.node[1]['room'] # remove attribute - >>> G.nodes(data=True) - [(1, {'time': '5pm'}), (3, {'time': '2pm'})] - - Warning: adding a node to G.node does not add it to the graph. - - Add edge attributes using add_edge(), add_edges_from(), subscript - notation, or G.edge. - - >>> G.add_edge(1, 2, weight=4.7 ) - >>> G.add_edges_from([(3,4),(4,5)], color='red') - >>> G.add_edges_from([(1,2,{'color':'blue'}), (2,3,{'weight':8})]) - >>> G[1][2]['weight'] = 4.7 - >>> G.edge[1][2]['weight'] = 4 - - **Shortcuts:** - - Many common graph features allow python syntax to speed reporting. - - >>> 1 in G # check if node in graph - True - >>> [n for n in G if n<3] # iterate through nodes - [1, 2] - >>> len(G) # number of nodes in graph - 5 - - The fastest way to traverse all edges of a graph is via - adjacency_iter(), but the edges() method is often more convenient. - - >>> for n,nbrsdict in G.adjacency_iter(): - ... for nbr,eattr in nbrsdict.items(): - ... if 'weight' in eattr: - ... 
(n,nbr,eattr['weight']) - (1, 2, 4) - (2, 1, 4) - (2, 3, 8) - (3, 2, 8) - >>> [ (u,v,edata['weight']) for u,v,edata in G.edges(data=True) if 'weight' in edata ] - [(1, 2, 4), (2, 3, 8)] - - **Reporting:** - - Simple graph information is obtained using methods. - Iterator versions of many reporting methods exist for efficiency. - Methods exist for reporting nodes(), edges(), neighbors() and degree() - as well as the number of nodes and edges. - - For details on these and other miscellaneous methods, see below. - """ - - def __init__(self, data=None, **attr): - """Initialize a graph with edges, name, graph attributes. - - Parameters - ---------- - data : input graph - Data to initialize graph. If data=None (default) an empty - graph is created. The data can be an edge list, or any - NetworkX graph object. If the corresponding optional Python - packages are installed the data can also be a NumPy matrix - or 2d ndarray, a SciPy sparse matrix, or a PyGraphviz graph. - name : string, optional (default='') - An optional name for the graph. - attr : keyword arguments, optional (default= no attributes) - Attributes to add to graph as key=value pairs. 
- - See Also - -------- - convert - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G = nx.Graph(name='my graph') - >>> e = [(1,2),(2,3),(3,4)] # list of edges - >>> G = nx.Graph(e) - - Arbitrary graph attribute pairs (key=value) may be assigned - - >>> G=nx.Graph(e, day="Friday") - >>> G.graph - {'day': 'Friday'} - - """ - self.graph = collections.OrderedDict() # dictionary for graph attributes - self.node = collections.OrderedDict() # empty node dict (created before convert) - self.adj = collections.OrderedDict() # empty adjacency dict - # attempt to load graph with data - # if data is not None: - # load graph attributes (must be after convert) - self.graph.update(attr) - self.edge = self.adj - - @property - def name(self): - return self.graph.get("name", "") - - @name.setter - def name(self, s): - self.graph["name"] = s - - def __str__(self): - """Return the graph name. - - Returns - ------- - name : string - The name of the graph. - - Examples - -------- - >>> G = nx.Graph(name='foo') - >>> str(G) - 'foo' - """ - return self.name - - def __iter__(self): - """Iterate over the nodes. Use the expression 'for n in G'. - - Returns - ------- - niter : iterator - An iterator over all nodes in the graph. - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - """ - return iter(self.node) - - def __contains__(self, n): - """Return True if n is a node, False otherwise. Use the expression - 'n in G'. - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> 1 in G - True - """ - try: - return n in self.node - except TypeError: - return False - - def __len__(self): - """Return the number of nodes. Use the expression 'len(G)'. - - Returns - ------- - nnodes : int - The number of nodes in the graph. 
- - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> len(G) - 4 - - """ - return len(self.node) - - def __getitem__(self, n): - """Return a dict of neighbors of node n. Use the expression 'G[n]'. - - Parameters - ---------- - n : node - A node in the graph. - - Returns - ------- - adj_dict : dictionary - The adjacency dictionary for nodes connected to n. - - Notes - ----- - G[n] is similar to G.neighbors(n) but the internal data dictionary - is returned instead of a list. - - Assigning G[n] will corrupt the internal graph data structure. - Use G[n] for reading data only. - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> G[0] - {1: {}} - """ - return self.adj[n] - - def add_node(self, n, attr_dict=None, **attr): - """Add a single node n and update node attributes. - - Parameters - ---------- - n : node - A node can be any hashable Python object except None. - attr_dict : dictionary, optional (default= no attributes) - Dictionary of node attributes. Key/value pairs will - update existing data associated with the node. - attr : keyword arguments, optional - Set or change attributes using key=value. - - See Also - -------- - add_nodes_from - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_node(1) - >>> G.add_node('Hello') - >>> K3 = nx.Graph([(0,1),(1,2),(2,0)]) - >>> G.add_node(K3) - >>> G.number_of_nodes() - 3 - - Use keywords set/change node attributes: - - >>> G.add_node(1,size=10) - >>> G.add_node(3,weight=0.4,UTM=('13S',382871,3972649)) - - Notes - ----- - A hashable object is one that can be used as a key in a Python - dictionary. This includes strings, numbers, tuples of strings - and numbers, etc. - - On many platforms hashable items also include mutables such as - NetworkX Graphs, though one should be careful that the hash - doesn't change on mutables. 
- """ - # set up attribute dict - if attr_dict is None: - attr_dict = attr - else: - try: - attr_dict.update(attr) - except AttributeError: - raise NetworkXError("The attr_dict argument must be a dictionary.") - if n not in self.node: - self.adj[n] = collections.OrderedDict() - self.node[n] = attr_dict - else: # update attr even if node already exists - self.node[n].update(attr_dict) - - def add_nodes_from(self, nodes, **attr): - """Add multiple nodes. - - Parameters - ---------- - nodes : iterable container - A container of nodes (list, dict, set, etc.). - OR - A container of (node, attribute dict) tuples. - Node attributes are updated using the attribute dict. - attr : keyword arguments, optional (default= no attributes) - Update attributes for all nodes in nodes. - Node attributes specified in nodes as a tuple - take precedence over attributes specified generally. - - See Also - -------- - add_node - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_nodes_from('Hello') - >>> K3 = nx.Graph([(0,1),(1,2),(2,0)]) - >>> G.add_nodes_from(K3) - >>> sorted(G.nodes(),key=str) - [0, 1, 2, 'H', 'e', 'l', 'o'] - - Use keywords to update specific node attributes for every node. - - >>> G.add_nodes_from([1,2], size=10) - >>> G.add_nodes_from([3,4], weight=0.4) - - Use (node, attrdict) tuples to update attributes for specific - nodes. 
- - >>> G.add_nodes_from([(1,dict(size=11)), (2,{'color':'blue'})]) - >>> G.node[1]['size'] - 11 - >>> H = nx.Graph() - >>> H.add_nodes_from(G.nodes(data=True)) - >>> H.node[1]['size'] - 11 - - """ - for n in nodes: - try: - newnode = n not in self.node - except TypeError: - nn, ndict = n - if nn not in self.node: - self.adj[nn] = collections.OrderedDict() - newdict = attr.copy() - newdict.update(ndict) - self.node[nn] = newdict - else: - olddict = self.node[nn] - olddict.update(attr) - olddict.update(ndict) - continue - if newnode: - self.adj[n] = collections.OrderedDict() - self.node[n] = attr.copy() - else: - self.node[n].update(attr) - - def remove_node(self, n): - """Remove node n. - - Removes the node n and all adjacent edges. - Attempting to remove a non-existent node will raise an exception. - - Parameters - ---------- - n : node - A node in the graph - - Raises - ------ - NetworkXError - If n is not in the graph. - - See Also - -------- - remove_nodes_from - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2]) - >>> G.edges() - [(0, 1), (1, 2)] - >>> G.remove_node(1) - >>> G.edges() - [] - - """ - adj = self.adj - try: - nbrs = list(adj[n].keys()) # keys handles self-loops (allow mutation later) - del self.node[n] - except KeyError: # NetworkXError if n not in self - raise NetworkXError(f"The node {n} is not in the graph.") - for u in nbrs: - del adj[u][n] # remove all edges n-u in graph - del adj[n] # now remove node - - def remove_nodes_from(self, nodes): - """Remove multiple nodes. - - Parameters - ---------- - nodes : iterable container - A container of nodes (list, dict, set, etc.). If a node - in the container is not in the graph it is silently - ignored. 
- - See Also - -------- - remove_node - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2]) - >>> e = G.nodes() - >>> e - [0, 1, 2] - >>> G.remove_nodes_from(e) - >>> G.nodes() - [] - - """ - adj = self.adj - for n in nodes: - try: - del self.node[n] - for u in list(adj[n].keys()): # keys() handles self-loops - del adj[u][n] # (allows mutation of dict in loop) - del adj[n] - except KeyError: - pass - - def nodes_iter(self, data=False): - """Return an iterator over the nodes. - - Parameters - ---------- - data : boolean, optional (default=False) - If False the iterator returns nodes. If True - return a two-tuple of node and node data dictionary - - Returns - ------- - niter : iterator - An iterator over nodes. If data=True the iterator gives - two-tuples containing (node, node data, dictionary) - - Notes - ----- - If the node data is not required it is simpler and equivalent - to use the expression 'for n in G'. - - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2]) - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2]) - - >>> [d for n,d in G.nodes_iter(data=True)] - [{}, {}, {}] - """ - if data: - return iter(self.node.items()) - return iter(self.node) - - def nodes(self, data=False): - """Return a list of the nodes in the graph. - - Parameters - ---------- - data : boolean, optional (default=False) - If False return a list of nodes. If True return a - two-tuple of node and node data dictionary - - Returns - ------- - nlist : list - A list of nodes. If data=True a list of two-tuples containing - (node, node data dictionary). 
- - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2]) - >>> G.nodes() - [0, 1, 2] - >>> G.add_node(1, time='5pm') - >>> G.nodes(data=True) - [(0, {}), (1, {'time': '5pm'}), (2, {})] - """ - return list(self.nodes_iter(data=data)) - - def number_of_nodes(self): - """Return the number of nodes in the graph. - - Returns - ------- - nnodes : int - The number of nodes in the graph. - - See Also - -------- - order, __len__ which are identical - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2]) - >>> len(G) - 3 - """ - return len(self.node) - - def order(self): - """Return the number of nodes in the graph. - - Returns - ------- - nnodes : int - The number of nodes in the graph. - - See Also - -------- - number_of_nodes, __len__ which are identical - - """ - return len(self.node) - - def has_node(self, n): - """Return True if the graph contains the node n. - - Parameters - ---------- - n : node - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2]) - >>> G.has_node(0) - True - - It is more readable and simpler to use - - >>> 0 in G - True - - """ - try: - return n in self.node - except TypeError: - return False - - def add_edge(self, u, v, attr_dict=None, **attr): - """Add an edge between u and v. - - The nodes u and v will be automatically added if they are - not already in the graph. - - Edge attributes can be specified with keywords or by providing - a dictionary with key/value pairs. See examples below. - - Parameters - ---------- - u,v : nodes - Nodes can be, for example, strings or numbers. - Nodes must be hashable (and not None) Python objects. - attr_dict : dictionary, optional (default= no attributes) - Dictionary of edge attributes. Key/value pairs will - update existing data associated with the edge. 
- attr : keyword arguments, optional - Edge data (or labels or objects) can be assigned using - keyword arguments. - - See Also - -------- - add_edges_from : add a collection of edges - - Notes - ----- - Adding an edge that already exists updates the edge data. - - Many NetworkX algorithms designed for weighted graphs use as - the edge weight a numerical value assigned to a keyword - which by default is 'weight'. - - Examples - -------- - The following all add the edge e=(1,2) to graph G: - - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> e = (1,2) - >>> G.add_edge(1, 2) # explicit two-node form - >>> G.add_edge(*e) # single edge as tuple of two nodes - >>> G.add_edges_from( [(1,2)] ) # add edges from iterable container - - Associate data to edges using keywords: - - >>> G.add_edge(1, 2, weight=3) - >>> G.add_edge(1, 3, weight=7, capacity=15, length=342.7) - """ - # set up attribute dictionary - if attr_dict is None: - attr_dict = attr - else: - try: - attr_dict.update(attr) - except AttributeError: - raise NetworkXError("The attr_dict argument must be a dictionary.") - # add nodes - if u not in self.node: - self.adj[u] = collections.OrderedDict() - self.node[u] = collections.OrderedDict() - if v not in self.node: - self.adj[v] = collections.OrderedDict() - self.node[v] = collections.OrderedDict() - # add the edge - datadict = self.adj[u].get(v, collections.OrderedDict()) - datadict.update(attr_dict) - self.adj[u][v] = datadict - self.adj[v][u] = datadict - - def add_edges_from(self, ebunch, attr_dict=None, **attr): - """Add all the edges in ebunch. - - Parameters - ---------- - ebunch : container of edges - Each edge given in the container will be added to the - graph. The edges must be given as as 2-tuples (u,v) or - 3-tuples (u,v,d) where d is a dictionary containing edge - data. - attr_dict : dictionary, optional (default= no attributes) - Dictionary of edge attributes. Key/value pairs will - update existing data associated with each edge. 
- attr : keyword arguments, optional - Edge data (or labels or objects) can be assigned using - keyword arguments. - - - See Also - -------- - add_edge : add a single edge - add_weighted_edges_from : convenient way to add weighted edges - - Notes - ----- - Adding the same edge twice has no effect but any edge data - will be updated when each duplicate edge is added. - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_edges_from([(0,1),(1,2)]) # using a list of edge tuples - >>> e = zip(range(0,3),range(1,4)) - >>> G.add_edges_from(e) # Add the path graph 0-1-2-3 - - Associate data to edges - - >>> G.add_edges_from([(1,2),(2,3)], weight=3) - >>> G.add_edges_from([(3,4),(1,4)], label='WN2898') - """ - # set up attribute dict - if attr_dict is None: - attr_dict = attr - else: - try: - attr_dict.update(attr) - except AttributeError: - raise NetworkXError("The attr_dict argument must be a dictionary.") - # process ebunch - for e in ebunch: - ne = len(e) - if ne == 3: - u, v, dd = e - elif ne == 2: - u, v = e - dd = collections.OrderedDict() - else: - raise NetworkXError(f"Edge tuple {e} must be a 2-tuple or 3-tuple.") - if u not in self.node: - self.adj[u] = collections.OrderedDict() - self.node[u] = collections.OrderedDict() - if v not in self.node: - self.adj[v] = collections.OrderedDict() - self.node[v] = collections.OrderedDict() - datadict = self.adj[u].get(v, collections.OrderedDict()) - datadict.update(attr_dict) - datadict.update(dd) - self.adj[u][v] = datadict - self.adj[v][u] = datadict - - def add_weighted_edges_from(self, ebunch, weight="weight", **attr): - """Add all the edges in ebunch as weighted edges with specified - weights. - - Parameters - ---------- - ebunch : container of edges - Each edge given in the list or container will be added - to the graph. The edges must be given as 3-tuples (u,v,w) - where w is a number. 
- weight : string, optional (default= 'weight') - The attribute name for the edge weights to be added. - attr : keyword arguments, optional (default= no attributes) - Edge attributes to add/update for all edges. - - See Also - -------- - add_edge : add a single edge - add_edges_from : add multiple edges - - Notes - ----- - Adding the same edge twice for Graph/DiGraph simply updates - the edge data. For MultiGraph/MultiDiGraph, duplicate edges - are stored. - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_weighted_edges_from([(0,1,3.0),(1,2,7.5)]) - """ - self.add_edges_from(((u, v, {weight: d}) for u, v, d in ebunch), **attr) - - def remove_edge(self, u, v): - """Remove the edge between u and v. - - Parameters - ---------- - u,v: nodes - Remove the edge between nodes u and v. - - Raises - ------ - NetworkXError - If there is not an edge between u and v. - - See Also - -------- - remove_edges_from : remove a collection of edges - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> G.remove_edge(0,1) - >>> e = (1,2) - >>> G.remove_edge(*e) # unpacks e from an edge tuple - >>> e = (2,3,{'weight':7}) # an edge with attribute data - >>> G.remove_edge(*e[:2]) # select first part of edge tuple - """ - try: - del self.adj[u][v] - if u != v: # self-loop needs only one entry removed - del self.adj[v][u] - except KeyError: - raise NetworkXError(f"The edge {u}-{v} is not in the graph") - - def remove_edges_from(self, ebunch): - """Remove all edges specified in ebunch. - - Parameters - ---------- - ebunch: list or container of edge tuples - Each edge given in the list or container will be removed - from the graph. The edges can be: - - - 2-tuples (u,v) edge between u and v. - - 3-tuples (u,v,k) where k is ignored. - - See Also - -------- - remove_edge : remove a single edge - - Notes - ----- - Will fail silently if an edge in ebunch is not in the graph. 
- - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> ebunch=[(1,2),(2,3)] - >>> G.remove_edges_from(ebunch) - """ - adj = self.adj - for e in ebunch: - u, v = e[:2] # ignore edge data if present - if u in adj and v in adj[u]: - del adj[u][v] - if u != v: # self loop needs only one entry removed - del adj[v][u] - - def has_edge(self, u, v): - """Return True if the edge (u,v) is in the graph. - - Parameters - ---------- - u,v : nodes - Nodes can be, for example, strings or numbers. - Nodes must be hashable (and not None) Python objects. - - Returns - ------- - edge_ind : bool - True if edge is in the graph, False otherwise. - - Examples - -------- - Can be called either using two nodes u,v or edge tuple (u,v) - - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> G.has_edge(0,1) # using two nodes - True - >>> e = (0,1) - >>> G.has_edge(*e) # e is a 2-tuple (u,v) - True - >>> e = (0,1,{'weight':7}) - >>> G.has_edge(*e[:2]) # e is a 3-tuple (u,v,data_dictionary) - True - - The following syntax are all equivalent: - - >>> G.has_edge(0,1) - True - >>> 1 in G[0] # though this gives KeyError if 0 not in G - True - - """ - try: - return v in self.adj[u] - except KeyError: - return False - - def neighbors(self, n): - """Return a list of the nodes connected to the node n. - - Parameters - ---------- - n : node - A node in the graph - - Returns - ------- - nlist : list - A list of nodes that are adjacent to n. - - Raises - ------ - NetworkXError - If the node n is not in the graph. 
- - Notes - ----- - It is usually more convenient (and faster) to access the - adjacency dictionary as G[n]: - - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_edge('a','b',weight=7) - >>> G['a'] - {'b': {'weight': 7}} - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> G.neighbors(0) - [1] - - """ - try: - return list(self.adj[n]) - except KeyError: - raise NetworkXError(f"The node {n} is not in the graph.") - - def neighbors_iter(self, n): - """Return an iterator over all neighbors of node n. - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> [n for n in G.neighbors_iter(0)] - [1] - - Notes - ----- - It is faster to use the idiom "in G[0]", e.g. - - >>> G = nx.path_graph(4) - >>> [n for n in G[0]] - [1] - """ - try: - return iter(self.adj[n]) - except KeyError: - raise NetworkXError(f"The node {n} is not in the graph.") - - def edges(self, nbunch=None, data=False): - """Return a list of edges. - - Edges are returned as tuples with optional data - in the order (node, neighbor, data). - - Parameters - ---------- - nbunch : iterable container, optional (default= all nodes) - A container of nodes. The container will be iterated - through once. - data : bool, optional (default=False) - Return two tuples (u,v) (False) or three-tuples (u,v,data) (True). - - Returns - ------- - edge_list: list of edge tuples - Edges that are adjacent to any node in nbunch, or a list - of all edges if nbunch is not specified. - - See Also - -------- - edges_iter : return an iterator over the edges - - Notes - ----- - Nodes in nbunch that are not in the graph will be (quietly) ignored. - For directed graphs this returns the out-edges. 
- - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> G.edges() - [(0, 1), (1, 2), (2, 3)] - >>> G.edges(data=True) # default edge data is {} (empty dictionary) - [(0, 1, {}), (1, 2, {}), (2, 3, {})] - >>> G.edges([0,3]) - [(0, 1), (3, 2)] - >>> G.edges(0) - [(0, 1)] - - """ - return list(self.edges_iter(nbunch, data)) - - def edges_iter(self, nbunch=None, data=False): - """Return an iterator over the edges. - - Edges are returned as tuples with optional data - in the order (node, neighbor, data). - - Parameters - ---------- - nbunch : iterable container, optional (default= all nodes) - A container of nodes. The container will be iterated - through once. - data : bool, optional (default=False) - If True, return edge attribute dict in 3-tuple (u,v,data). - - Returns - ------- - edge_iter : iterator - An iterator of (u,v) or (u,v,d) tuples of edges. - - See Also - -------- - edges : return a list of edges - - Notes - ----- - Nodes in nbunch that are not in the graph will be (quietly) ignored. - For directed graphs this returns the out-edges. 
- - Examples - -------- - >>> G = nx.Graph() # or MultiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> [e for e in G.edges_iter()] - [(0, 1), (1, 2), (2, 3)] - >>> list(G.edges_iter(data=True)) # default data is {} (empty dict) - [(0, 1, {}), (1, 2, {}), (2, 3, {})] - >>> list(G.edges_iter([0,3])) - [(0, 1), (3, 2)] - >>> list(G.edges_iter(0)) - [(0, 1)] - - """ - seen = collections.OrderedDict() # helper dict to keep track of multiply stored edges - if nbunch is None: - nodes_nbrs = self.adj.items() - else: - nodes_nbrs = ((n, self.adj[n]) for n in self.nbunch_iter(nbunch)) - if data: - for n, nbrs in nodes_nbrs: - for nbr, data in nbrs.items(): - if nbr not in seen: - yield (n, nbr, data) - seen[n] = 1 - else: - for n, nbrs in nodes_nbrs: - for nbr in nbrs: - if nbr not in seen: - yield (n, nbr) - seen[n] = 1 - del seen - - def get_edge_data(self, u, v, default=None): - """Return the attribute dictionary associated with edge (u,v). - - Parameters - ---------- - u,v : nodes - default: any Python object (default=None) - Value to return if the edge (u,v) is not found. - - Returns - ------- - edge_dict : dictionary - The edge attribute dictionary. - - Notes - ----- - It is faster to use G[u][v]. - - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> G[0][1] - {} - - Warning: Assigning G[u][v] corrupts the graph data structure. 
- But it is safe to assign attributes to that dictionary, - - >>> G[0][1]['weight'] = 7 - >>> G[0][1]['weight'] - 7 - >>> G[1][0]['weight'] - 7 - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> G.get_edge_data(0,1) # default edge data is {} - {} - >>> e = (0,1) - >>> G.get_edge_data(*e) # tuple form - {} - >>> G.get_edge_data('a','b',default=0) # edge not in graph, return 0 - 0 - """ - try: - return self.adj[u][v] - except KeyError: - return default - - def adjacency_list(self): - """Return an adjacency list representation of the graph. - - The output adjacency list is in the order of G.nodes(). - For directed graphs, only outgoing adjacencies are included. - - Returns - ------- - adj_list : lists of lists - The adjacency structure of the graph as a list of lists. - - See Also - -------- - adjacency_iter - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> G.adjacency_list() # in order given by G.nodes() - [[1], [0, 2], [1, 3], [2]] - - """ - return list(map(list, iter(self.adj.values()))) - - def adjacency_iter(self): - """Return an iterator of (node, adjacency dict) tuples for all nodes. - - This is the fastest way to look at every edge. - For directed graphs, only outgoing adjacencies are included. - - Returns - ------- - adj_iter : iterator - An iterator of (node, adjacency dictionary) for all nodes in - the graph. - - See Also - -------- - adjacency_list - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> [(n,nbrdict) for n,nbrdict in G.adjacency_iter()] - [(0, {1: {}}), (1, {0: {}, 2: {}}), (2, {1: {}, 3: {}}), (3, {2: {}})] - - """ - return iter(self.adj.items()) - - def degree(self, nbunch=None, weight=None): - """Return the degree of a node or nodes. - - The node degree is the number of edges adjacent to that node. 
- - Parameters - ---------- - nbunch : iterable container, optional (default=all nodes) - A container of nodes. The container will be iterated - through once. - - weight : string or None, optional (default=None) - The edge attribute that holds the numerical value used - as a weight. If None, then each edge has weight 1. - The degree is the sum of the edge weights adjacent to the node. - - Returns - ------- - nd : dictionary, or number - A dictionary with nodes as keys and degree as values or - a number if a single node is specified. - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> G.degree(0) - 1 - >>> G.degree([0,1]) - {0: 1, 1: 2} - >>> list(G.degree([0,1]).values()) - [1, 2] - - """ - if nbunch in self: # return a single node - return next(self.degree_iter(nbunch, weight))[1] - else: # return a dict - return dict(self.degree_iter(nbunch, weight)) - - def degree_iter(self, nbunch=None, weight=None): - """Return an iterator for (node, degree). - - The node degree is the number of edges adjacent to the node. - - Parameters - ---------- - nbunch : iterable container, optional (default=all nodes) - A container of nodes. The container will be iterated - through once. - - weight : string or None, optional (default=None) - The edge attribute that holds the numerical value used - as a weight. If None, then each edge has weight 1. - The degree is the sum of the edge weights adjacent to the node. - - Returns - ------- - nd_iter : an iterator - The iterator returns two-tuples of (node, degree). 
- - See Also - -------- - degree - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> list(G.degree_iter(0)) # node 0 with degree 1 - [(0, 1)] - >>> list(G.degree_iter([0,1])) - [(0, 1), (1, 2)] - - """ - if nbunch is None: - nodes_nbrs = self.adj.items() - else: - nodes_nbrs = ((n, self.adj[n]) for n in self.nbunch_iter(nbunch)) - - if weight is None: - for n, nbrs in nodes_nbrs: - yield (n, len(nbrs) + (n in nbrs)) # return tuple (n,degree) - else: - # edge weighted graph - degree is sum of nbr edge weights - for n, nbrs in nodes_nbrs: - yield ( - n, - sum(nbrs[nbr].get(weight, 1) for nbr in nbrs) - + (n in nbrs and nbrs[n].get(weight, 1)), - ) - - def clear(self): - """Remove all nodes and edges from the graph. - - This also removes the name, and all graph, node, and edge attributes. - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> G.clear() - >>> G.nodes() - [] - >>> G.edges() - [] - - """ - self.name = "" - self.adj.clear() - self.node.clear() - self.graph.clear() - - def copy(self): - """Return a copy of the graph. - - Returns - ------- - G : Graph - A copy of the graph. - - See Also - -------- - to_directed: return a directed copy of the graph. - - Notes - ----- - This makes a complete copy of the graph including all of the - node or edge attributes. - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> H = G.copy() - - """ - return deepcopy(self) - - def is_multigraph(self): - """Return True if graph is a multigraph, False otherwise.""" - return False - - def is_directed(self): - """Return True if graph is directed, False otherwise.""" - return False - - def to_directed(self): - """Return a directed representation of the graph. 
- - Returns - ------- - G : DiGraph - A directed graph with the same name, same nodes, and with - each edge (u,v,data) replaced by two directed edges - (u,v,data) and (v,u,data). - - Notes - ----- - This returns a "deepcopy" of the edge, node, and - graph attributes which attempts to completely copy - all of the data and references. - - This is in contrast to the similar D=DiGraph(G) which returns a - shallow copy of the data. - - See the Python copy module for more information on shallow - and deep copies, http://docs.python.org/library/copy.html. - - Examples - -------- - >>> G = nx.Graph() # or MultiGraph, etc - >>> G.add_path([0,1]) - >>> H = G.to_directed() - >>> H.edges() - [(0, 1), (1, 0)] - - If already directed, return a (deep) copy - - >>> G = nx.DiGraph() # or MultiDiGraph, etc - >>> G.add_path([0,1]) - >>> H = G.to_directed() - >>> H.edges() - [(0, 1)] - """ - from ConfigSpace.nx import DiGraph - - G = DiGraph() - G.name = self.name - G.add_nodes_from(self) - G.add_edges_from( - ( - (u, v, deepcopy(data)) - for u, nbrs in self.adjacency_iter() - for v, data in nbrs.items() - ), - ) - G.graph = deepcopy(self.graph) - G.node = deepcopy(self.node) - return G - - def to_undirected(self): - """Return an undirected copy of the graph. - - Returns - ------- - G : Graph/MultiGraph - A deepcopy of the graph. - - See Also - -------- - copy, add_edge, add_edges_from - - Notes - ----- - This returns a "deepcopy" of the edge, node, and - graph attributes which attempts to completely copy - all of the data and references. - - This is in contrast to the similar G=DiGraph(D) which returns a - shallow copy of the data. - - See the Python copy module for more information on shallow - and deep copies, http://docs.python.org/library/copy.html. 
- - Examples - -------- - >>> G = nx.Graph() # or MultiGraph, etc - >>> G.add_path([0,1]) - >>> H = G.to_directed() - >>> H.edges() - [(0, 1), (1, 0)] - >>> G2 = H.to_undirected() - >>> G2.edges() - [(0, 1)] - """ - return deepcopy(self) - - def subgraph(self, nbunch): - """Return the subgraph induced on nodes in nbunch. - - The induced subgraph of the graph contains the nodes in nbunch - and the edges between those nodes. - - Parameters - ---------- - nbunch : list, iterable - A container of nodes which will be iterated through once. - - Returns - ------- - G : Graph - A subgraph of the graph with the same edge attributes. - - Notes - ----- - The graph, edge or node attributes just point to the original graph. - So changes to the node or edge structure will not be reflected in - the original graph while changes to the attributes will. - - To create a subgraph with its own copy of the edge/node attributes use: - nx.Graph(G.subgraph(nbunch)) - - If edge attributes are containers, a deep copy can be obtained using: - G.subgraph(nbunch).copy() - - For an inplace reduction of a graph to a subgraph you can remove nodes: - G.remove_nodes_from([ n in G if n not in set(nbunch)]) - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> H = G.subgraph([0,1,2]) - >>> H.edges() - [(0, 1), (1, 2)] - """ - bunch = self.nbunch_iter(nbunch) - # create new graph and copy subgraph into it - H = self.__class__() - # copy node and attribute dictionaries - for n in bunch: - H.node[n] = self.node[n] - # namespace shortcuts for speed - H_adj = H.adj - self_adj = self.adj - # add nodes and edges (undirected method) - for n in H.node: - Hnbrs = collections.OrderedDict() - H_adj[n] = Hnbrs - for nbr, d in self_adj[n].items(): - if nbr in H_adj: - # add both representations of edge: n-nbr and nbr-n - Hnbrs[nbr] = d - H_adj[nbr][n] = d - H.graph = self.graph - return H - - def nodes_with_selfloops(self): - """Return a list of 
nodes with self loops. - - A node with a self loop has an edge with both ends adjacent - to that node. - - Returns - ------- - nodelist : list - A list of nodes with self loops. - - See Also - -------- - selfloop_edges, number_of_selfloops - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_edge(1,1) - >>> G.add_edge(1,2) - >>> G.nodes_with_selfloops() - [1] - """ - return [n for n, nbrs in self.adj.items() if n in nbrs] - - def selfloop_edges(self, data=False): - """Return a list of selfloop edges. - - A selfloop edge has the same node at both ends. - - Parameters - ---------- - data : bool, optional (default=False) - Return selfloop edges as two tuples (u,v) (data=False) - or three-tuples (u,v,data) (data=True) - - Returns - ------- - edgelist : list of edge tuples - A list of all selfloop edges. - - See Also - -------- - nodes_with_selfloops, number_of_selfloops - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_edge(1,1) - >>> G.add_edge(1,2) - >>> G.selfloop_edges() - [(1, 1)] - >>> G.selfloop_edges(data=True) - [(1, 1, {})] - """ - if data: - return [(n, n, nbrs[n]) for n, nbrs in self.adj.items() if n in nbrs] - else: - return [(n, n) for n, nbrs in self.adj.items() if n in nbrs] - - def number_of_selfloops(self): - """Return the number of selfloop edges. - - A selfloop edge has the same node at both ends. - - Returns - ------- - nloops : int - The number of selfloops. - - See Also - -------- - nodes_with_selfloops, selfloop_edges - - Examples - -------- - >>> G=nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_edge(1,1) - >>> G.add_edge(1,2) - >>> G.number_of_selfloops() - 1 - """ - return len(self.selfloop_edges()) - - def size(self, weight=None): - """Return the number of edges. - - Parameters - ---------- - weight : string or None, optional (default=None) - The edge attribute that holds the numerical value used - as a weight. 
If None, then each edge has weight 1. - - Returns - ------- - nedges : int - The number of edges of sum of edge weights in the graph. - - See Also - -------- - number_of_edges - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> G.size() - 3 - - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_edge('a','b',weight=2) - >>> G.add_edge('b','c',weight=4) - >>> G.size() - 2 - >>> G.size(weight='weight') - 6.0 - """ - s = sum(self.degree(weight=weight).values()) / 2 - if weight is None: - return int(s) - else: - return float(s) - - def number_of_edges(self, u=None, v=None): - """Return the number of edges between two nodes. - - Parameters - ---------- - u,v : nodes, optional (default=all edges) - If u and v are specified, return the number of edges between - u and v. Otherwise return the total number of all edges. - - Returns - ------- - nedges : int - The number of edges in the graph. If nodes u and v are specified - return the number of edges between those nodes. - - See Also - -------- - size - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> G.number_of_edges() - 3 - >>> G.number_of_edges(0,1) - 1 - >>> e = (0,1) - >>> G.number_of_edges(*e) - 1 - """ - if u is None: - return int(self.size()) - if v in self.adj[u]: - return 1 - else: - return 0 - - def add_star(self, nodes, **attr): - """Add a star. - - The first node in nodes is the middle of the star. It is connected - to all other nodes. - - Parameters - ---------- - nodes : iterable container - A container of nodes. - attr : keyword arguments, optional (default= no attributes) - Attributes to add to every edge in star. 
- - See Also - -------- - add_path, add_cycle - - Examples - -------- - >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_star([0,1,2,3]) - >>> G.add_star([10,11,12],weight=2) - - """ - nlist = list(nodes) - v = nlist[0] - edges = ((v, n) for n in nlist[1:]) - self.add_edges_from(edges, **attr) - - def add_path(self, nodes, **attr): - """Add a path. - - Parameters - ---------- - nodes : iterable container - A container of nodes. A path will be constructed from - the nodes (in order) and added to the graph. - attr : keyword arguments, optional (default= no attributes) - Attributes to add to every edge in path. - - See Also - -------- - add_star, add_cycle - - Examples - -------- - >>> G=nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_path([0,1,2,3]) - >>> G.add_path([10,11,12],weight=7) - - """ - nlist = list(nodes) - edges = zip(nlist[:-1], nlist[1:]) - self.add_edges_from(edges, **attr) - - def add_cycle(self, nodes, **attr): - """Add a cycle. - - Parameters - ---------- - nodes: iterable container - A container of nodes. A cycle will be constructed from - the nodes (in order) and added to the graph. - attr : keyword arguments, optional (default= no attributes) - Attributes to add to every edge in cycle. - - See Also - -------- - add_path, add_star - - Examples - -------- - >>> G=nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc - >>> G.add_cycle([0,1,2,3]) - >>> G.add_cycle([10,11,12],weight=7) - - """ - nlist = list(nodes) - edges = zip(nlist, nlist[1:] + [nlist[0]]) - self.add_edges_from(edges, **attr) - - def nbunch_iter(self, nbunch=None): - """Return an iterator of nodes contained in nbunch that are - also in the graph. - - The nodes in nbunch are checked for membership in the graph - and if not are silently ignored. - - Parameters - ---------- - nbunch : iterable container, optional (default=all nodes) - A container of nodes. The container will be iterated - through once. 
- - Returns - ------- - niter : iterator - An iterator over nodes in nbunch that are also in the graph. - If nbunch is None, iterate over all nodes in the graph. - - Raises - ------ - NetworkXError - If nbunch is not a node or or sequence of nodes. - If a node in nbunch is not hashable. - - See Also - -------- - Graph.__iter__ - - Notes - ----- - When nbunch is an iterator, the returned iterator yields values - directly from nbunch, becoming exhausted when nbunch is exhausted. - - To test whether nbunch is a single node, one can use - "if nbunch in self:", even after processing with this routine. - - If nbunch is not a node or a (possibly empty) sequence/iterator - or None, a NetworkXError is raised. Also, if any object in - nbunch is not hashable, a NetworkXError is raised. - """ - if nbunch is None: # include all nodes via iterator - bunch = iter(self.adj.keys()) - elif nbunch in self: # if nbunch is a single node - bunch = iter([nbunch]) - else: # if nbunch is a sequence of nodes - - def bunch_iter(nlist, adj): - try: - for n in nlist: - if n in adj: - yield n - except TypeError as e: - message = e.args[0] - import sys - - sys.stdout.write(message) - # capture error for non-sequence/iterator nbunch. - if "iter" in message: - raise NetworkXError("nbunch is not a node or a sequence of nodes.") - # capture error for unhashable node. - elif "hashable" in message: - raise NetworkXError( - "Node %s in the sequence nbunch is not a valid node." % n, - ) - else: - raise - - bunch = bunch_iter(nbunch, self.adj) - return bunch diff --git a/ConfigSpace/nx/exception.py b/ConfigSpace/nx/exception.py deleted file mode 100644 index 50e322c8..00000000 --- a/ConfigSpace/nx/exception.py +++ /dev/null @@ -1,65 +0,0 @@ -""" -********** -Exceptions -**********. - -Base exceptions and errors for NetworkX. - -""" -from __future__ import annotations - -__author__ = """Aric Hagberg (hagberg@lanl.gov) -Pieter Swart (swart@lanl.gov) -Dan Schult(dschult@colgate.edu) -Loïc Séguin-C. 
""" -# Copyright (C) 2004-2011 by -# Aric Hagberg -# Dan Schult -# Pieter Swart -# All rights reserved. -# BSD license. -# - -# Exception handling - - -# the root of all Exceptions -class NetworkXException(Exception): - """Base class for exceptions in NetworkX.""" - - -class NetworkXError(NetworkXException): - """Exception for a serious error in NetworkX.""" - - -class NetworkXPointlessConcept(NetworkXException): - """Harary, F. and Read, R. "Is the Null Graph a Pointless Concept?" - In Graphs and Combinatorics Conference, George Washington University. - New York: Springer-Verlag, 1973. - """ - - -class NetworkXAlgorithmError(NetworkXException): - """Exception for unexpected termination of algorithms.""" - - -class NetworkXUnfeasible(NetworkXAlgorithmError): - """Exception raised by algorithms trying to solve a problem - instance that has no feasible solution. - """ - - -class NetworkXNoPath(NetworkXUnfeasible): - """Exception for algorithms that should return a path when running - on graphs where such a path does not exist. - """ - - -class NetworkXUnbounded(NetworkXAlgorithmError): - """Exception raised by algorithms trying to solve a maximization - or a minimization problem instance that is unbounded. - """ - - -class NetworkXNotImplemented(NetworkXException): - """Exception raised by algorithms not implemented for a type of graph.""" diff --git a/ConfigSpace/nx/release.py b/ConfigSpace/nx/release.py deleted file mode 100644 index 4b1a330d..00000000 --- a/ConfigSpace/nx/release.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (C) 2004-2011 by -# Aric Hagberg -# Dan Schult -# Pieter Swart -# All rights reserved. -# BSD license. 
-from __future__ import annotations - -import datetime -import os -import subprocess -import sys -import time - -basedir = os.path.abspath(os.path.split(__file__)[0]) - - -def get_revision(): - """Returns revision and vcs information, dynamically obtained.""" - vcs, revision, tag = None, None, None - - hgdir = os.path.join(basedir, "..", ".hg") - gitdir = os.path.join(basedir, "..", ".git") - - if os.path.isdir(hgdir): - vcs = "mercurial" - try: - p = subprocess.Popen(["hg", "id"], cwd=basedir, stdout=subprocess.PIPE) - except OSError: - # Could not run hg, even though this is a mercurial repository. - pass - else: - stdout = p.communicate()[0] - # Force strings instead of unicode. - x = list(map(str, stdout.decode().strip().split())) - - if len(x) == 0: - # Somehow stdout was empty. This can happen, for example, - # if you're running in a terminal which has redirected stdout. - # In this case, we do not use any revision/tag info. - pass - elif len(x) == 1: - # We don't have 'tip' or anything similar...so no tag. - revision = str(x[0]) - else: - revision = str(x[0]) - tag = str(x[1]) - - elif os.path.isdir(gitdir): - vcs = "git" - # For now, we are not bothering with revision and tag. - - vcs_info = (vcs, (revision, tag)) - - return revision, vcs_info - - -def get_info(dynamic=True): - # Date information - date_info = datetime.datetime.now() - date = time.asctime(date_info.timetuple()) - - revision, version, version_info, vcs_info = None, None, None, None - - import_failed = False - dynamic_failed = False - - if dynamic: - revision, vcs_info = get_revision() - if revision is None: - dynamic_failed = True - - if dynamic_failed or not dynamic: - # This is where most final releases of NetworkX will be. - # All info should come from version.py. If it does not exist, then - # no vcs information will be provided. 
- sys.path.insert(0, basedir) - try: - from version import date, date_info, vcs_info, version, version_info # type: ignore - except ImportError: - import_failed = True - vcs_info = (None, (None, None)) - else: - revision = vcs_info[1][0] - del sys.path[0] - - if import_failed or (dynamic and not dynamic_failed): - # We are here if: - # we failed to determine static versioning info, or - # we successfully obtained dynamic revision info - version = "".join([str(major), ".", str(minor)]) # noqa - if dev: - version += ".dev_" + date_info.strftime("%Y%m%d%H%M%S") - version_info = (name, major, minor, revision) # noqa - - return date, date_info, version, version_info, vcs_info - - -# Version information -name = "networkx" -major = "1" -minor = "8.1" - - -# Declare current release as a development release. -# Change to False before tagging a release; then change back. -dev = False - - -description = "Python package for creating and manipulating graphs and networks" - -long_description = """ -NetworkX is a Python package for the creation, manipulation, and -study of the structure, dynamics, and functions of complex networks. 
- -""" -license = "BSD" -authors = { - "Hagberg": ("Aric Hagberg", "hagberg@lanl.gov"), - "Schult": ("Dan Schult", "dschult@colgate.edu"), - "Swart": ("Pieter Swart", "swart@lanl.gov"), -} -maintainer = "NetworkX Developers" -maintainer_email = "networkx-discuss@googlegroups.com" -url = "http://networkx.lanl.gov/" -download_url = "http://networkx.lanl.gov/download/networkx" -platforms = ["Linux", "Mac OSX", "Windows", "Unix"] -keywords = [ - "Networks", - "Graph Theory", - "Mathematics", - "network", - "graph", - "discrete mathematics", - "math", -] -classifiers = [ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: BSD License", - "Operating System :: OS Independent", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.6", - "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.1", - "Programming Language :: Python :: 3.2", - "Topic :: Software Development :: Libraries :: Python Modules", - "Topic :: Scientific/Engineering :: Bio-Informatics", - "Topic :: Scientific/Engineering :: Information Analysis", - "Topic :: Scientific/Engineering :: Mathematics", - "Topic :: Scientific/Engineering :: Physics", -] - -date, date_info, version, version_info, vcs_info = get_info() diff --git a/ConfigSpace/read_and_write/json.py b/ConfigSpace/read_and_write/json.py deleted file mode 100644 index 09815531..00000000 --- a/ConfigSpace/read_and_write/json.py +++ /dev/null @@ -1,721 +0,0 @@ -#!/usr/bin/env python -from __future__ import annotations - -import json - -from ConfigSpace import __version__ -from ConfigSpace.conditions import ( - AbstractCondition, - AndConjunction, - EqualsCondition, - GreaterThanCondition, - InCondition, - LessThanCondition, - NotEqualsCondition, - OrConjunction, -) -from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.forbidden import ( - 
AbstractForbiddenComponent, - ForbiddenAndConjunction, - ForbiddenEqualsClause, - ForbiddenEqualsRelation, - ForbiddenGreaterThanRelation, - ForbiddenInClause, - ForbiddenLessThanRelation, - ForbiddenRelation, -) -from ConfigSpace.hyperparameters import ( - BetaFloatHyperparameter, - BetaIntegerHyperparameter, - CategoricalHyperparameter, - Constant, - Hyperparameter, - NormalFloatHyperparameter, - NormalIntegerHyperparameter, - OrdinalHyperparameter, - UniformFloatHyperparameter, - UniformIntegerHyperparameter, - UnParametrizedHyperparameter, -) - -JSON_FORMAT_VERSION = 0.4 - - -################################################################################ -# Builder for hyperparameters -def _build_constant(param: Constant) -> dict: - return { - "name": param.name, - "type": "constant", - "value": param.value, - } - - -def _build_unparametrized_hyperparameter(param: UnParametrizedHyperparameter) -> dict: - return { - "name": param.name, - "type": "unparametrized", - "value": param.value, - } - - -def _build_uniform_float(param: UniformFloatHyperparameter) -> dict: - return { - "name": param.name, - "type": "uniform_float", - "log": param.log, - "lower": param.lower, - "upper": param.upper, - "default": param.default_value, - "q": param.q, - } - - -def _build_normal_float(param: NormalFloatHyperparameter) -> dict: - return { - "name": param.name, - "type": "normal_float", - "log": param.log, - "mu": param.mu, - "sigma": param.sigma, - "default": param.default_value, - "lower": param.lower, - "upper": param.upper, - "q": param.q, - } - - -def _build_beta_float(param: BetaFloatHyperparameter) -> dict: - return { - "name": param.name, - "type": "beta_float", - "log": param.log, - "alpha": param.alpha, - "beta": param.beta, - "lower": param.lower, - "upper": param.upper, - "default": param.default_value, - "q": param.q, - } - - -def _build_uniform_int(param: UniformIntegerHyperparameter) -> dict: - return { - "name": param.name, - "type": "uniform_int", - "log": 
param.log, - "lower": param.lower, - "upper": param.upper, - "default": param.default_value, - "q": param.q, - } - - -def _build_normal_int(param: NormalIntegerHyperparameter) -> dict: - return { - "name": param.name, - "type": "normal_int", - "log": param.log, - "mu": param.mu, - "sigma": param.sigma, - "lower": param.lower, - "upper": param.upper, - "default": param.default_value, - "q": param.q, - } - - -def _build_beta_int(param: BetaIntegerHyperparameter) -> dict: - return { - "name": param.name, - "type": "beta_int", - "log": param.log, - "alpha": param.alpha, - "beta": param.beta, - "lower": param.lower, - "upper": param.upper, - "default": param.default_value, - "q": param.q, - } - - -def _build_categorical(param: CategoricalHyperparameter) -> dict: - return { - "name": param.name, - "type": "categorical", - "choices": param.choices, - "default": param.default_value, - "weights": param.weights, - } - - -def _build_ordinal(param: OrdinalHyperparameter) -> dict: - return { - "name": param.name, - "type": "ordinal", - "sequence": param.sequence, - "default": param.default_value, - } - - -################################################################################ -# Builder for Conditions -def _build_condition(condition: AbstractCondition) -> dict: - methods = { - AndConjunction: _build_and_conjunction, - OrConjunction: _build_or_conjunction, - InCondition: _build_in_condition, - EqualsCondition: _build_equals_condition, - NotEqualsCondition: _build_not_equals_condition, - GreaterThanCondition: _build_greater_than_condition, - LessThanCondition: _build_less_than_condition, - } - return methods[type(condition)](condition) - - -def _build_and_conjunction(conjunction: AndConjunction) -> dict: - child = conjunction.get_descendant_literal_conditions()[0].child.name - cond_list = [] - for component in conjunction.components: - cond_list.append(_build_condition(component)) - return { - "child": child, - "type": "AND", - "conditions": cond_list, - } - - -def 
_build_or_conjunction(conjunction: OrConjunction) -> dict: - child = conjunction.get_descendant_literal_conditions()[0].child.name - cond_list = [] - for component in conjunction.components: - cond_list.append(_build_condition(component)) - return { - "child": child, - "type": "OR", - "conditions": cond_list, - } - - -def _build_in_condition(condition: InCondition) -> dict: - child = condition.child.name - parent = condition.parent.name - values = list(condition.values) - return { - "child": child, - "parent": parent, - "type": "IN", - "values": values, - } - - -def _build_equals_condition(condition: EqualsCondition) -> dict: - child = condition.child.name - parent = condition.parent.name - value = condition.value - return { - "child": child, - "parent": parent, - "type": "EQ", - "value": value, - } - - -def _build_not_equals_condition(condition: NotEqualsCondition) -> dict: - child = condition.child.name - parent = condition.parent.name - value = condition.value - return { - "child": child, - "parent": parent, - "type": "NEQ", - "value": value, - } - - -def _build_greater_than_condition(condition: GreaterThanCondition) -> dict: - child = condition.child.name - parent = condition.parent.name - value = condition.value - return { - "child": child, - "parent": parent, - "type": "GT", - "value": value, - } - - -def _build_less_than_condition(condition: LessThanCondition) -> dict: - child = condition.child.name - parent = condition.parent.name - value = condition.value - return { - "child": child, - "parent": parent, - "type": "LT", - "value": value, - } - - -################################################################################ -# Builder for forbidden -def _build_forbidden(clause: AbstractForbiddenComponent) -> dict: - methods = { - ForbiddenEqualsClause: _build_forbidden_equals_clause, - ForbiddenInClause: _build_forbidden_in_clause, - ForbiddenAndConjunction: _build_forbidden_and_conjunction, - ForbiddenEqualsRelation: _build_forbidden_relation, - 
ForbiddenLessThanRelation: _build_forbidden_relation, - ForbiddenGreaterThanRelation: _build_forbidden_relation, - } - return methods[type(clause)](clause) - - -def _build_forbidden_equals_clause(clause: ForbiddenEqualsClause) -> dict: - return { - "name": clause.hyperparameter.name, - "type": "EQUALS", - "value": clause.value, - } - - -def _build_forbidden_in_clause(clause: ForbiddenInClause) -> dict: - return { - "name": clause.hyperparameter.name, - "type": "IN", - # The values are a set, but a set cannot be serialized to json - "values": list(clause.values), - } - - -def _build_forbidden_and_conjunction(clause: ForbiddenAndConjunction) -> dict: - return { - "name": clause.get_descendant_literal_clauses()[0].hyperparameter.name, - "type": "AND", - "clauses": [_build_forbidden(component) for component in clause.components], - } - - -def _build_forbidden_relation(clause: ForbiddenRelation) -> dict: - if isinstance(clause, ForbiddenLessThanRelation): - lambda_ = "LESS" - elif isinstance(clause, ForbiddenEqualsRelation): - lambda_ = "EQUALS" - elif isinstance(clause, ForbiddenGreaterThanRelation): - lambda_ = "GREATER" - else: - raise ValueError("Unknown relation '%s'" % type(clause)) - - return { - "left": clause.left.name, - "right": clause.right.name, - "type": "RELATION", - "lambda": lambda_, - } - - -################################################################################ -def write(configuration_space: ConfigurationSpace, indent: int = 2) -> str: - """ - Create a string representation of a - :class:`~ConfigSpace.configuration_space.ConfigurationSpace` in json format. - This string can be written to file. - - .. 
code:: python - - from ConfigSpace import ConfigurationSpace - from ConfigSpace.read_and_write import json as cs_json - - cs = ConfigurationSpace({"a": [1, 2, 3]}) - - with open('configspace.json', 'w') as f: - f.write(cs_json.write(cs)) - - Parameters - ---------- - configuration_space : :class:`~ConfigSpace.configuration_space.ConfigurationSpace` - a configuration space, which should be written to file. - indent : int - number of whitespaces to use as indent - - Returns - ------- - str - String representation of the configuration space, - which will be written to file - """ - if not isinstance(configuration_space, ConfigurationSpace): - raise TypeError( - "pcs_parser.write expects an instance of {}, " - "you provided '{}'".format(ConfigurationSpace, type(configuration_space)), - ) - - hyperparameters = [] - conditions = [] - forbiddens = [] - - for hyperparameter in configuration_space.values(): - if isinstance(hyperparameter, Constant): - hyperparameters.append(_build_constant(hyperparameter)) - elif isinstance(hyperparameter, UnParametrizedHyperparameter): - hyperparameters.append(_build_unparametrized_hyperparameter(hyperparameter)) - elif isinstance(hyperparameter, BetaFloatHyperparameter): - hyperparameters.append(_build_beta_float(hyperparameter)) - elif isinstance(hyperparameter, UniformFloatHyperparameter): - hyperparameters.append(_build_uniform_float(hyperparameter)) - elif isinstance(hyperparameter, NormalFloatHyperparameter): - hyperparameters.append(_build_normal_float(hyperparameter)) - elif isinstance(hyperparameter, BetaIntegerHyperparameter): - hyperparameters.append(_build_beta_int(hyperparameter)) - elif isinstance(hyperparameter, UniformIntegerHyperparameter): - hyperparameters.append(_build_uniform_int(hyperparameter)) - elif isinstance(hyperparameter, NormalIntegerHyperparameter): - hyperparameters.append(_build_normal_int(hyperparameter)) - elif isinstance(hyperparameter, CategoricalHyperparameter): - 
hyperparameters.append(_build_categorical(hyperparameter)) - elif isinstance(hyperparameter, OrdinalHyperparameter): - hyperparameters.append(_build_ordinal(hyperparameter)) - else: - raise TypeError( - "Unknown type: {} ({})".format( - type(hyperparameter), - hyperparameter, - ), - ) - - for condition in configuration_space.get_conditions(): - conditions.append(_build_condition(condition)) - - for forbidden_clause in configuration_space.get_forbiddens(): - forbiddens.append(_build_forbidden(forbidden_clause)) - - rval: dict = {} - if configuration_space.name is not None: - rval["name"] = configuration_space.name - rval["hyperparameters"] = hyperparameters - rval["conditions"] = conditions - rval["forbiddens"] = forbiddens - rval["python_module_version"] = __version__ - rval["json_format_version"] = JSON_FORMAT_VERSION - - return json.dumps(rval, indent=indent) - - -################################################################################ -def read(jason_string: str) -> ConfigurationSpace: - """ - Create a configuration space definition from a json string. - - .. 
code:: python - - from ConfigSpace import ConfigurationSpace - from ConfigSpace.read_and_write import json as cs_json - - cs = ConfigurationSpace({"a": [1, 2, 3]}) - - cs_string = cs_json.write(cs) - with open('configspace.json', 'w') as f: - f.write(cs_string) - - with open('configspace.json', 'r') as f: - json_string = f.read() - config = cs_json.read(json_string) - - - Parameters - ---------- - jason_string : str - A json string representing a configuration space definition - - Returns - ------- - :class:`~ConfigSpace.configuration_space.ConfigurationSpace` - The deserialized ConfigurationSpace object - """ - jason = json.loads(jason_string) - if "name" in jason: - configuration_space = ConfigurationSpace(name=jason["name"]) - else: - configuration_space = ConfigurationSpace() - - for hyperparameter in jason["hyperparameters"]: - configuration_space.add_hyperparameter( - _construct_hyperparameter( - hyperparameter, - ), - ) - - for condition in jason["conditions"]: - configuration_space.add_condition( - _construct_condition( - condition, - configuration_space, - ), - ) - - for forbidden in jason["forbiddens"]: - configuration_space.add_forbidden_clause( - _construct_forbidden( - forbidden, - configuration_space, - ), - ) - - return configuration_space - - -def _construct_hyperparameter(hyperparameter: dict) -> Hyperparameter: # noqa: PLR0911 - hp_type = hyperparameter["type"] - name = hyperparameter["name"] - if hp_type == "constant": - return Constant( - name=name, - value=hyperparameter["value"], - ) - - if hp_type == "unparametrized": - return UnParametrizedHyperparameter( - name=name, - value=hyperparameter["value"], - ) - - if hp_type == "uniform_float": - return UniformFloatHyperparameter( - name=name, - log=hyperparameter["log"], - lower=hyperparameter["lower"], - upper=hyperparameter["upper"], - default_value=hyperparameter["default"], - # Backwards compatibily issue - # https://github.com/automl/ConfigSpace/issues/325 - q=hyperparameter.get("q", None), 
- ) - - if hp_type == "normal_float": - return NormalFloatHyperparameter( - name=name, - log=hyperparameter["log"], - mu=hyperparameter["mu"], - sigma=hyperparameter["sigma"], - lower=hyperparameter["lower"], - upper=hyperparameter["upper"], - default_value=hyperparameter["default"], - # Backwards compatibily issue - # https://github.com/automl/ConfigSpace/issues/325 - q=hyperparameter.get("q", None), - ) - - if hp_type == "beta_float": - return BetaFloatHyperparameter( - name=name, - alpha=hyperparameter["alpha"], - beta=hyperparameter["beta"], - lower=hyperparameter["lower"], - upper=hyperparameter["upper"], - log=hyperparameter["log"], - # Backwards compatibily issue - # https://github.com/automl/ConfigSpace/issues/325 - q=hyperparameter.get("q", None), - default_value=hyperparameter["default"], - ) - - if hp_type == "uniform_int": - return UniformIntegerHyperparameter( - name=name, - log=hyperparameter["log"], - lower=hyperparameter["lower"], - upper=hyperparameter["upper"], - default_value=hyperparameter["default"], - # Backwards compatibily issue - # https://github.com/automl/ConfigSpace/issues/325 - q=hyperparameter.get("q", None), - ) - - if hp_type == "normal_int": - return NormalIntegerHyperparameter( - name=name, - mu=hyperparameter["mu"], - sigma=hyperparameter["sigma"], - log=hyperparameter["log"], - lower=hyperparameter["lower"], - upper=hyperparameter["upper"], - default_value=hyperparameter["default"], - # Backwards compatibily issue - # https://github.com/automl/ConfigSpace/issues/325 - q=hyperparameter.get("q", None), - ) - - if hp_type == "beta_int": - return BetaIntegerHyperparameter( - name=name, - alpha=hyperparameter["alpha"], - beta=hyperparameter["beta"], - lower=hyperparameter["lower"], - upper=hyperparameter["upper"], - log=hyperparameter["log"], - # Backwards compatibily issue - # https://github.com/automl/ConfigSpace/issues/325 - q=hyperparameter.get("q", None), - default_value=hyperparameter["default"], - ) - - if hp_type == 
"categorical": - return CategoricalHyperparameter( - name=name, - choices=hyperparameter["choices"], - default_value=hyperparameter["default"], - weights=hyperparameter.get("weights"), - ) - - if hp_type == "ordinal": - return OrdinalHyperparameter( - name=name, - sequence=hyperparameter["sequence"], - default_value=hyperparameter["default"], - ) - - raise ValueError(hp_type) - - -def _construct_condition( - condition: dict, - cs: ConfigurationSpace, -) -> AbstractCondition: - condition_type = condition["type"] - methods = { - "AND": _construct_and_condition, - "OR": _construct_or_condition, - "IN": _construct_in_condition, - "EQ": _construct_eq_condition, - "NEQ": _construct_neq_condition, - "GT": _construct_gt_condition, - "LT": _construct_lt_condition, - } - return methods[condition_type](condition, cs) - - -def _construct_and_condition( - condition: dict, - cs: ConfigurationSpace, -) -> AndConjunction: - conditions = [_construct_condition(cond, cs) for cond in condition["conditions"]] - return AndConjunction(*conditions) - - -def _construct_or_condition( - condition: dict, - cs: ConfigurationSpace, -) -> OrConjunction: - conditions = [_construct_condition(cond, cs) for cond in condition["conditions"]] - return OrConjunction(*conditions) - - -def _construct_in_condition( - condition: dict, - cs: ConfigurationSpace, -) -> InCondition: - return InCondition( - child=cs[condition["child"]], - parent=cs[condition["parent"]], - values=condition["values"], - ) - - -def _construct_eq_condition( - condition: dict, - cs: ConfigurationSpace, -) -> EqualsCondition: - return EqualsCondition( - child=cs[condition["child"]], - parent=cs[condition["parent"]], - value=condition["value"], - ) - - -def _construct_neq_condition( - condition: dict, - cs: ConfigurationSpace, -) -> NotEqualsCondition: - return NotEqualsCondition( - child=cs[condition["child"]], - parent=cs[condition["parent"]], - value=condition["value"], - ) - - -def _construct_gt_condition( - condition: dict, - cs: 
ConfigurationSpace, -) -> GreaterThanCondition: - return GreaterThanCondition( - child=cs[condition["child"]], - parent=cs[condition["parent"]], - value=condition["value"], - ) - - -def _construct_lt_condition( - condition: dict, - cs: ConfigurationSpace, -) -> LessThanCondition: - return LessThanCondition( - child=cs[condition["child"]], - parent=cs[condition["parent"]], - value=condition["value"], - ) - - -def _construct_forbidden( - clause: dict, - cs: ConfigurationSpace, -) -> AbstractForbiddenComponent: - forbidden_type = clause["type"] - methods = { - "EQUALS": _construct_forbidden_equals, - "IN": _construct_forbidden_in, - "AND": _construct_forbidden_and, - "RELATION": _construct_forbidden_equals, - } - return methods[forbidden_type](clause, cs) - - -def _construct_forbidden_equals( - clause: dict, - cs: ConfigurationSpace, -) -> ForbiddenEqualsClause: - return ForbiddenEqualsClause(hyperparameter=cs[clause["name"]], value=clause["value"]) - - -def _construct_forbidden_in( - clause: dict, - cs: ConfigurationSpace, -) -> ForbiddenEqualsClause: - return ForbiddenInClause(hyperparameter=cs[clause["name"]], values=clause["values"]) - - -def _construct_forbidden_and( - clause: dict, - cs: ConfigurationSpace, -) -> ForbiddenAndConjunction: - clauses = [_construct_forbidden(cl, cs) for cl in clause["clauses"]] - return ForbiddenAndConjunction(*clauses) - - -def _construct_forbidden_relation( # pyright: ignore - clause: dict, - cs: ConfigurationSpace, -) -> ForbiddenRelation: - left = cs[clause["left"]] - right = cs[clause["right"]] - - if clause["lambda"] == "LESS": - return ForbiddenLessThanRelation(left, right) - - if clause["lambda"] == "EQUALS": - return ForbiddenEqualsRelation(left, right) - - if clause["lambda"] == "GREATER": - return ForbiddenGreaterThanRelation(left, right) - - raise ValueError("Unknown relation '%s'" % clause["lambda"]) diff --git a/ConfigSpace/util.py b/ConfigSpace/util.py deleted file mode 100644 index 120bfce3..00000000 --- 
a/ConfigSpace/util.py +++ /dev/null @@ -1,710 +0,0 @@ -# Copyright (c) 2014-2016, ConfigSpace developers -# Matthias Feurer -# Katharina Eggensperger -# and others (see commit history). -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the nor the -# names of its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-from __future__ import annotations - -import copy -from collections import deque -from typing import Any, Iterator, cast - -import numpy as np - -import ConfigSpace.c_util -from ConfigSpace import Configuration, ConfigurationSpace -from ConfigSpace.exceptions import ActiveHyperparameterNotSetError, ForbiddenValueError -from ConfigSpace.hyperparameters import ( - CategoricalHyperparameter, - Constant, - Hyperparameter, - NumericalHyperparameter, - OrdinalHyperparameter, - UniformFloatHyperparameter, - UniformIntegerHyperparameter, -) - - -def impute_inactive_values( - configuration: Configuration, - strategy: str | float = "default", -) -> Configuration: - """Impute inactive parameters. - - Iterate through the hyperparameters of a ``Configuration`` and set the - values of the inactive hyperparamters to their default values if the choosen - ``strategy`` is 'default'. Otherwise ``strategy`` contains a float number. - Set the hyperparameters' value to this number. - - - Parameters - ---------- - configuration : :class:`~ConfigSpace.configuration_space.Configuration` - For this configuration inactive values will be imputed. - strategy : (str, float, optional) - The imputation strategy. Defaults to 'default' - If 'default', replace inactive parameters by their default. - If float, replace inactive parameters by the given float value, - which should be able to be splitted apart by a tree-based model. - - Returns - ------- - :class:`~ConfigSpace.configuration_space.Configuration` - A new configuration with the imputed values. - In this new configuration inactive values are included. 
- """ - values = {} - for hp in configuration.config_space.values(): - value = configuration.get(hp.name) - if value is None: - if strategy == "default": - new_value = hp.default_value - - elif isinstance(strategy, float): - new_value = strategy - - else: - raise ValueError(f"Unknown imputation strategy {strategy}") - - value = new_value - - values[hp.name] = value - - return Configuration( - configuration.config_space, - values=values, - allow_inactive_with_values=True, - ) - - -def get_one_exchange_neighbourhood( - configuration: Configuration, - seed: int, - num_neighbors: int = 4, - stdev: float = 0.2, -) -> Iterator[Configuration]: - """ - Return all configurations in a one-exchange neighborhood. - - The method is implemented as defined by: - Frank Hutter, Holger H. Hoos and Kevin Leyton-Brown - Sequential Model-Based Optimization for General Algorithm Configuration - In Proceedings of the conference on Learning and Intelligent - Optimization(LION 5) - - Parameters - ---------- - configuration : :class:`~ConfigSpace.configuration_space.Configuration` - for this Configuration object ``num_neighbors`` neighbors are computed - seed : int - Sets the random seed to a fixed value - num_neighbors : (int, optional) - number of configurations, which are sampled from the neighbourhood - of the input configuration - stdev : (float, optional) - The standard deviation is used to determine the neigbours of - :class:`~ConfigSpace.hyperparameters.UniformFloatHyperparameter` and - :class:`~ConfigSpace.hyperparameters.UniformIntegerHyperparameter`. - - Returns - ------- - Iterator - It contains configurations, with values being situated around - the given configuration. 
- - """ - random = np.random.RandomState(seed) - hyperparameters_list = list(configuration.config_space._hyperparameters.keys()) - hyperparameters_list_length = len(hyperparameters_list) - hyperparameters_used = [ - hp.name - for hp in configuration.config_space.values() - if ( - hp.get_num_neighbors(configuration.get(hp.name)) == 0 - and configuration.get(hp.name) is not None - ) - ] - number_of_usable_hyperparameters = sum(np.isfinite(configuration.get_array())) - n_neighbors_per_hp = { - hp.name: num_neighbors - if ( - isinstance(hp, NumericalHyperparameter) - and hp.get_num_neighbors(configuration.get(hp.name)) > num_neighbors - ) - else hp.get_num_neighbors(configuration.get(hp.name)) - for hp in configuration.config_space.values() - } - - finite_neighbors_stack: dict[str, list[Hyperparameter]] = {} - configuration_space = configuration.config_space - - while len(hyperparameters_used) < number_of_usable_hyperparameters: - index = int(random.randint(hyperparameters_list_length)) - hp_name = hyperparameters_list[index] - if n_neighbors_per_hp[hp_name] == 0: - continue - - else: - neighbourhood = [] - number_of_sampled_neighbors = 0 - array = configuration.get_array() # type: np.ndarray - value = array[index] # type: float - - # Check for NaNs (inactive value) - if value != value: - continue - - iteration = 0 - hp = configuration_space[hp_name] - num_neighbors_for_hp = hp.get_num_neighbors(configuration.get(hp_name)) - while True: - # Obtain neigbors differently for different possible numbers of - # neighbors - if num_neighbors_for_hp == 0 or iteration > 100: - break - elif np.isinf(num_neighbors_for_hp): - if number_of_sampled_neighbors >= 1: - break - if isinstance(hp, UniformFloatHyperparameter): - neighbor = hp.get_neighbors(value, random, number=1, std=stdev)[0] - else: - neighbor = hp.get_neighbors(value, random, number=1)[0] - else: - if iteration > 0: - break - if hp_name not in finite_neighbors_stack: - if isinstance(hp, UniformIntegerHyperparameter): - 
neighbors = hp.get_neighbors( - value, - random, - number=n_neighbors_per_hp[hp_name], - std=stdev, - ) - else: - neighbors = hp.get_neighbors(value, random) - random.shuffle(neighbors) - finite_neighbors_stack[hp_name] = neighbors - else: - neighbors = finite_neighbors_stack[hp_name] - neighbor = neighbors.pop() - - # Check all newly obtained neigbors - new_array = array.copy() - new_array = ConfigSpace.c_util.change_hp_value( - configuration_space=configuration_space, - configuration_array=new_array, - hp_name=hp_name, - hp_value=neighbor, - index=index, - ) - try: - # Populating a configuration from an array does not check - # if it is a legal configuration - check this (slow) - new_configuration = Configuration( - configuration_space, - vector=new_array, - ) # type: Configuration - # Only rigorously check every tenth configuration ( - # because moving around in the neighborhood should - # just work!) - if random.random() > 0.95: - new_configuration.is_valid_configuration() - else: - configuration_space._check_forbidden(new_array) - neighbourhood.append(new_configuration) - except ForbiddenValueError: - pass - - iteration += 1 - if len(neighbourhood) > 0: - number_of_sampled_neighbors += 1 - - # Some infinite loop happened and no valid neighbor was found OR - # no valid neighbor is available for a categorical - if len(neighbourhood) == 0: - hyperparameters_used.append(hp_name) - n_neighbors_per_hp[hp_name] = 0 - hyperparameters_used.append(hp_name) - elif hp_name not in hyperparameters_used: - n_ = neighbourhood.pop() - n_neighbors_per_hp[hp_name] -= 1 - if n_neighbors_per_hp[hp_name] == 0: - hyperparameters_used.append(hp_name) - yield n_ - - -def get_random_neighbor(configuration: Configuration, seed: int) -> Configuration: - """ - Draw a random neighbor by changing one parameter of a configuration. - - - If the parameter is categorical, it changes it to another value. - - If the parameter is ordinal, it changes it to the next higher or - lower value. 
- - If parameter is a float, draw a random sample - - If changing a parameter activates new parameters or deactivates - previously active parameters, the configuration will be rejected. If more - than 10000 configurations were rejected, this function raises a - ValueError. - - Parameters - ---------- - configuration : :class:`~ConfigSpace.configuration_space.Configuration` - a configuration for which a random neigbour is calculated - seed : int - Used to generate a random state. - - Returns - ------- - :class:`~ConfigSpace.configuration_space.Configuration` - The new neighbor - - """ - random = np.random.RandomState(seed) - rejected = True - values = copy.deepcopy(dict(configuration)) - new_configuration = None - - while rejected: - # First, choose an active hyperparameter - active = False - iteration = 0 - hp = None - value = None - while not active: - iteration += 1 - rand_idx = random.randint(0, len(configuration)) if len(configuration) > 1 else 0 - - value = configuration.get_array()[rand_idx] - if np.isfinite(value): - active = True - - hp_name = configuration.config_space.get_hyperparameter_by_idx(rand_idx) - hp = configuration.config_space[hp_name] - - # Only choose if there is a possibility of finding a neigboor - if not hp.has_neighbors(): - active = False - - if iteration > 10000: - raise ValueError("Probably caught in an infinite loop.") - - assert hp is not None - assert value is not None - - # Get a neighboor and adapt the rest of the configuration if necessary - neighbor = hp.get_neighbors(value, random, number=1, transform=True)[0] - previous_value = values[hp.name] - values[hp.name] = neighbor - - try: - new_configuration = Configuration(configuration.config_space, values=values) - rejected = False - except ValueError: - values[hp.name] = previous_value - - assert new_configuration is not None - return new_configuration - - -def deactivate_inactive_hyperparameters( - configuration: dict, - configuration_space: ConfigurationSpace, - vector: None | 
np.ndarray = None, -) -> Configuration: - """ - Remove inactive hyperparameters from a given configuration. - - Parameters - ---------- - configuration : dict - a configuration as a dictionary. Key: name of the hyperparameter. - Value: value of this hyperparamter - configuration from which inactive hyperparameters will be removed - configuration_space : :class:`~ConfigSpace.configuration_space.ConfigurationSpace` - The defined configuration space. It is necessary to find the inactive - hyperparameters by iterating through the conditions of the - configuration space. - vector : (np.ndarray, optional) - Efficient represantation of a configuration. Either ``configuration`` or - ``vector`` must be specified. If both are specified only - ``configuration`` will be used. - - Returns - ------- - :class:`~ConfigSpace.configuration_space.Configuration` - A configuration that is equivalent to the given configuration, except - that inactivate hyperparameters have been removed. - - """ - hyperparameters = configuration_space.values() - config = Configuration( - configuration_space=configuration_space, - values=configuration, - vector=vector, - allow_inactive_with_values=True, - ) - - hps: deque[Hyperparameter] = deque() - - unconditional_hyperparameters = configuration_space.get_all_unconditional_hyperparameters() - hyperparameters_with_children = [] - for uhp in unconditional_hyperparameters: - children = configuration_space._children_of[uhp] - if len(children) > 0: - hyperparameters_with_children.append(uhp) - hps.extendleft(hyperparameters_with_children) - - inactive = set() - - while len(hps) > 0: - hp = hps.pop() - children = configuration_space._children_of[hp] - for child in children: - conditions = configuration_space._parent_conditions_of[child.name] - for condition in conditions: - if not condition.evaluate_vector(config.get_array()): - dic = dict(config) - try: - del dic[child.name] - except KeyError: - continue - config = Configuration( - 
configuration_space=configuration_space, - values=dic, - allow_inactive_with_values=True, - ) - inactive.add(child.name) - hps.appendleft(child.name) - - for hp in hyperparameters: - if hp.name in inactive: - dic = dict(config) - try: - del dic[hp.name] - except KeyError: - continue - config = Configuration( - configuration_space=configuration_space, - values=dic, - allow_inactive_with_values=True, - ) - - return Configuration(configuration_space, values=dict(config)) - - -def fix_types( - configuration: dict[str, Any], - configuration_space: ConfigurationSpace, -) -> dict[str, Any]: - """ - Iterate over all hyperparameters in the ConfigSpace - and fix the types of the parameter values in configuration. - - Parameters - ---------- - configuration : dict - a configuration as a dictionary. Key: name of the hyperparameter. - Value: value of this hyperparamter - configuration_space : :class:`~ConfigSpace.configuration_space.ConfigurationSpace` - Configuration space which knows the types for all parameter values - - Returns - ------- - dict - configuration with fixed types of parameter values - """ - - def fix_type_from_candidates(value: Any, candidates: list[Any]) -> Any: - result = [c for c in candidates if str(value) == str(c)] - if len(result) != 1: - raise ValueError( - "Parameter value {} cannot be matched to candidates {}. 
" - "Either none or too many matching candidates.".format(str(value), candidates), - ) - return result[0] - - for param in configuration_space.values(): - param_name = param.name - if configuration.get(param_name) is not None: - if isinstance(param, (CategoricalHyperparameter)): - configuration[param_name] = fix_type_from_candidates( - configuration[param_name], - param.choices, - ) - elif isinstance(param, (OrdinalHyperparameter)): - configuration[param_name] = fix_type_from_candidates( - configuration[param_name], - param.sequence, - ) - elif isinstance(param, Constant): - configuration[param_name] = fix_type_from_candidates( - configuration[param_name], - [param.value], - ) - elif isinstance(param, UniformFloatHyperparameter): - configuration[param_name] = float(configuration[param_name]) - elif isinstance(param, UniformIntegerHyperparameter): - configuration[param_name] = int(configuration[param_name]) - else: - raise TypeError(f"Unknown hyperparameter type {type(param)}") - return configuration - - -def generate_grid( - configuration_space: ConfigurationSpace, - num_steps_dict: dict[str, int] | None = None, -) -> list[Configuration]: - """ - Generates a grid of Configurations for a given ConfigurationSpace. - Can be used, for example, for grid search. - - Parameters - ---------- - configuration_space: :class:`~ConfigSpace.configuration_space.ConfigurationSpace` - The Configuration space over which to create a grid of HyperParameter Configuration values. - It knows the types for all parameter values. - - num_steps_dict: dict - A dict containing the number of points to divide the grid side formed by Hyperparameters - which are either of type UniformFloatHyperparameter or type UniformIntegerHyperparameter. - The keys in the dict should be the names of the corresponding Hyperparameters and the values - should be the number of points to divide the grid side formed by the corresponding - Hyperparameter in to. 
- - Returns - ------- - list - List containing Configurations. It is a cartesian product of tuples of - HyperParameter values. - Each tuple lists the possible values taken by the corresponding HyperParameter. - Within the cartesian product, in each element, the ordering of HyperParameters is the same - for the OrderedDict within the ConfigurationSpace. - """ - - def get_value_set(num_steps_dict: dict[str, int] | None, hp_name: str) -> tuple: - """ - Gets values along the grid for a particular hyperparameter. - - Uses the num_steps_dict to determine number of grid values for UniformFloatHyperparameter - and UniformIntegerHyperparameter. If these values are not present in num_steps_dict, the - quantization factor, q, of these classes will be used to divide the grid. NOTE: When q - is used if it is None, a ValueError is raised. - - Parameters - ---------- - num_steps_dict: dict - Same description as above - - hp_name: str - Hyperparameter name - - Returns - ------- - tuple - Holds grid values for the given hyperparameter - - """ - param = configuration_space[hp_name] - if isinstance(param, (CategoricalHyperparameter)): - return cast(tuple, param.choices) - - if isinstance(param, (OrdinalHyperparameter)): - return cast(tuple, param.sequence) - - if isinstance(param, Constant): - return (param.value,) - - if isinstance(param, UniformFloatHyperparameter): - if param.log: - lower, upper = np.log([param.lower, param.upper]) - else: - lower, upper = param.lower, param.upper - - if num_steps_dict is not None and param.name in num_steps_dict: - num_steps = num_steps_dict[param.name] - grid_points = np.linspace(lower, upper, num_steps) - - # check for log and for rounding issues - elif param.q is not None: - grid_points = np.arange(lower, upper + param.q, param.q) - else: - raise ValueError( - "num_steps_dict is None or doesn't contain the number of points" - f" to divide {param.name} into. And its quantization factor " - "is None. 
Please provide/set one of these values.", - ) - - if param.log: - grid_points = np.exp(grid_points) - - # Avoiding rounding off issues - if grid_points[0] < param.lower: - grid_points[0] = param.lower - if grid_points[-1] > param.upper: - grid_points[-1] = param.upper - - return tuple(grid_points) - - if isinstance(param, UniformIntegerHyperparameter): - if param.log: - lower, upper = np.log([param.lower, param.upper]) - else: - lower, upper = param.lower, param.upper - - if num_steps_dict is not None and param.name in num_steps_dict: - num_steps = num_steps_dict[param.name] - grid_points = np.linspace(lower, upper, num_steps) - - # check for log and for rounding issues - elif param.q is not None: - grid_points = np.arange(lower, upper + param.q, param.q) - else: - raise ValueError( - "num_steps_dict is None or doesn't contain the number of points " - f"to divide {param.name} into. And its quantization factor " - "is None. Please provide/set one of these values.", - ) - - if param.log: - grid_points = np.exp(grid_points) - grid_points = np.round(grid_points).astype(int) - - # Avoiding rounding off issues - if grid_points[0] < param.lower: - grid_points[0] = param.lower - if grid_points[-1] > param.upper: - grid_points[-1] = param.upper - - return tuple(grid_points) - - raise TypeError(f"Unknown hyperparameter type {type(param)}") - - def get_cartesian_product(value_sets: list[tuple], hp_names: list[str]) -> list[dict[str, Any]]: - """ - Returns a grid for a subspace of the configuration with given hyperparameters - and their grid values. - - Takes a list of tuples of grid values of the hyperparameters and list of - hyperparameter names. The outer list iterates over the hyperparameters corresponding - to the order in the list of hyperparameter names. - The inner tuples contain grid values of the hyperparameters for each hyperparameter. 
- - Parameters - ---------- - value_sets: list of tuples - Same description as return value of get_value_set() - - hp_names: list of strs - List of hyperparameter names - - Returns - ------- - list of dicts - List of configuration dicts - """ - import itertools - - if len(value_sets) == 0: - # Edge case - return [] - - grid = [] - for element in itertools.product(*value_sets): - config_dict = dict(zip(hp_names, element)) - grid.append(config_dict) - - return grid - - # list of tuples: each tuple within is the grid values to be taken on by a Hyperparameter - value_sets = [] - hp_names = [] - - # Get HP names and allowed grid values they can take for the HPs at the top - # level of ConfigSpace tree - for hp_name in configuration_space._children["__HPOlib_configuration_space_root__"]: - value_sets.append(get_value_set(num_steps_dict, hp_name)) - hp_names.append(hp_name) - - # Create a Cartesian product of above allowed values for the HPs. Hold them in an - # "unchecked" deque because some of the conditionally dependent HPs may become active - # for some of the elements of the Cartesian product and in these cases creating a - # Configuration would throw an Error (see below). 
- # Creates a deque of Configuration dicts - unchecked_grid_pts = deque(get_cartesian_product(value_sets, hp_names)) - checked_grid_pts = [] - - while len(unchecked_grid_pts) > 0: - try: - grid_point = Configuration(configuration_space, unchecked_grid_pts[0]) - checked_grid_pts.append(grid_point) - - # When creating a configuration that violates a forbidden clause, simply skip it - except ForbiddenValueError: - unchecked_grid_pts.popleft() - continue - - except ActiveHyperparameterNotSetError: - value_sets = [] - hp_names = [] - new_active_hp_names = [] - - # "for" loop over currently active HP names - for hp_name in unchecked_grid_pts[0]: - value_sets.append( - (unchecked_grid_pts[0][hp_name],), - ) - hp_names.append(hp_name) - # Checks if the conditionally dependent children of already active - # HPs are now active - for new_hp_name in configuration_space._children[hp_name]: - if ( - new_hp_name not in new_active_hp_names - and new_hp_name not in unchecked_grid_pts[0] - ): - all_cond_ = True - for cond in configuration_space._parent_conditions_of[new_hp_name]: - if not cond.evaluate(unchecked_grid_pts[0]): - all_cond_ = False - if all_cond_: - new_active_hp_names.append(new_hp_name) - - for hp_name in new_active_hp_names: - value_sets.append(get_value_set(num_steps_dict, hp_name)) - hp_names.append(hp_name) - # this check might not be needed, as there is always going to be a new - # active HP when in this except block? - if len(new_active_hp_names) <= 0: - raise RuntimeError( - "Unexpected error: There should have been a newly activated hyperparameter" - f" for the current configuration values: {str(unchecked_grid_pts[0])}. 
" - "Please contact the developers with the code you ran and the stack trace.", - ) from None - - new_conditonal_grid = get_cartesian_product(value_sets, hp_names) - unchecked_grid_pts += new_conditonal_grid - unchecked_grid_pts.popleft() - - return checked_grid_pts diff --git a/Makefile b/Makefile index dae37146..3508083c 100644 --- a/Makefile +++ b/Makefile @@ -6,17 +6,17 @@ .PHONY: help install-dev install-test install-docs pre-commit clean clean-doc clean-build build docs links publish test clean-test +A_DIFFERENT_CHANGE + help: @echo "Makefile ConfigSpace" @echo "* install-dev to install all dev requirements and install pre-commit" - @echo "* pre-commit to run the pre-commit check" + @echo "* check to run the pre-commit check" @echo "* docs to generate and view the html files" - @echo "* linkcheck to check the documentation links" @echo "* publish to help publish the current branch to pypi" @echo "* test to run the tests" PYTHON ?= python -CYTHON ?= cython PYTEST ?= python -m pytest CTAGS ?= ctags PRECOMMIT ?= pre-commit @@ -40,48 +40,20 @@ install-dev: $(PIP) install -e ".[dev]" pre-commit install -install-test: - $(PIP) install -e ".[test]" - -install-docs: - $(PIP) install -e ".[docs]" - check: $(PRECOMMIT) run --all-files -check-types: - mypy ConfigSpace - fix: black --quiet ConfigSpace test ruff --silent --exit-zero --no-cache --fix ConfigSpace test -build: - python -m build - test: $(PYTEST) test -clean-build: - rm -rf ${BUILD} - -clean-docs: - $(MAKE) -C ${DOCDIR} clean - -clean: clean-build clean-docs - -clean-test: clean-build build test - # Running build before making docs is needed all be it very slow. 
# Without doing a full build, the doctests seem to use docstrings from the last compiled build docs: clean build - $(MAKE) -C ${DOCDIR} html - @echo - @echo "View docs at:" - @echo ${INDEX_HTML} - -links: - $(MAKE) -C ${DOCDIR} linkcheck + mkdocs serve # Publish to testpypi # Will echo the commands to actually publish to be run to publish to actual PyPi @@ -103,9 +75,3 @@ publish: @echo "Once you have decided it works, publish to actual pypi with" @echo @echo " python -m twine upload dist/*" - -cython-annotate: - C_INCLUDE_PATH=$(NUMPY_INCLUDE) cython -3 --directive boundscheck=False,wraparound=False --annotate ConfigSpace/*.pyx - -cython-html: cython-annotate - python -c "import webbrowser; from pathlib import Path; [webbrowser.open(f'file://{path}') for path in Path('ConfigSpace').absolute().glob('*.html')]" diff --git a/README.md b/README.md index 41c38438..b1ba34d1 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ConfigSpace -A simple Python/Cython module implementing a domain specific language to manage +A simple Python module implementing a domain specific language to manage configuration spaces for algorithm configuration and hyperparameter optimization tasks. Distributed under BSD 3-clause, see LICENSE except all files in the directory ConfigSpace.nx, which are copied from the networkx package and licensed diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index bbeebd67..00000000 --- a/docs/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -SPHINXBUILD = sphinx-build -BUILDDIR = build -SPHINXOPTS = -ALLSPHINXOPTS = $(SPHINXOPTS) . - -.PHONY: clean buildapi linkcheck html docs html-noexamples - -clean: - rm -rf $(BUILDDIR)/* - rm -rf ../build/ - -linkcheck: - SPHINX_GALLERY_PLOT=False $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." 
- -html: clean linkcheck - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - $(SPHINXBUILD) -M doctest $(ALLSPHINXOPTS) $(BUILDDIR)/html || true - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - -docs: html linkcheck diff --git a/docs/api/conditions.rst b/docs/api/conditions.rst deleted file mode 100644 index 0d068ccf..00000000 --- a/docs/api/conditions.rst +++ /dev/null @@ -1,53 +0,0 @@ -.. _Conditions: - -Conditions -========== - -ConfigSpace can realize *equal*, *not equal*, *less than*, *greater than* and -*in conditions*. Conditions can be combined by using the conjunctions *and* and -*or*. To see how to use conditions, please take a look at the -:doc:`user guide <../guide>`. - -EqualsCondition -------------------- - -.. autoclass:: ConfigSpace.conditions.EqualsCondition - -.. _NotEqualsCondition: - -NotEqualsCondition ------------------- - -.. autoclass:: ConfigSpace.conditions.NotEqualsCondition - -.. _LessThanCondition: - -LessThanCondition ------------------ - -.. autoclass:: ConfigSpace.conditions.LessThanCondition - - - -GreaterThanCondition --------------------- - -.. autoclass:: ConfigSpace.conditions.GreaterThanCondition - - -InCondition ------------ - -.. autoclass:: ConfigSpace.conditions.InCondition - - -AndConjunction --------------- - -.. autoclass:: ConfigSpace.conditions.AndConjunction - - -OrConjunction -------------- - -.. autoclass:: ConfigSpace.conditions.OrConjunction \ No newline at end of file diff --git a/docs/api/configuration.rst b/docs/api/configuration.rst deleted file mode 100644 index d291d0e7..00000000 --- a/docs/api/configuration.rst +++ /dev/null @@ -1,5 +0,0 @@ -Configuration -============= - -.. 
autoclass:: ConfigSpace.configuration_space.Configuration - :members: \ No newline at end of file diff --git a/docs/api/configurationspace.rst b/docs/api/configurationspace.rst deleted file mode 100644 index 6b16b0cf..00000000 --- a/docs/api/configurationspace.rst +++ /dev/null @@ -1,5 +0,0 @@ -ConfigurationSpace -================== - -.. autoclass:: ConfigSpace.configuration_space.ConfigurationSpace - :members: \ No newline at end of file diff --git a/docs/api/forbidden_clauses.rst b/docs/api/forbidden_clauses.rst deleted file mode 100644 index f4be9c80..00000000 --- a/docs/api/forbidden_clauses.rst +++ /dev/null @@ -1,26 +0,0 @@ -.. _Forbidden clauses: - -Forbidden Clauses -================= - -ConfigSpace contains *forbidden equal* and *forbidden in clauses*. -The *ForbiddenEqualsClause* and the *ForbiddenInClause* can forbid values to be -sampled from a configuration space if a certain condition is met. The -*ForbiddenAndConjunction* can be used to combine *ForbiddenEqualsClauses* and -the *ForbiddenInClauses*. - -For a further example, please take a look in the :doc:`user guide <../guide>`. - -ForbiddenEqualsClause ---------------------- -.. autoclass:: ConfigSpace.ForbiddenEqualsClause(hyperparameter, value) - - -ForbiddenInClause ------------------ -.. autoclass:: ConfigSpace.ForbiddenInClause(hyperparameter, values) - - -ForbiddenAndConjunction ------------------------ -.. autoclass:: ConfigSpace.ForbiddenAndConjunction(*args) \ No newline at end of file diff --git a/docs/api/hyperparameters.rst b/docs/api/hyperparameters.rst deleted file mode 100644 index 47120123..00000000 --- a/docs/api/hyperparameters.rst +++ /dev/null @@ -1,116 +0,0 @@ -.. _Hyperparameters: - -Hyperparameters -=============== -ConfigSpace contains -:func:`~ConfigSpace.api.types.float.Float`, -:func:`~ConfigSpace.api.types.integer.Integer` -and :func:`~ConfigSpace.api.types.categorical.Categorical` hyperparamters, each with their own customizability. 
- -For :func:`~ConfigSpace.api.types.float.Float` and :func:`~ConfigSpace.api.types.integer.Integer`, you will find their -interface much the same, being able to take the same :ref:`distributions ` and parameters. - -A :func:`~ConfigSpace.api.types.categorical.Categorical` can optionally take weights to define -your own custom distribution over the discrete **un-ordered** choices. -One can also pass ``ordered=True`` to make it an :class:`~ConfigSpace.hyperparameters.OrdinalHyperparameter`. - -These are all **convenience** functions that construct the more complex :ref:`hyperparameter classes `, *e.g.* :class:`~ConfigSpace.hyperparameters.UniformIntegerHyperparameter`, -which are the underlying complex types which make up the backbone of what's possible. -You may still use these complex classes without any functional difference. - -.. note:: - - The Simple types, `Integer`, `Float` and `Categorical` are just simple functions that construct the more complex underlying types. - -Example usages are shown below each. - -Simple Types ------------- - -Float -^^^^^ - -.. automodule:: ConfigSpace.api.types.float - -Integer -^^^^^^^ - -.. automodule:: ConfigSpace.api.types.integer - -Categorical -^^^^^^^^^^^ - -.. automodule:: ConfigSpace.api.types.categorical - - -.. _Distributions: - -Distributions -------------- -These can be used as part of the ``distribution`` parameter for the basic -:func:`~ConfigSpace.api.types.integer.Integer` and :func:`~ConfigSpace.api.types.float.Float` functions. - -.. automodule:: ConfigSpace.api.distributions - :exclude-members: Distribution - -.. _Advanced_Hyperparameters: - -Advanced Types --------------- -The full hyperparameters are exposed through the following API points. - - -Integer hyperparameters -^^^^^^^^^^^^^^^^^^^^^^^ - -These can all be constructed with the simple :func:`~ConfigSpace.api.types.integer.Integer` function and -passing the corresponding :ref:`distribution `. - -.. 
autoclass:: ConfigSpace.hyperparameters.UniformIntegerHyperparameter - -.. autoclass:: ConfigSpace.hyperparameters.NormalIntegerHyperparameter - -.. autoclass:: ConfigSpace.hyperparameters.BetaIntegerHyperparameter - - - -.. _advanced_float: - -Float hyperparameters -^^^^^^^^^^^^^^^^^^^^^ - -These can all be constructed with the simple :func:`~ConfigSpace.api.types.float` function and -passing the corresponding :ref:`distribution `. - -.. autoclass:: ConfigSpace.hyperparameters.UniformFloatHyperparameter - -.. autoclass:: ConfigSpace.hyperparameters.NormalFloatHyperparameter - -.. autoclass:: ConfigSpace.hyperparameters.BetaFloatHyperparameter - - - -.. _advanced_categorical: - -Categorical Hyperparameter -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -This can be constructed with the simple form :func:`~ConfigSpace.api.types.categorical` and setting -``ordered=False`` which is the default. - -.. autoclass:: ConfigSpace.hyperparameters.CategoricalHyperparameter - - -Ordinal Hyperparameter -^^^^^^^^^^^^^^^^^^^^^^ -This can be constructed with the simple form :func:`~ConfigSpace.api.types.categorical` and setting -``ordered=True``. - -.. autoclass:: ConfigSpace.hyperparameters.OrdinalHyperparameter - -.. _Other hyperparameters: - -Constant -^^^^^^^^ - -.. autoclass:: ConfigSpace.hyperparameters.Constant diff --git a/docs/api/index.rst b/docs/api/index.rst deleted file mode 100644 index adf4c7f3..00000000 --- a/docs/api/index.rst +++ /dev/null @@ -1,14 +0,0 @@ -API -+++ - - -.. toctree:: - :maxdepth: 2 - - configurationspace - configuration - hyperparameters - conditions - forbidden_clauses - serialization - utils \ No newline at end of file diff --git a/docs/api/serialization.rst b/docs/api/serialization.rst deleted file mode 100644 index f57d08e8..00000000 --- a/docs/api/serialization.rst +++ /dev/null @@ -1,32 +0,0 @@ -.. _Serialization: - -Serialization -============= - -ConfigSpace offers *json*, *pcs* and *pcs_new* writers/readers. 
-These classes can serialize and deserialize configuration spaces. -Serializing configuration spaces is useful to share configuration spaces across -experiments, or use them in other tools, for example, to analyze hyperparameter -importance with `CAVE `_. - -.. _json: - -Serialization to JSON ---------------------- - -.. automodule:: ConfigSpace.read_and_write.json - :members: read, write - -.. _pcs_new: - -Serialization with pcs-new (new format) ---------------------------------------- - -.. automodule:: ConfigSpace.read_and_write.pcs_new - :members: read, write - -Serialization with pcs (old format) ------------------------------------ - -.. automodule:: ConfigSpace.read_and_write.pcs - :members: read, write diff --git a/docs/api/utils.rst b/docs/api/utils.rst deleted file mode 100644 index 361565ed..00000000 --- a/docs/api/utils.rst +++ /dev/null @@ -1,10 +0,0 @@ -Utils -===== - -Functions defined in the utils module can be helpful to -develop custom tools that create configurations from a given configuration -space or modify a given configuration space. - -.. 
automodule:: ConfigSpace.util - :members: - :undoc-members: diff --git a/docs/images/logo.png b/docs/assets/logo.png similarity index 100% rename from docs/images/logo.png rename to docs/assets/logo.png diff --git a/docs/conf.py b/docs/conf.py deleted file mode 100644 index af78f86c..00000000 --- a/docs/conf.py +++ /dev/null @@ -1,39 +0,0 @@ -import datetime - -import automl_sphinx_theme -from ConfigSpace import __authors__, __version__ - -authors = ", ".join(__authors__) - - -options = { - "copyright": f"""Copyright {datetime.date.today().strftime('%Y')}, {authors}""", - "author": authors, - "version": __version__, - "name": "ConfigSpace", - "html_theme_options": { - "github_url": "https://github.com/automl/ConfigSpace", - "twitter_url": "https://twitter.com/automl_org?lang=de", - }, -} - -# Import conf.py from the automl theme -automl_sphinx_theme.set_options(globals(), options) - -extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.doctest", - "sphinx.ext.coverage", - "sphinx.ext.mathjax", - "sphinx.ext.viewcode", - "sphinx.ext.autosummary", - "sphinx.ext.napoleon", - "sphinx.ext.githubpages", - "sphinx.ext.doctest", -] - -autodoc_typehints = "description" -autoclass_content = "both" -autodoc_default_options = { - "inherited-members": True, -} diff --git a/docs/guide.md b/docs/guide.md new file mode 100644 index 00000000..6887561c --- /dev/null +++ b/docs/guide.md @@ -0,0 +1,311 @@ +## User Guide +In this user guide, the concepts of using different hyperparameters, +applying conditions and forbidden clauses to a configuration space are explained. + +These concepts will be introduced by defining a more complex configuration space +for a support vector machine. + +### 1st Example: Integer hyperparameters and float hyperparameters +Assume that we want to use a support vector machine (=SVM) for classification +tasks and therefore, we want to optimize its hyperparameters: + +- `C`: regularization constant with `C` being a float value. 
+- `max_iter`: the maximum number of iterations within the solver with `max_iter` being a positive integer. + +The implementation of the classifier is out of scope and thus not shown. +But for further reading about support vector machines and the meaning of its hyperparameter, +you can continue reading [here](https://en.wikipedia.org/wiki/Support_vector_machine) or +in the [scikit-learn documentation](http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn.svm.SVC). + +The first step is always to create a +[`ConfigurationSpace`][ConfigSpace.configuration_space.ConfigurationSpace] with the +hyperparameters `C` and `max_iter`. + +To restrict the search space, we choose `C` to be a +[`Float`][ConfigSpace.api.types.float.Float] between -1 and 1. +Furthermore, we choose `max_iter` to be an [`Integer`][ConfigSpace.api.types.integer.Integer]. + +```python exec="True" source="material-block" result="python" session="example_one" +from ConfigSpace import ConfigurationSpace + +cs = ConfigurationSpace( + space={ + "C": (-1.0, 1.0), # Note the decimal to make it a float + "max_iter": (10, 100), + }, + seed=1234, +) +print(cs) +``` + +Now, the [`ConfigurationSpace`][ConfigSpace.configuration_space.ConfigurationSpace] object *cs* +contains definitions of the hyperparameters `C` and `max_iter` with their +value-ranges. + +For demonstration purpose, we sample a configuration from it. + +```python exec="True" source="material-block" result="python" session="example_one" +config = cs.sample_configuration() +print(config) +``` + +Sampled instances from a [`ConfigurationSpace`][ConfigSpace.configuration_space.ConfigurationSpace] +are called a [`Configuration`][ConfigSpace.configuration.Configuration]. +In a [`Configuration`][ConfigSpace.configuration.Configuration], +a parameter can be accessed or modified similar to a python dictionary. 
+
+```python exec="True" source="material-block" result="python" session="example_one"
+for key, value in config.items():
+    print(f"{key}: {value}")
+
+print(config["C"])
+```
+
+
+### 2nd Example: Categorical hyperparameters and conditions
+The scikit-learn SVM supports different kernels, such as an RBF, a sigmoid,
+a linear or a polynomial kernel. We want to include them in the configuration space.
+Since this new hyperparameter has a finite number of values, we use a
+[`Categorical`][ConfigSpace.api.types.categorical.Categorical].
+
+- `kernel_type` in `#!python ['linear', 'poly', 'rbf', 'sigmoid']`.
+
+Taking a look at the SVM documentation, we observe that if the kernel type is
+chosen to be `'poly'`, another hyperparameter `degree` must be specified.
+Also, for the kernel types `'poly'` and `'sigmoid'`, there is an additional hyperparameter `coef0`.
+As well as the hyperparameter `gamma` for the kernel types `'rbf'`, `'poly'` and `'sigmoid'`.
+
+- `degree`: the integer degree of a polynomial kernel function.
+- `coef0`: Independent term in kernel function. It is only needed for `'poly'` and `'sigmoid'` kernel.
+- `gamma`: Kernel coefficient for `'rbf'`, `'poly'` and `'sigmoid'`.
+
+To realize the different hyperparameter for the kernels, we use **Conditionals**.
+Please refer to their [reference page](./reference/conditions.md) for more.
+
+Even in simple examples, the configuration space grows easily very fast and
+with it the number of possible configurations.
+It makes sense to limit the search space for hyperparameter optimizations in
+order to quickly find good configurations. For conditional hyperparameters
+_(hyperparameters which only take a value if some condition is met)_, ConfigSpace
+achieves this by sampling those hyperparameters from the configuration
+space only if their condition is met. 
+ +To add conditions on hyperparameters to the configuration space, we first have +to insert the new hyperparameters in the `ConfigSpace` and in a second step, the +conditions on them. + +```python exec="True" source="material-block" result="python" session="example_two" +from ConfigSpace import ConfigurationSpace, Categorical, Float, Integer + +kernel_type = Categorical('kernel_type', ['linear', 'poly', 'rbf', 'sigmoid']) +degree = Integer('degree', bounds=(2, 4), default=2) +coef0 = Float('coef0', bounds=(0, 1), default=0.0) +gamma = Float('gamma', bounds=(1e-5, 1e2), default=1, log=True) + +cs = ConfigurationSpace() +cs.add([kernel_type, degree, coef0, gamma]) +print(cs) +``` + +First, we define the conditions. Conditions work by constraining a child +hyperparameter (the first argument) on its parent hyperparameter (the second argument) +being in a certain relation to a value (the third argument). +`EqualsCondition(degree, kernel_type, 'poly')` expresses that `degree` is +constrained on `kernel_type` being equal to the value `'poly'`. +To express constraints involving multiple parameters or values, we can use conjunctions. +In the following example, `cond_2` describes that `coef0` +is a valid hyperparameter, if the `kernel_type` has either the value `'poly'` or `'sigmoid'`. 
+
+```python exec="True" source="material-block" result="python" session="example_two"
+from ConfigSpace import EqualsCondition, InCondition, OrConjunction
+
+# read as: "degree is active if kernel_type == 'poly'"
+cond_1 = EqualsCondition(degree, kernel_type, 'poly')
+
+# read as: "coef0 is active if (kernel_type == 'poly' or kernel_type == 'sigmoid')"
+# You could also define this using an InCondition as shown below
+cond_2 = OrConjunction(
+    EqualsCondition(coef0, kernel_type, 'poly'),
+    EqualsCondition(coef0, kernel_type, 'sigmoid')
+)
+
+# read as: "gamma is active if kernel_type in ['rbf', 'poly', 'sigmoid']"
+cond_3 = InCondition(gamma, kernel_type, ['rbf', 'poly','sigmoid'])
+```
+
+Finally, we add the conditions to the configuration space
+
+```python exec="True" source="material-block" result="python" session="example_two"
+cs.add([cond_1, cond_2, cond_3])
+print(cs)
+```
+
+!!! note
+
+    ConfigSpace offers a lot of different condition types.
+    Please check out the [conditions reference page](./reference/conditions.md) for more.
+
+!!! warning
+
+    We advise not using the `EqualsCondition` or the `InCondition` on float hyperparameters.
+    Due to numerical rounding that can occur, it can be the case that these conditions evaluate to
+    `False` even if they should evaluate to `True`.
+
+
+### 3rd Example: Forbidden clauses
+It may occur that some states in the configuration space are not allowed.
+ConfigSpace supports this functionality by offering **Forbidden clauses**.
+
+We demonstrate the usage of Forbidden clauses by defining the configuration space for the
+[linear SVM](http://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html#sklearn.svm.LinearSVC).
+Again, we use the sklearn implementation. This implementation has three
+hyperparameters to tune:
+
+- `penalty`: Specifies the norm used in the penalization with values `'l1'` or `'l2'`.
+- `loss`: Specifies the loss function with values `'hinge'` or `'squared_hinge'`. 
+- `dual`: Solves the optimization problem either in the dual or simple form with values `True` or `False`. + +Because some combinations of `penalty`, `loss` and `dual` just don't work +together, we want to make sure that these combinations are not sampled from the +configuration space. +It is possible to represent these as conditionals, however sometimes it is easier to +express them as forbidden clauses. + +First, we add these three new hyperparameters to the configuration space. + +```python exec="True" source="material-block" result="python" session="example_three" +from ConfigSpace import ConfigurationSpace, Categorical, Constant + +cs = ConfigurationSpace() + +penalty = Categorical("penalty", ["l1", "l2"], default="l2") +loss = Categorical("loss", ["hinge", "squared_hinge"], default="squared_hinge") +dual = Constant("dual", "False") +cs.add([penalty, loss, dual]) +print(cs) +``` + +Now, we want to forbid the following hyperparameter combinations: + +- `penalty` is `'l1'` and `loss` is `'hinge'`. 
+- `dual` is False and `penalty` is `'l2'` and `loss` is `'hinge'`
+- `dual` is False and `penalty` is `'l1'`
+
+```python exec="True" source="material-block" result="python" session="example_three"
+from ConfigSpace import ForbiddenEqualsClause, ForbiddenAndConjunction
+
+penalty_and_loss = ForbiddenAndConjunction(
+    ForbiddenEqualsClause(penalty, "l1"),
+    ForbiddenEqualsClause(loss, "hinge")
+)
+constant_penalty_and_loss = ForbiddenAndConjunction(
+    ForbiddenEqualsClause(dual, "False"),
+    ForbiddenEqualsClause(penalty, "l2"),
+    ForbiddenEqualsClause(loss, "hinge")
+)
+penalty_and_dual = ForbiddenAndConjunction(
+    ForbiddenEqualsClause(dual, "False"),
+    ForbiddenEqualsClause(penalty, "l1")
+)
+```
+
+In the last step, we add them to the configuration space object:
+
+```python exec="True" source="material-block" result="python" session="example_three"
+cs.add([penalty_and_loss, constant_penalty_and_loss, penalty_and_dual])
+print(cs)
+```
+
+
+### 4th Example: Serialization
+To serialize the `ConfigurationSpace` object, we can choose between different output formats, such as
+a plain-type dictionary, directly to `.yaml` or `.json` and if required for backwards compatibility `pcs`.
+Please see the [serialization reference page](./reference/serialization.md) for more.
+
+In this example, we want to store the [`ConfigurationSpace`][ConfigSpace.configuration_space.ConfigurationSpace]
+object as a `.yaml` file. 
+
+```python exec="True" source="material-block" result="yaml" session="example_four"
+from pathlib import Path
+from ConfigSpace import ConfigurationSpace
+
+path = Path("configspace.yaml")
+cs = ConfigurationSpace(
+    space={
+        "C": (-1.0, 1.0),  # Note the decimal to make it a float
+        "max_iter": (10, 100),
+    },
+    seed=1234,
+)
+cs.to_yaml(path)
+loaded_cs = ConfigurationSpace.from_yaml(path)
+
+with path.open() as f:
+    print(f.read())
+path.unlink()  # markdown-exec: hide
+```
+
+If you require custom encoding or decoding or parameters, please refer to the
+[serialization reference page](./reference/serialization.md) for more.
+
+### 5th Example: Placing priors on the hyperparameters
+If you want to conduct black-box optimization in [SMAC](https://arxiv.org/abs/2109.09831),
+and you have prior knowledge about which regions of the search space are more likely to contain the optimum,
+you may include this knowledge when designing the configuration space.
+More specifically, you place prior distributions over the optimum on the parameters,
+either by a (log)-normal or (log)-Beta distribution.
+SMAC then considers the given priors through the optimization by using
+[PiBO](https://openreview.net/forum?id=MMAeCXIa89).
+
+Consider the case of optimizing the accuracy of an MLP with three hyperparameters:
+
+* learning rate in `(1e-5, 1e-1)`
+* dropout in `(0, 0.99)`
+* activation in `["Tanh", "ReLU"]`.
+
+From prior experience, you believe the optimal learning rate to be around `1e-3`,
+a good dropout to be around `0.25`,
+and the optimal activation function to be ReLU about 80% of the time. 
+ +This can be represented accordingly: + +```python exec="True" source="material-block" result="python" session="example_five" +import numpy as np +from ConfigSpace import ConfigurationSpace, Float, Categorical, Beta, Normal + +cs = ConfigurationSpace( + space={ + "lr": Float( + 'lr', + bounds=(1e-5, 1e-1), + default=1e-3, + log=True, + distribution=Normal(1e-3, 1e-1) + ), + "dropout": Float( + 'dropout', + bounds=(0, 0.99), + default=0.25, + distribution=Beta(alpha=2, beta=4) + ), + "activation": Categorical( + 'activation', + items=['tanh', 'relu'], + weights=[0.2, 0.8] + ), + }, + seed=1234, +) +print(cs) +``` + +To check that your prior makes sense for each hyperparameter, +you can easily do so with the [`pdf_values()`][ConfigSpace.hyperparameters.Hyperparameter.pdf_values] method. +There, you will see that the probability of the optimal learning rate peaks at +10^-3, and decays as we go further away from it: + +```python exec="True" source="material-block" result="python" session="example_five" +test_points = np.logspace(-5, -1, 5) +print(test_points) +print(cs['lr'].pdf_values(test_points)) +``` diff --git a/docs/guide.rst b/docs/guide.rst deleted file mode 100644 index f5f37a10..00000000 --- a/docs/guide.rst +++ /dev/null @@ -1,289 +0,0 @@ -User Guide -========== - -In this user guide, the concepts of using different hyperparameters, applying -conditions and forbidden clauses to -a configuration space are explained. - -These concepts will be introduced by defining a more complex configuration space -for a support vector machine. 
- -1st Example: Integer hyperparameters and float hyperparameters --------------------------------------------------------------- - -Assume that we want to use a support vector machine (=SVM) for classification -tasks and therefore, we want to optimize its hyperparameters: - -- :math:`\mathcal{C}`: regularization constant with :math:`\mathcal{C} \in \mathbb{R}` -- ``max_iter``: the maximum number of iterations within the solver with :math:`max\_iter \in \mathbb{N}` - -The implementation of the classifier is out of scope and thus not shown. -But for further reading about -support vector machines and the meaning of its hyperparameter, you can continue -reading `here `_ or -in the `scikit-learn documentation `_. - -The first step is always to create a -:class:`~ConfigSpace.configuration_space.ConfigurationSpace` with the -hyperparameters :math:`\mathcal{C}` and ``max_iter``. - -To restrict the search space, we choose :math:`\mathcal{C}` to be a -:class:`~ConfigSpace.api.types.float` between -1 and 1. -Furthermore, we choose ``max_iter`` to be an :class:`~ConfigSpace.api.types.integer.Integer` . - ->>> from ConfigSpace import ConfigurationSpace ->>> ->>> cs = ConfigurationSpace( -... seed=1234, -... space={ -... "C": (-1.0, 1.0), # Note the decimal to make it a float -... "max_iter": (10, 100), -... } -... ) - -For demonstration purpose, we sample a configuration from it. - ->>> cs.sample_configuration() -Configuration(values={ - 'C': -0.6169610992422154, - 'max_iter': 66, -}) - - -Now, the :class:`~ConfigSpace.configuration_space.ConfigurationSpace` object *cs* -contains definitions of the hyperparameters :math:`\mathcal{C}` and ``max_iter`` with their -value-ranges. - -.. _1st_Example: - -Sampled instances from a :class:`~ConfigSpace.configuration_space.ConfigurationSpace` -are called :class:`~ConfigSpace.configuration_space.Configuration`. 
-In a :class:`~ConfigSpace.configuration_space.Configuration` object, the value -of a parameter can be accessed or modified similar to a python dictionary. - ->>> conf = cs.sample_configuration() ->>> conf['max_iter'] = 42 ->>> print(conf['max_iter']) -42 - - -2nd Example: Categorical hyperparameters and conditions -------------------------------------------------------- - -The scikit-learn SVM supports different kernels, such as an RBF, a sigmoid, -a linear or a polynomial kernel. We want to include them in the configuration space. -Since this new hyperparameter has a finite number of values, we use a -:class:`~ConfigSpace.api.types.categorical`. - - -- ``kernel_type``: with values 'linear', 'poly', 'rbf', 'sigmoid'. - -Taking a look at the SVM documentation, we observe that if the kernel type is -chosen to be 'poly', another hyperparameter ``degree`` must be specified. -Also, for the kernel types 'poly' and 'sigmoid', there is an additional hyperparameter ``coef0``. -As well as the hyperparameter ``gamma`` for the kernel types 'rbf', 'poly' and 'sigmoid'. - -- ``degree``: the degree of a polynomial kernel function, being :math:`\in \mathbb{N}` -- ``coef0``: Independent term in kernel function. It is only significant in 'poly' and 'sigmoid'. -- ``gamma``: Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. - -To realize the different hyperparameter for the kernels, we use :ref:`Conditions`. - -Even in simple examples, the configuration space grows easily very fast and -with it the number of possible configurations. -It makes sense to limit the search space for hyperparameter optimizations in -order to quickly find good configurations. For conditional hyperparameters -(= hyperparameters which only take a value if some condition is met), ConfigSpace -achieves this by sampling those hyperparameters from the configuration -space only if their condition is met. 
- -To add conditions on hyperparameters to the configuration space, we first have -to insert the new hyperparameters in the ``ConfigSpace`` and in a second step, the -conditions on them. - ->>> from ConfigSpace import ConfigurationSpace, Categorical, Float, Integer ->>> ->>> kernel_type = Categorical('kernel_type', ['linear', 'poly', 'rbf', 'sigmoid']) ->>> degree = Integer('degree', bounds=(2, 4), default=2) ->>> coef0 = Float('coef0', bounds=(0, 1), default=0.0) ->>> gamma = Float('gamma', bounds=(1e-5, 1e2), default=1, log=True) ->>> ->>> cs = ConfigurationSpace() ->>> cs.add_hyperparameters([kernel_type, degree, coef0, gamma]) -[kernel_type, Type: Categorical, Choices: {linear, poly, rbf, sigmoid}, ...] - -First, we define the conditions. Conditions work by constraining a child -hyperparameter (the first argument) on its parent hyperparameter (the second argument) -being in a certain relation to a value (the third argument). -``EqualsCondition(degree, kernel_type, 'poly')`` expresses that ``degree`` is -constrained on ``kernel_type`` being equal to the value 'poly'. To express -constraints involving multiple parameters or values, we can use conjunctions. -In the following example, ``cond_2`` describes that ``coef0`` -is a valid hyperparameter, if the ``kernel_type`` has either the value -'poly' or 'sigmoid'. - ->>> from ConfigSpace import EqualsCondition, OrConjunction ->>> ->>> cond_1 = EqualsCondition(degree, kernel_type, 'poly') ->>> ->>> cond_2 = OrConjunction( -... EqualsCondition(coef0, kernel_type, 'poly'), -... EqualsCondition(coef0, kernel_type, 'sigmoid') -... ) ->>> ->>> cond_3 = OrConjunction( -... EqualsCondition(gamma, kernel_type, 'rbf'), -... EqualsCondition(gamma, kernel_type, 'poly'), -... EqualsCondition(gamma, kernel_type, 'sigmoid') -... 
) - -In this specific example, you may wish to use the :class:`~ConfigSpace.conditions.InCondition` to express -that ``gamma`` is valid if ``kernel_type in ["rbf", "poly", "sigmoid"]`` which we show for completness - ->>> from ConfigSpace import InCondition ->>> ->>> cond_3 = InCondition(gamma, kernel_type, ["rbf", "poly", "sigmoid"]) - -Finally, we add the conditions to the configuration space - ->>> cs.add_conditions([cond_1, cond_2, cond_3]) -[degree | kernel_type == 'poly', (coef0 | kernel_type == 'poly' || coef0 | ...), ...] - -.. note:: - - ConfigSpace offers a lot of different condition types. For example the - :class:`~ConfigSpace.conditions.NotEqualsCondition`, - :class:`~ConfigSpace.conditions.LessThanCondition`, - or :class:`~ConfigSpace.conditions.GreaterThanCondition`. - To read more about conditions, please take a look at the :ref:`Conditions`. - -.. note:: - Don't use either the :class:`~ConfigSpace.conditions.EqualsCondition` or the - :class:`~ConfigSpace.conditions.InCondition` on float hyperparameters. - Due to floating-point inaccuracy, it is very unlikely that the - :class:`~ConfigSpace.conditions.EqualsCondition` is evaluated to True. - - -3rd Example: Forbidden clauses ------------------------------- - -It may occur that some states in the configuration space are not allowed. -ConfigSpace supports this functionality by offering :ref:`Forbidden clauses`. - -We demonstrate the usage of :ref:`Forbidden clauses` by defining the -configuration space for the -`linear SVM `_. -Again, we use the sklearn implementation. 
This implementation has three -hyperparameters to tune: - -- ``penalty``: Specifies the norm used in the penalization with values 'l1' or 'l2' -- ``loss``: Specifies the loss function with values 'hinge' or 'squared_hinge' -- ``dual``: Solves the optimization problem either in the dual or simple form with values True or False - -Because some combinations of ``penalty``, ``loss`` and ``dual`` just don't work -together, we want to make sure that these combinations are not sampled from the -configuration space. - -First, we add these three new hyperparameters to the configuration space. - ->>> from ConfigSpace import ConfigurationSpace, Categorical, Constant ->>> ->>> penalty = Categorical("penalty", ["l1", "l2"], default="l2") ->>> loss = Categorical("loss", ["hinge", "squared_hinge"], default="squared_hinge") ->>> dual = Constant("dual", "False") ->>> cs.add_hyperparameters([penalty, loss, dual]) -[penalty, Type: Categorical, Choices: {l1, l2}, Default: l2, ...] - -Now, we want to forbid the following hyperparameter combinations: - -- ``penalty`` is 'l1' and ``loss`` is 'hinge' -- ``dual`` is False and ``penalty`` is 'l2' and ``loss`` is 'hinge' -- ``dual`` is False and ``penalty`` is 'l1' - ->>> from ConfigSpace import ForbiddenEqualsClause, ForbiddenAndConjunction ->>> ->>> penalty_and_loss = ForbiddenAndConjunction( -... ForbiddenEqualsClause(penalty, "l1"), -... ForbiddenEqualsClause(loss, "hinge") -... ) ->>> constant_penalty_and_loss = ForbiddenAndConjunction( -... ForbiddenEqualsClause(dual, "False"), -... ForbiddenEqualsClause(penalty, "l2"), -... ForbiddenEqualsClause(loss, "hinge") -... ) ->>> penalty_and_dual = ForbiddenAndConjunction( -... ForbiddenEqualsClause(dual, "False"), -... ForbiddenEqualsClause(penalty, "l1") -... ) - -In the last step, we add them to the configuration space object: - ->>> cs.add_forbidden_clauses([penalty_and_loss, constant_penalty_and_loss, penalty_and_dual]) -[(Forbidden: penalty == 'l1' && Forbidden: loss == 'hinge'), ...] 
- - -4th Example Serialization -------------------------- - -If you want to use the configuration space in another tool, such as -`CAVE `_, it is useful to store it to file. -To serialize the :class:`~ConfigSpace.configuration_space.ConfigurationSpace`, -we can choose between different output formats, such as -:ref:`json ` or :ref:`pcs `. - -In this example, we want to store the :class:`~ConfigSpace.configuration_space.ConfigurationSpace` -object as json file - ->>> from ConfigSpace.read_and_write import json ->>> with open('configspace.json', 'w') as fh: -... fh.write(json.write(cs)) -2828 - -To read it from file - ->>> with open('configspace.json', 'r') as fh: -... json_string = fh.read() ->>> restored_conf = json.read(json_string) - - -5th Example: Placing priors on the hyperparameters --------------------------------------------------- - -If you want to conduct black-box optimization in SMAC (https://arxiv.org/abs/2109.09831), and you have prior knowledge about the which regions of the search space are more likely to contain the optimum, you may include this knowledge when designing the configuration space. More specifically, you place prior distributions over the optimum on the parameters, either by a (log)-normal or (log)-Beta distribution. SMAC then considers the given priors through the optimization by using PiBO (https://openreview.net/forum?id=MMAeCXIa89). - -Consider the case of optimizing the accuracy of an MLP with three hyperparameters: learning rate [1e-5, 1e-1], dropout [0, 0.99] and activation {Tanh, ReLU}. From prior experience, you believe the optimal learning rate to be around 1e-3, a good dropout to be around 0.25, and the optimal activation function to be ReLU about 80% of the time. 
This can be represented accordingly: - ->>> import numpy as np ->>> from ConfigSpace import ConfigurationSpace, Float, Categorical, Beta, Normal ->>> ->>> # convert 10 log to natural log for learning rate, mean 1e-3 ->>> # with two standard deviations on either side of the mean to cover the search space ->>> logmean = np.log(1e-3) ->>> logstd = np.log(10.0) ->>> ->>> cs = ConfigurationSpace( -... seed=1234, -... space={ -... "lr": Float('lr', bounds=(1e-5, 1e-1), default=1e-3, log=True, distribution=Normal(logmean, logstd)), -... "dropout": Float('dropout', bounds=(0, 0.99), default=0.25, distribution=Beta(alpha=2, beta=4)), -... "activation": Categorical('activation', ['tanh', 'relu'], weights=[0.2, 0.8]), -... } -... ) ->>> print(cs) -Configuration space object: - Hyperparameters: - activation, Type: Categorical, Choices: {tanh, relu}, Default: tanh, Probabilities: (0.2, 0.8) - dropout, Type: BetaFloat, Alpha: 2.0 Beta: 4.0, Range: [0.0, 0.99], Default: 0.25 - lr, Type: NormalFloat, Mu: -6.907755278982137 Sigma: 2.302585092994046, Range: [1e-05, 0.1], Default: 0.001, on log-scale - - -To check that your prior makes sense for each hyperparameter, you can easily do so with the ``__pdf__`` method. There, you will see that the probability of the optimal learning rate peaks at 10^-3, and decays as we go further away from it: - ->>> test_points = np.logspace(-5, -1, 5) ->>> print(test_points) -[1.e-05 1.e-04 1.e-03 1.e-02 1.e-01] - -The pdf function accepts an (N, ) numpy array as input. - ->>> cs['lr'].pdf(test_points) -array([0.02456573, 0.11009594, 0.18151753, 0.11009594, 0.02456573]) diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000..6417354b --- /dev/null +++ b/docs/index.md @@ -0,0 +1,118 @@ +## Welcome to ConfigSpace's documentation! 
+ConfigSpace is a simple python package to manage configuration spaces for +[algorithm configuration](https://ml.informatik.uni-freiburg.de/papers/09-JAIR-ParamILS.pdf) and +[hyperparameter optimization](https://en.wikipedia.org/wiki/Hyperparameter_optimization) tasks. +It includes various modules to translate between different text formats for configuration space descriptions. + +ConfigSpace is often used in AutoML tools such as +[SMAC3](https://github.com/automl/SMAC3), +[BOHB](https://github.com/automl/HpBandSter), +[auto-sklearn](https://github.com/automl/auto-sklearn). +To read more about our group and projects, visit our homepage [AutoML.org](https://www.automl.org). + +This documentation explains how to use ConfigSpace and demonstrates its features. +In the [quickstart](./quickstart.md), you will see how to set up a +[`ConfiguratonSpace`][ConfigSpace.configuration_space.ConfigurationSpace] +and add hyperparameters of different types to it. +Besides containing hyperparameters, `ConfigurationSpace` can contain constraints such as conditions and forbidden clauses. +Those are introduced in the [user guide](./guide.md) + +!!! tip "New in 1.0!" + + In ConfigSpace 1.0, we have removed the dependancy on `Cython` while even improving + the performance! + + * Should now install anywhere. + * You can now use your editor to jump to definition and see the source code. + * Contribute more easily! + + With this, we have also deprecated many of the previous functions, simplifying the API + where possible or improving it's clarity. We have tried hard to keep everything backwards + compatible, and also recommend the new functionality to use! + + We've also made some strides towards extensibilty of ConfigSpace, making it simpler to + define you own hyperparamter types. Please see the + [hyperparameter reference](./reference/hyperparameters.md) page for more. + + !!! warning + + One notable hard removal is the use of the `"q"` parameter to numerical parameters. 
+ We recommend using an `Ordinal` distribution where possible. Please let us know if this + effects you and we can help migrate where possible. + +### Getting Started +Create a simple [`ConfigurationSpace`][ConfigSpace.configuration_space.ConfigurationSpace] +and then sample a [`Configuration`][ConfigSpace.configuration.Configuration] from it. + +```python exec="True" result="python" source="material-block" +from ConfigSpace import ConfigurationSpace + +cs = ConfigurationSpace({ + "myfloat": (0.1, 1.5), # Uniform Float + "myint": (2, 10), # Uniform Integer + "species": ["mouse", "cat", "dog"], # Categorical +}) +configs = cs.sample_configuration(2) +print(configs) +``` + + +Use [`Float`][ConfigSpace.api.types.float.Float], +[`Integer`][ConfigSpace.api.types.integer.Integer], +and [`Categorical`][ConfigSpace.api.types.categorical.Categorical] to define hyperparameters +and define how sampling is done. + +```python exec="True" result="python" source="material-block" +from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal + +cs = ConfigurationSpace( + name="myspace", + seed=1234, + space={ + "a": Float("a", bounds=(0.1, 1.5), distribution=Normal(1, 0.5)), + "b": Integer("b", bounds=(1, 10_00), log=True, default=100), + "c": Categorical("c", ["mouse", "cat", "dog"], weights=[2, 1, 1]), + }, +) +configs = cs.sample_configuration(2) +print(configs) +``` + +Maximum flexibility with conditionals, see the [user guide](./guide.md) for more information. 
+ +```python exec="True" result="python" source="material-block" +from ConfigSpace import Categorical, ConfigurationSpace, EqualsCondition, Float + +cs = ConfigurationSpace(seed=1234) + +c = Categorical("c1", items=["a", "b"]) +f = Float("f1", bounds=(1.0, 10.0)) + +# A condition where `f` is only active if `c` is equal to `a` when sampled +cond = EqualsCondition(f, c, "a") + +# Add them explicitly to the configuration space +cs.add([c, f]) +cs.add(cond) + +print(cs) +``` + + +### Installation +ConfigSpace requires **Python 3.8** or higher +and can be installed directly from the Python Package Index (PyPI) using `pip`. + +```bash +pip install ConfigSpace +``` + +### Citing ConfigSpace +```bibtex + @article{ + title = {BOAH: A Tool Suite for Multi-Fidelity Bayesian Optimization & Analysis of Hyperparameters}, + author = {M. Lindauer and K. Eggensperger and M. Feurer and A. Biedenkapp and J. Marben and P. Müller and F. Hutter}, + journal = {arXiv:1908.06756 {[cs.LG]}}, + date = {2019}, + } +``` diff --git a/docs/index.rst b/docs/index.rst deleted file mode 100644 index fff21dac..00000000 --- a/docs/index.rst +++ /dev/null @@ -1,137 +0,0 @@ -Welcome to ConfigSpace's documentation! -======================================= - -.. toctree:: - :hidden: - :maxdepth: 2 - - quickstart - guide - api/index - -ConfigSpace is a simple python package to manage configuration spaces for -`algorithm configuration `_ and -`hyperparameter optimization `_ tasks. -It includes various modules to translate between different text formats for -configuration space descriptions. - -ConfigSpace is often used in AutoML tools such as `SMAC3`_, `BOHB`_ or -`auto-sklearn`_. To read more about our group and projects, visit our homepage -`AutoML.org `_. - -This documentation explains how to use ConfigSpace and demonstrates its features. 
-In the :doc:`quickstart`, you will see how to set up a :class:`~ConfigSpace.configuration_space.ConfigurationSpace` -and add hyperparameters of different types to it. -Besides containing hyperparameters, a :class:`~ConfigSpace.configuration_space.ConfigurationSpace` can contain constraints such as conditions and forbidden clauses. -Those are introduced in the :doc:`user guide `. - -Furthermore, in the :ref:`serialization section `, it will be -explained how to serialize a :class:`~ConfigSpace.configuration_space.ConfigurationSpace` for later usage. - -.. _SMAC3: https://github.com/automl/SMAC3 -.. _BOHB: https://github.com/automl/HpBandSter -.. _auto-sklearn: https://github.com/automl/auto-sklearn - - - -Get Started ------------ - -Create a simple :class:`~ConfigSpace.configuration_space.ConfigurationSpace` and then sample a :class:`~ConfigSpace.configuration_space.Configuration` from it! - ->>> from ConfigSpace import ConfigurationSpace ->>> ->>> cs = ConfigurationSpace({ -... "myfloat": (0.1, 1.5), # Uniform Float -... "myint": (2, 10), # Uniform Integer -... "species": ["mouse", "cat", "dog"], # Categorical -... }) ->>> configs = cs.sample_configuration(2) - - -Use :mod:`~ConfigSpace.api.types.float`, :mod:`~ConfigSpace.api.types.integer` -or :mod:`~ConfigSpace.api.types.categorical` to customize how sampling is done! - ->>> from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal ->>> cs = ConfigurationSpace( -... name="myspace", -... seed=1234, -... space={ -... "a": Float("a", bounds=(0.1, 1.5), distribution=Normal(1, 10), log=True), -... "b": Integer("b", bounds=(2, 10)), -... "c": Categorical("c", ["mouse", "cat", "dog"], weights=[2, 1, 1]), -... }, -... 
) ->>> cs.sample_configuration(2) -[Configuration(values={ - 'a': 0.17013149799713567, - 'b': 5, - 'c': 'dog', -}) -, Configuration(values={ - 'a': 0.5476203000512754, - 'b': 9, - 'c': 'mouse', -}) -] - -Maximum flexibility with conditionals, see :ref:`forbidden clauses ` and :ref:`conditionals ` for more info. - ->>> from ConfigSpace import Categorical, ConfigurationSpace, EqualsCondition, Float -... ->>> cs = ConfigurationSpace(seed=1234) -... ->>> c = Categorical("c1", items=["a", "b"]) ->>> f = Float("f1", bounds=(1.0, 10.0)) -... ->>> # A condition where `f` is only active if `c` is equal to `a` when sampled ->>> cond = EqualsCondition(f, c, "a") -... ->>> # Add them explicitly to the configuration space ->>> cs.add_hyperparameters([c, f]) -[c1, Type: Categorical, Choices: {a, b}, Default: a, f1, Type: UniformFloat, Range: [1.0, 10.0], Default: 5.5] - ->>> cs.add_condition(cond) -f1 | c1 == 'a' - - - -Installation -============ - -*ConfigSpace* requires Python 3.7 or higher. - -*ConfigSpace* can be installed with *pip*: - -.. code:: bash - - pip install ConfigSpace - -If installing from source, the *ConfigSpace* package requires *numpy*, *cython* -and *pyparsing*. Additionally, a functioning C compiler is required. - -On Ubuntu, the required compiler tools and Python headers can be installed with: - -.. code:: bash - - sudo apt-get install build-essential python3 python3-dev - -When using Anaconda/Miniconda, the compiler has to be installed with: - -.. code:: bash - - conda install gxx_linux-64 gcc_linux-64 - - -Citing the ConfigSpace -====================== - -.. code:: - - @article{ - title = {BOAH: A Tool Suite for Multi-Fidelity Bayesian Optimization & Analysis of Hyperparameters}, - author = {M. Lindauer and K. Eggensperger and M. Feurer and A. Biedenkapp and J. Marben and P. Müller and F. 
Hutter}, - journal = {arXiv:1908.06756 {[cs.LG]}}, - date = {2019}, - } - diff --git a/docs/quickstart.md b/docs/quickstart.md new file mode 100644 index 00000000..f864807c --- /dev/null +++ b/docs/quickstart.md @@ -0,0 +1,199 @@ +## Quickstart +A [ConfigurationSpace][ConfigSpace.configuration_space.ConfigurationSpace] +is a data structure to describe the configuration space of an algorithm to tune. +Possible hyperparameter types are numerical, categorical, conditional and ordinal hyperparameters. + +AutoML tools, such as [`SMAC3`](https://github.com/automl/SMAC3) and [`BOHB`](https://github.com/automl/HpBandSter) are using the configuration space +module to sample hyperparameter configurations. +Also, [`auto-sklearn`](https://github.com/automl/auto-sklearn), an automated machine learning toolkit, which frees the +machine learning user from algorithm selection and hyperparameter tuning, +makes heavy use of the ConfigSpace package. + +This simple quickstart tutorial will show you, how to set up your own +[ConfigurationSpace][ConfigSpace.configuration_space.ConfigurationSpace], and will demonstrate +what you can realize with it. This [Basic Usage](#basic-usage) will include the following: + +- Create a [ConfigurationSpace][ConfigSpace.configuration_space.ConfigurationSpace] +- Define a simple [hyperparameter](./reference/hyperparameters.md) with a float value + +The [Advanced Usage](#advanced-usage) will cover: + +- Creating two sets of possible model configs, using [Conditions](./reference/conditions.md). +- Use a different distirbution for one of the hyperparameters. +- Create two subspaces from these and add them to a parent [ConfigurationSpace][ConfigSpace.configuration_space.ConfigurationSpace] +- Turn these configs into actual models! 
+ +These will not show the following and you should refer to the [user guide](./guide.md) for more: + +- Add [Forbidden clauses](./reference/forbiddens.md) +- Add [Conditions](./reference/conditions.md) +- [Serialize](./reference/configuration.md) + + +### Basic Usage + +We take a look at a simple +[ridge regression](http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html), +which has only one floating hyperparameter `alpha`. + +The first step is always to create a +[ConfigurationSpace][ConfigSpace.configuration_space.ConfigurationSpace] object. All the +hyperparameters and constraints will be added to this object. + +```python exec="true", source="material-block" result="python" session="quickstart-basic" +from ConfigSpace import ConfigurationSpace, Float + +cs = ConfigurationSpace(space={"alpha": (0.0, 1.0)}, seed=1234) +print(cs) +``` + +The hyperparameter `alpha` is chosen to have floating point values from `0` to `1`. +For demonstration purpose, we sample a configuration from the [ConfigurationSpace][ConfigSpace.configuration_space.ConfigurationSpace] object. + +```python exec="true", source="material-block" result="python" session="quickstart-basic" +config = cs.sample_configuration() +print(config) +``` + +You can use this configuration just like you would a regular old python dictionary! + +```python exec="true", source="material-block" result="python" session="quickstart-basic" +for key, value in config.items(): + print(key, value) +``` + +And that's it! + + +### Advanced Usage +Lets create a more complex example where we have two models, model `A` and model `B`. +Model `B` is some kernel based algorithm and `A` just needs a simple float hyperparamter. + + +We're going to create a config space that will let us correctly build a randomly selected model. 
+ + +```python exec="true", source="material-block" result="python" session="quickstart-advanced" +from typing import Literal +from dataclasses import dataclass + +@dataclass +class ModelA: + alpha: float + """Some value between 0 and 1""" + +@dataclass +class ModelB: + kernel: Literal["rbf", "flooper"] + """Kernel type.""" + + kernel_floops: int | None = None + """Number of floops for the flooper kernel, only used if kernel == "flooper".""" +``` + + +First, lets start with building the two individual subspaces where for `A`, we want to sample alpha from a normal distribution and for `B` we have the conditioned parameter and we slightly weight one kernel over another. + +```python exec="true", source="material-block" result="python" session="quickstart-advanced" +from typing import Literal +from ConfigSpace import ConfigurationSpace, Categorical, Integer, Float, Normal, EqualsCondition + +@dataclass +class ModelA: + alpha: float + """Some value between 0 and 1""" + + @staticmethod + def space() -> ConfigurationSpace: + return ConfigurationSpace({ + "alpha": Float("alpha", bounds=(0, 1), distribution=Normal(mu=0.5, sigma=0.2)) + }) + +@dataclass +class ModelB: + kernel: Literal["rbf", "flooper"] + """Kernel type.""" + + kernel_floops: int | None = None + """Number of floops for the flooper kernel, only used if kernel == "flooper".""" + + @staticmethod + def space() -> ConfigurationSpace: + cs = ConfigurationSpace( + { + "kernel": Categorical("kernel", ["rbf", "flooper"], default="rbf", weights=[.75, .25]), + "kernel_floops": Integer("kernel_floops", bounds=(1, 10)), + } + ) + + # We have to make sure "kernel_floops" is only active when the kernel is "floops" + cs.add(EqualsCondition(cs["kernel_floops"], cs["kernel"], "flooper")) + + return cs +``` + + +Finally, we need add these two a parent space where we condition each subspace to only be active depending on a **parent**. 
+We'll have the default configuration be `A` but we put more emphasis when sampling on `B` + +```python exec="true", source="material-block" result="python" session="quickstart-advanced" +from ConfigSpace import ConfigurationSpace, Categorical + +cs = ConfigurationSpace( + seed=123456, + space={ + "model": Categorical("model", ["A", "B"], default="A", weights=[1, 2]), + } +) + +# We set the prefix and delimiter to be empty string "" so that we don't have to do +# any extra parsing once sampling +cs.add_configuration_space( + prefix="", + delimiter="", + configuration_space=ModelA.space(), + parent_hyperparameter={"parent": cs["model"], "value": "A"}, +) + +cs.add_configuration_space( + prefix="", + delimiter="", + configuration_space=ModelB.space(), + parent_hyperparameter={"parent": cs["model"], "value": "B"} +) +print(cs) +``` + +And that's it! + +However for completness, lets examine how this works by first sampling from our config space. + +```python exec="true", source="material-block" result="python" session="quickstart-advanced" +configs = cs.sample_configuration(4) +print(configs) +``` + +We can see the three different kinds of models we have, our basic `A` model as well as our `B` model +with the two kernels. + +Next, we do some processing of these configs to generate valid params to pass to these models + + +```python exec="true", source="material-block" result="python" session="quickstart-advanced" +models = [] + +for config in configs: + config_as_dict = dict(config) + model_type = config_as_dict.pop("model") + + model = ModelA(**config_as_dict) if model_type == "A" else ModelB(**config_as_dict) + + models.append(model) + +print(models) +``` + + +To continue reading, visit the [user guide](./guide.md) section. There are +more information about hyperparameters, as well as an introduction to the +powerful concepts of [Conditions](./reference/conditions.md) and [Forbidden clauses](./reference/forbiddens.md). 
diff --git a/docs/quickstart.rst b/docs/quickstart.rst deleted file mode 100644 index bad7efa8..00000000 --- a/docs/quickstart.rst +++ /dev/null @@ -1,228 +0,0 @@ -Quickstart -========== - -A :class:`~ConfigSpace.configuration_space.ConfigurationSpace` -is a data structure to describe the configuration space of an algorithm to tune. -Possible hyperparameter types are numerical, categorical, conditional and ordinal hyperparameters. - -AutoML tools, such as `SMAC3`_ and `BOHB`_ are using the configuration space -module to sample hyperparameter configurations. -Also, `auto-sklearn`_, an automated machine learning toolkit, which frees the -machine learning user from algorithm selection and hyperparameter tuning, -makes heavy use of the ConfigSpace package. - -This simple quickstart tutorial will show you, how to set up your own -:class:`~ConfigSpace.configuration_space.ConfigurationSpace`, and will demonstrate -what you can realize with it. This :ref:`Basic Usage` will include the following: - -- Create a :class:`~ConfigSpace.configuration_space.ConfigurationSpace` -- Define a simple :ref:`hyperparameter ` and its range -- Change its :ref:`distributions `. - -The :ref:`Advanced Usage` will cover: - -- Creating two sets of possible model configs, using :ref:`Conditions` -- Create two subspaces from these and add them to a parent :class:`~ConfigSpace.configuration_space.ConfigurationSpace` -- Turn these configs into actual models! - -These will not show the following and you should refer to the :doc:`user guide ` for more: - -- Add :ref:`Forbidden clauses` -- Add :ref:`Conditions` to the :class:`~ConfigSpace.configuration_space.ConfigurationSpace` -- :ref:`Serialize ` the :class:`~ConfigSpace.configuration_space.ConfigurationSpace` - - -.. _Basic Usage: - -Basic Usage ------------ - -We take a look at a simple -`ridge regression `_, -which has only one floating hyperparameter :math:`\alpha`. 
- -The first step is always to create a -:class:`~ConfigSpace.configuration_space.ConfigurationSpace` object. All the -hyperparameters and constraints will be added to this object. - ->>> from ConfigSpace import ConfigurationSpace, Float ->>> ->>> cs = ConfigurationSpace( -... seed=1234, -... space={ "alpha": (0.0, 1.0) } -... ) - -The hyperparameter :math:`\alpha` is chosen to have floating point values from 0 to 1. -For demonstration purpose, we sample a configuration from the :class:`~ConfigSpace.configuration_space.ConfigurationSpace` object. - ->>> config = cs.sample_configuration() ->>> print(config) -Configuration(values={ - 'alpha': 0.1915194503788923, -}) - - -You can use this configuration just like you would a regular old python dictionary! - ->>> for key, value in config.items(): -... print(key, value) -alpha 0.1915194503788923 - -And that's it! - - -.. _Advanced Usage: - -Advanced Usage --------------- -Lets create a more complex example where we have two models, model ``A`` and model ``B``. -Model ``B`` is some kernel based algorithm and ``A`` just needs a simple float hyperparamter. - - -We're going to create a config space that will let us correctly build a randomly selected model. - -.. code:: python - - class ModelA: - - def __init__(self, alpha: float): - """ - Parameters - ---------- - alpha: float - Some value between 0 and 1 - """ - ... - - class ModelB: - - def __init__(self, kernel: str, kernel_floops: int | None = None): - """ - Parameters - ---------- - kernel: "rbf" or "flooper" - If the kernel is set to "flooper", kernel_floops must be set. - - kernel_floops: int | None = None - Floop factor of the kernel - """ - ... - - -First, lets start with building the two individual subspaces where for ``A``, we want to sample alpha from a normal distribution and for ``B`` we have the conditioned parameter and we slightly weight one kernel over another. - -.. 
code:: python - - from ConfigSpace import ConfigSpace, Categorical, Integer, Float, Normal - - class ModelA: - - def __init__(self, alpha: float): - ... - - @staticmethod - def space(self) -> ConfigSpace: - return ConfigurationSpace({ - "alpha": Float("alpha", bounds=(0, 1), distribution=Normal(mu=0.5, sigma=0.2) - }) - - class ModelB: - - def __init__(self, kernel: str, kernel_floops: int | None = None): - ... - - @staticmethod - def space(self) -> ConfigSpace: - cs = ConfigurationSpace( - { - "kernel": Categorical("kernel", ["rbf", "flooper"], default="rbf", weights=[.75, .25]), - "kernel_floops": Integer("kernel_floops", bounds=(1, 10)), - } - ) - - # We have to make sure "kernel_floops" is only active when the kernel is "floops" - cs.add_condition(EqualsCondition(cs_B["kernel_floops"], cs_B["kernel"], "flooper")) - - return cs - - -Finally, we need add these two a parent space where we condition each subspace to only be active depending on a **parent**. -We'll have the default configuration be ``A`` but we put more emphasis when sampling on ``B`` - -.. code:: python - - cs = ConfigurationSpace( - seed=1234, - space={ - "model": Categorical("model", ["A", "B"], default="A", weights=[1, 2]), - } - ) - - # We set the prefix and delimiter to be empty string "" so that we don't have to do - # any extra parsing once sampling - cs.add_configuration_space( - prefix="", - delimiter="", - configuration_space=ModelA.space(), - parent_hyperparameter={"parent": cs["model"], "value": "A"}, - ) - - cs.add_configuration_space( - prefix="", - delimiter="", - configuration_space=modelB.space(), - parent_hyperparameter={"parent": cs["model"], "value": "B"} - ) - -And that's it! - -However for completness, lets examine how this works by first sampling from our config space. - -.. 
code:: python - - configs = cs.sample_configuration(4) - print(configs) - - # [Configuration(values={ - # 'model': 'A', - # 'alpha': 0.7799758081188035, - # }) - # , Configuration(values={ - # 'model': 'B', - # 'kernel': 'flooper', - # 'kernel_floops': 8, - # }) - # , Configuration(values={ - # 'model': 'B', - # 'kernel': 'rbf', - # }) - # , Configuration(values={ - # 'model': 'B', - # 'kernel': 'rbf', - # }) - # ] - -We can see the three different kinds of models we have, our basic ``A`` model as well as our ``B`` model -with the two kernels. - -Next, we do some processing of these configs to generate valid params to pass to these models - -.. code:: python - - models = [] - - for config in configs: - model_type = config.pop("model") - - model = ModelA(**config) if model_type == "A" else ModelB(**config) - - models.append(model) - - -To continue reading, visit the :doc:`user guide ` section. There are -more information about hyperparameters, as well as an introduction to the -powerful concepts of :ref:`Conditions` and :ref:`Forbidden clauses`. - -.. _SMAC3: https://github.com/automl/SMAC3 -.. _BOHB: https://github.com/automl/HpBandSter -.. _auto-sklearn: https://github.com/automl/auto-sklearn diff --git a/docs/reference/conditions.md b/docs/reference/conditions.md new file mode 100644 index 00000000..a43c0a5c --- /dev/null +++ b/docs/reference/conditions.md @@ -0,0 +1,25 @@ +## Conditions +ConfigSpace can realize *equal*, *not equal*, *less than*, *greater than* and +*in conditions*. + +Conditions can be combined by using the conjunctions *and* and *or*. +To see how to use conditions, please take a look at the [user guide](../guide.md). 
+ +For now, please refer to the individual API docs for these classes: + +* [EqualsCondition][ConfigSpace.conditions.EqualsCondition] +* [NotEqualsCondition][ConfigSpace.conditions.NotEqualsCondition] +* [LessThanCondition][ConfigSpace.conditions.LessThanCondition] +* [GreaterThanCondition][ConfigSpace.conditions.GreaterThanCondition] +* [InCondition][ConfigSpace.conditions.InCondition] + +To combine conditions, you can use the following classes: + +* [AndConjunction][ConfigSpace.conditions.AndConjunction] +* [OrConjunction][ConfigSpace.conditions.OrConjunction] + +!!! warning + + We advise not using the `EqualsCondition` or the `InCondition` on float hyperparameters. + Due to numerical rounding that can occur, it can be the case that these conditions evaluate to + `False` even if they should evaluate to `True`. diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md new file mode 100644 index 00000000..a33e300c --- /dev/null +++ b/docs/reference/configuration.md @@ -0,0 +1,34 @@ +## Configuration +A [`Configuration`][ConfigSpace.configuration.Configuration] is an dict-like object, +going from the name of selected hyperparameters to the values. + +```python exec="True" result="python" +from ConfigSpace import ConfigurationSpace + +cs = ConfigurationSpace( + { + "a": (0, 10), + "b": ["cat", "dog"], + } +) +configuration = cs.sample_configuration() + +for name, value in configuration.items(): + print(f"{name}: {value}") + +print(configuration["a"]) +``` + +Underneath the hood, there is some **vectorized** representation of the configuration, +which in this case may look like `np.array([0.32, 1])` which stands for `{"a": 3.2, "b": "dog"}`. +This vectorized representation can be useful for optimizer numerical optimization algorithms. +You can access it with [`configuration.get_array()`][ConfigSpace.configuration.Configuration]. + +!!! 
tip
+
+    Every `Configuration` has a reference to the underlying
+    [`ConfigurationSpace`][ConfigSpace.configuration_space.ConfigurationSpace]
+    which can be accessed with [`Configuration.config_space`][ConfigSpace.configuration.Configuration.config_space].
+
+
+For more, please check out the API documentation for [`Configuration`][ConfigSpace.configuration.Configuration].
diff --git a/docs/reference/configuration_space.md b/docs/reference/configuration_space.md
new file mode 100644
index 00000000..4e61d784
--- /dev/null
+++ b/docs/reference/configuration_space.md
@@ -0,0 +1,3 @@
+## Configuration Space
+Please refer to the [user guide](../guide.md) for a walkthrough or refer to the
+API documentation for [`ConfigurationSpace`][ConfigSpace.configuration_space.ConfigurationSpace].
diff --git a/docs/reference/forbiddens.md b/docs/reference/forbiddens.md
new file mode 100644
index 00000000..4f5b9227
--- /dev/null
+++ b/docs/reference/forbiddens.md
@@ -0,0 +1,23 @@
+## Forbidden Clauses
+
+ConfigSpace contains *forbidden equal* and *forbidden in clauses*.
+The *ForbiddenEqualsClause* and the *ForbiddenInClause* can forbid values to be
+sampled from a configuration space if a certain condition is met. The
+*ForbiddenAndConjunction* can be used to combine *ForbiddenEqualsClauses* and
+the *ForbiddenInClauses*.
+
+For a further example, please take a look at the [user guide](../guide.md)
+or the API docs below:
+
+### Static clauses
+* [ForbiddenEqualsClause][ConfigSpace.forbidden.ForbiddenEqualsClause]
+* [ForbiddenInClause][ConfigSpace.forbidden.ForbiddenInClause]
+
+### Conjunctions
+* [ForbiddenAndConjunction][ConfigSpace.forbidden.ForbiddenAndConjunction]
+
+### Relational Clauses
+* [ForbiddenLessThanRelation][ConfigSpace.forbidden.ForbiddenLessThanRelation]
+* [ForbiddenGreaterThanRelation][ConfigSpace.forbidden.ForbiddenGreaterThanRelation]
+* [ForbiddenEqualsRelation][ConfigSpace.forbidden.ForbiddenEqualsRelation]
+
diff --git a/docs/reference/hyperparameters.md b/docs/reference/hyperparameters.md
new file mode 100644
index 00000000..222c24f8
--- /dev/null
+++ b/docs/reference/hyperparameters.md
@@ -0,0 +1,546 @@
+## Hyperparameters
+ConfigSpace contains three ways to define hyperparameters, each offering more customizability than the last.
+We first demonstrate the three different ways to define hyperparameters: **inferred**, **simple**, and **direct**.
+
+Later, we will show how to directly use the hyperparameters if required; however, this is mostly for library developers
+using ConfigSpace as a dependency.
+
+---
+
+* Directly when constructing the [`ConfigurationSpace`][ConfigSpace.configuration_space.ConfigurationSpace] object,
+we call these **inferred** hyperparameters. **Use these if you have a simple search space or are doing rapid prototyping.**
+```python exec="True" result="python" source="tabbed-left"
+from ConfigSpace import ConfigurationSpace
+
+cs = ConfigurationSpace(
+    {
+        "a": (0, 10),  # Integer from 0 to 10
+        "b": ["cat", "dog"],  # Categorical with choices "cat" and "dog"
+        "c": (0.0, 1.0),  # Float from 0.0 to 1.0
+    }
+)
+print(cs)
+```
+* Using functions to create them for you. We call these **simple** hyperparameters and they should
+satisfy most use cases. 
**Use these if you just want to create a searchspace required by another library.** +```python exec="True" result="python" source="tabbed-left" +from ConfigSpace import ConfigurationSpace, Integer, Categorical, Float, Normal + +cs = ConfigurationSpace( + { + "a": Integer("a", (0, 10), log=False), # Integer from 0 to 10 + "b": Categorical("b", ["cat", "dog"], ordered=True), # Ordered categorical with choices "cat" and "dog" + "c": Float("c", (1e-5, 1e2), log=True), # Float from 0.0 to 1.0, log scaled + "d": Float("d", (10, 20), distribution=Normal(15, 2)), # Float from 10 to 20, normal distribution + } +) +print(cs) +``` +* Using the types directly. We call these **direct** hyperparameters. These are the real types used +throughout ConfigSpace and offer the most customizability. +**Use these if you are building a library the utilizes ConfigSpace.** +```python exec="True" result="python" source="tabbed-left" +from ConfigSpace import ( + ConfigurationSpace, + UniformIntegerHyperparameter, + CategoricalHyperparameter, + UniformFloatHyperparameter, + NormalFloatHyperparameter, + OrdinalHyperparameter +) + +cs = ConfigurationSpace( + { + "a": UniformIntegerHyperparameter("a", lower=0, upper=10, log=False), # Integer from 0 to 10 + "b": CategoricalHyperparameter("b", choices=["cat", "dog"], default_value="dog"), # Ordered categorical with choices "cat" and "dog" + "c": UniformFloatHyperparameter("c", lower=1e-5, upper=1e2, log=True), # Float from 0.0 to 1.0, log scaled + "d": NormalFloatHyperparameter("d", lower=10, upper=20, mu=15, sigma=2), # Float from 10 to 20, normal distribution + "e": OrdinalHyperparameter("e", sequence=["s", "m", "l"], default_value="s"), # Ordered categorical + } +) +print(cs) +``` + +## Inferred Hyperparameters +When creating hyperparameters directly in the [`ConfigurationSpace`][ConfigSpace.configuration_space.ConfigurationSpace] object, +you can create three different kinds of hyperparameters. 
This can be useful for simple testing or quick prototyping.
+
+```python exec="True" result="python" source="material-block"
+from ConfigSpace import ConfigurationSpace
+
+cs = ConfigurationSpace(
+    {
+        "a": (0, 10),  # Integer from 0 to 10
+        "b": ["cat", "dog"],  # Categorical with choices "cat" and "dog"
+        "c": (0.0, 1.0)  # Float from 0.0 to 1.0
+    }
+)
+print(cs)
+```
+
+The rules are as follows:
+
+* If the value is a tuple, with `int`s, then it is considered an integer hyperparameter with a **uniform** distribution.
+* If the value is a tuple, with `float`s, then it is considered a float hyperparameter with a **uniform** distribution.
+* If the value is a list, then each element is considered a choice for a categorical hyperparameter, with no inherent
+order.
+
+!!! warning "Mixed types in a tuple"
+
+    If you use an `int` and a `float` in the same tuple, it will infer the type using the **first** element.
+    For example, `(0, 1.0)` will be inferred as an integer hyperparameter, while `(1.0, 10)` will
+    be inferred as a float hyperparameter.
+
+## Simple Hyperparameters
+Most of the time, you just require the ability to create hyperparameters and pass them to some other library.
+To make this as easy as possible, we parametrize building the various **direct** hyperparameters that exist.
+
+### Integer
+The [`Integer()`][ConfigSpace.api.types.integer.Integer] **function** samples an `int` uniformly
+from the range `(lower, upper)`, with options to define them as being on a `log=` scale or
+that you prefer the sampling to be done under a different `distribution=`.
+```python exec="True" result="python" source="material-block" +from ConfigSpace import Integer, ConfigurationSpace, Uniform, Normal + +cs = ConfigurationSpace() + +cs.add( + Integer("a", (0, 10), log=False), + Integer("b", (0, 10), log=False, distribution=Uniform(), default=5), + Integer("c", (1, 1000), log=True, distribution=Normal(mu=200, sigma=200)), +) +print(cs) +print(cs["a"].sample_value(size=5)) +``` + +Please check out the [distributions API][ConfigSpace.api.distributions.Distribution] for more information on the available +distributions. + +!!! warning "Not a type" + Please be aware that `Integer` is a convenience **function** that returns + one of the **direct** hyperparameter classes. Please see the [direct hyperparameters](#direct-hyperparameters) if you need to + access the underlying classes. + +### Float +The [`Float()`][ConfigSpace.api.types.float.Float] **function** samples a `float` uniformly from the range `(lower, upper)`, +with options to define them as being on a `log=` scale or +that you prefer the sampling to be done under a different `distribution=`. +```python exec="True" result="python" source="material-block" +from ConfigSpace import Float, ConfigurationSpace, Uniform, Normal + +cs = ConfigurationSpace() + +cs.add( + Float("a", (0, 10), log=False), + Float("b", (0, 10), log=False, distribution=Uniform(), default=5), + Float("c", (1, 1000), log=True, distribution=Normal(mu=200, sigma=200)), +) +print(cs) +print(cs["a"].sample_value(size=5)) +``` + +Please check out the [distributions API][ConfigSpace.api.distributions.Distribution] for more information on the available +distributions. + +!!! warning "Not a type" + Please be aware that `Float` is a convenience **function** that returns + one of the **direct** hyperparameter classes. Please see the [direct hyperparameters](#direct-hyperparameters) if you need to + access the underlying classes. 
+ +### Categorical +The [`Categorical()`][ConfigSpace.api.types.categorical.Categorical] **function** samples a value from the `choices=` provided. +optionally giving them `weights=`, influencing the distribution of the sampling. You may also define them +as `ordered=` if there is an inherent order to the choices. +```python exec="True" result="python" source="material-block" +from ConfigSpace import Categorical, ConfigurationSpace + +cs = ConfigurationSpace() + +cs.add( + Categorical("a", ["cat", "dog", "mouse"], default="dog"), + Categorical("b", ["small", "medium", "large"], ordered=True, default="medium"), + Categorical("c", [True, False], weights=[0.2, 0.8]), +) +print(cs) +print(cs["c"].sample_value(size=5)) +``` + +!!! warning "Not a type" + Please be aware that `Categorical` is a convenience **function** that returns + one of the **direct** hyperparameter classes. Please see the [direct hyperparameters](#direct-hyperparameters) if you need to + access the underlying classes. + +## Direct Hyperparameters +All of the methods for constructing hyperparameters above will result in one of the following types. 
+
+**Integer Hyperparameter**
+
+* [`UniformIntegerHyperparameter`][ConfigSpace.hyperparameters.UniformIntegerHyperparameter]
+* [`NormalIntegerHyperparameter`][ConfigSpace.hyperparameters.NormalIntegerHyperparameter]
+* [`BetaIntegerHyperparameter`][ConfigSpace.hyperparameters.BetaIntegerHyperparameter]
+
+**Float Hyperparameter**
+
+* [`UniformFloatHyperparameter`][ConfigSpace.hyperparameters.UniformFloatHyperparameter]
+* [`NormalFloatHyperparameter`][ConfigSpace.hyperparameters.NormalFloatHyperparameter]
+* [`BetaFloatHyperparameter`][ConfigSpace.hyperparameters.BetaFloatHyperparameter]
+
+**Categorical Hyperparameter**
+
+* [`CategoricalHyperparameter`][ConfigSpace.hyperparameters.CategoricalHyperparameter]
+* [`OrdinalHyperparameter`][ConfigSpace.hyperparameters.OrdinalHyperparameter]
+
+You can utilize these types in your code as required for `isinstance` checks or allow your own code to create
+them as required. If developing a library, please see below to understand a bit more about the [structure of a Hyperparameter](#structure-of-a-hyperparameter).
+
+## Structure of a Hyperparameter
+All hyperparameters inherit from the [`Hyperparameter`][ConfigSpace.hyperparameters.Hyperparameter] base class, with
+two important components to consider:
+
+1. **vectorized space**: This defines some underlying numeric range along with a procedure to sample from it.
+2. **value space**: These are the values that are given back to the user, *e.g.* `["cat", "dog"]`.
+
+What makes a hyperparameter the hyperparameter it is then:
+
+1. How we sample from the vectorized space, defined by a [`Distribution`][ConfigSpace.hyperparameters.distributions.Distribution].
+2. How we map to and from the vectorized space to the value space, defined by a [`Transformer`][ConfigSpace.hyperparameters.hp_components.Transformer].
+
+??? tip "Why a vectorized space?"
+
+    Most optimizers require some kind of bounds and a pure numeric space from which to optimize over, i.e.
+ it would be hard to optimize over a hyperparameter space of `["cat", "dog"]` directly. + + This also lets use share implementation details and optimization across various kinds of hyperparameters + if they share the same underlying vectorized space. + + +!!! example "CategoricalHyperparameter" + + Inside of the `__init__` method of a `CategoricalHyperparameter`, you will find something along the lines + of the following: + + ```python + class CategoricalHyperparameter(Hyperparameter): + def __init__(...): + + # ... + super().__init__( + vector_dist=UniformIntegerDistribution(size=len(choices)), + transformer=TransformerSeq(seq=choices), + ... + ) + ``` + + What this is showing is that we will use + [`UniformIntegerDistribution`][ConfigSpace.hyperparameters.distributions.UniformIntegerDistribution], which + samples integers uniformly from `0` to `len(choices) - 1`, and then we use a + [`TransformerSeq`][ConfigSpace.hyperparameters.hp_components.TransformerSeq] to map these integers to the + corresponding choices provided by the users. + + Internally in `ConfigSpace`, we will primarily work with the vectorized space for efficiency purposes, + but when providing values back to the user, either from the + [`Configuration`][ConfigSpace.configuration.Configuration] or other means, we will use the `transformer=` to + map the vectorized space back to the value space. + +Using just these two components alone, we can provide the following functionality from the [`Hyperparameter`][ConfigSpace.hyperparameters.Hyperparameter] base class: + +* [`sample_vector()`][ConfigSpace.hyperparameters.Hyperparameter.sample_vector]: Samples a vectorized value +* [`sample_value()`][ConfigSpace.hyperparameters.Hyperparameter.sample_value]: +Samples a vectorized value and transforms it back to the value space. +* [`to_value()`][ConfigSpace.hyperparameters.Hyperparameter.to_value]: Transforms a vectorized value to the value space. 
+* [`to_vector()`][ConfigSpace.hyperparameters.Hyperparameter.to_vector]: Transforms a value space value to the vectorized space. +* [`pdf_vector()`][ConfigSpace.hyperparameters.Hyperparameter.pdf_vector]: The probability density function of a vectorized value. +* [`pdf_values()`][ConfigSpace.hyperparameters.Hyperparameter.pdf_values]: The probability density function of a value, + by transforming it to the vectorized space and then calculating the pdf. +* [`legal_value()`][ConfigSpace.hyperparameters.Hyperparameter.legal_value]: Check if a value is legal. +* [`legal_vector()`][ConfigSpace.hyperparameters.Hyperparameter.legal_vector]: Check if a vectorized value is legal. +* [`.lower_vectorized`][ConfigSpace.hyperparameters.Hyperparameter.lower_vectorized]: The lower bound in vectorized space. +* [`.upper_vectorized`][ConfigSpace.hyperparameters.Hyperparameter.upper_vectorized]: The upper bound in vectorized space. + + +Please note that most of these methods support individual values or numpy arrays of values, either as input or output. +Refer to the [API documentation][ConfigSpace.hyperparameters.Hyperparameter] for more information on the available methods. + +### Neighborhoods +One utility `ConfigSpace` provides to library developers is the ability to define a neighbourhood around a value. +This is often important for optimizers who require a neighbourhood to explore around a particular configuration or value. + +A class inheriting from [`Hyperparameter`][ConfigSpace.hyperparameters.Hyperparameter] must also provide +a [`Neighborhood`][ConfigSpace.hyperparameters.hp_components.Neighborhood], which is something that can be called +with a vectorized value and provide values around that point. + +The expected signature is rather straight forward, given a `vector` value and a `n` number of samples to return, +it should return a numpy array of **up to** `n` **unique** samples. 
+ +```python +def __call__( + self, + vector: np.float64, + n: int, + *, + std: float | None = None, + seed: RandomState | None = None, +) -> npt.NDArray[np.float64]: ... +``` + +They must also provide a `_neighbourhood_size`, either `np.inf` if +unbounded or a method that returns the maximum possible neighbors that are possible around a given value. + +By subclasses providing this through the `__init__` method, we can then provide the following functionality: + +* [`get_num_neighbors()`][ConfigSpace.hyperparameters.Hyperparameter.get_num_neighbors]: Get the number of neighbours around a value. +* [`neighbors_vectorized()`][ConfigSpace.hyperparameters.Hyperparameter.neighbors_vectorized]: Get neighbors around a +point in vectorized space. +* [`neighbors_values()`][ConfigSpace.hyperparameters.Hyperparameter.neighbors_values]: Get neighbors around a +point in value space. + +Please refer to the source code definition of existing hyperparameters for more information on how to implement this. +Most of this is defined in the `__init__` method of the hyperparameter. + +### Example: Implementing the BetaIntegerHyperparameter +For implementing your own hyperparameter type, it's useful to look at a case study of implementing an existing +hyperparameter and to see what functionality can be re-used in the library. +Please refer to this article on Wikipedia for more information on the [Beta distribution](https://en.wikipedia.org/wiki/Beta_distribution) +for more on the distribution. + +!!! tip + + Be aware the `ConfigSpace` is heavily optimized towards performance using profiling, and where possible, it + would be good to use pre-existing components to build your hyperparameter. You'd be surprised how much milliseconds + add up when sampling thousands of configurations both globally and using neighborhoods. 
+ +--- + +#### Defining the BetaintegerHyperparameter +First, we need to define the `__init__` method of the `BetaIntegerHyperparameter`, where we're going to +need the following for a [`BetaIntegerHyperparameter`][ConfigSpace.hyperparameters.IntegerHyperparameter]: + +* `name=`: The name of the hyperparameter, required for all kinds of hyperparameters +* `lower=`, `upper=`: The bounds the user would like in value space, i.e. `(1, 5)` +* `default_value=`: The default value of the hyperparameter. +* `alpha=`, `beta=`: The parameters of the beta distribution itself. + +#### Vectorized Space +For our purposes, we will mostly rely on scipys `beta` distribution to sample from a **vectorized space**. +Here is how you would sample from it in `scipy:` + +```python exec="True" source="material-block" result="python" +from scipy.stats import beta as spbeta + +alpha, beta = 3, 2 +beta_rv = spbeta(alpha, beta) +samples = beta_rv.rvs(size=5) +print(samples) +``` + +The problem however is that scipy only offers a contiuous version of this distribution, however we +need to sample integers. To solve this, we will use the +[`DiscretizedContinuousScipyDistribution`][ConfigSpace.hyperparameters.distributions.DiscretizedContinuousScipyDistribution] + +```python exec="True" source="material-block" result="python" +import numpy as np +from scipy.stats import beta as spbeta +from ConfigSpace.hyperparameters.distributions import DiscretizedContinuousScipyDistribution + +# As before +alpha, beta = 3, 2 +beta_rv = spbeta(alpha, beta) + + +# Declare our value space bounds and how many discrete steps there +# are between them. +value_bounds = (1, 5) +discrete_steps = value_bounds[1] - value_bounds[0] + 1 + +# Creates a distribution which can discretize the continuous range +# into `size` number of steps, such that we can map the discretized +# vector values into integers in the range that was requested. 
+ +# Where possible, it is usually preferable to have vectorized bounds from (0, 1) +# We also require all vectorized values to be np.float64, even if they represent integers +vector_distribution = DiscretizedContinuousScipyDistribution( + rv=beta_rv, + steps=discrete_steps, + lower_vectorized=np.float64(0), + upper_vectorized=np.float64(1), +) +print(vector_distribution.sample_vector(n=5)) +``` + +!!! tip + + To support `scipy` distributions we implement various optimized [`Distribution`][ConfigSpace.hyperparameters.distributions.Distribution]s + + * [`ScipyContinuousDistribution`][ConfigSpace.hyperparameters.distributions.ScipyContinuousDistribution]: + Samples from a continuous scipy distribution. + * [`ScipyDiscreteDistribution`][ConfigSpace.hyperparameters.distributions.ScipyDiscreteDistribution]: + Samples from a discrete scipy distribution. + * [`DiscretizedContinuousScipyDistribution`][ConfigSpace.hyperparameters.distributions.DiscretizedContinuousScipyDistribution]: + Samples from a continuous scipy distribution, but discretizes the output efficiently. + + The also often provide a `neighborhood` method to sample around a point that can be used, as well as a + `pdf` method, which can do so efficiently in both memory and time. + Please refer to their individual API documentation for more information on how to create and use them. + +### Transforming from Vectorized Space to Value Space +To convert from the vectorized space to the value space, we will need to implement a +[`Transformer`][ConfigSpace.hyperparameters.hp_components.Transformer] that can map the vectorized space to the +value space, e.g. `(0.0, 1.0)` to `(1, 5)`. + +To do this, we provide a convenience class called [`UnitScaler`][ConfigSpace.hyperparameters.hp_components.UnitScaler], +which also allows for a `log=` scale transformation. 
+ +```python exec="True" source="material-block" result="python" +import numpy as np +from scipy.stats import beta as spbeta +from ConfigSpace.hyperparameters.distributions import DiscretizedContinuousScipyDistribution +from ConfigSpace.hyperparameters.hp_components import UnitScaler + +# Define the distribution sampler +alpha, beta = 3, 2 +vector_distribution = DiscretizedContinuousScipyDistribution( + rv=spbeta(alpha, beta), + steps=5, + lower_vectorized=np.float64(0), + upper_vectorized=np.float64(1), +) +vector_samples = vector_distribution.sample_vector(n=5) +print(vector_samples) + +# Define the transformer from the samplers range to the range we care about +transformer = UnitScaler( + lower_value=np.int64(1), + upper_value=np.int64(5), + dtype=np.int64, # We want integers in value space + log=False, +) +integer_values = transformer.to_value(vector_samples) +print(integer_values) + +back_to_vector = transformer.to_vector(integer_values) +print(back_to_vector) +``` + +You are of course free to implement your own [`Transformer`][ConfigSpace.hyperparameters.hp_components.Transformer] +if you require a more complex transformation, however where possible, the +[`UnitScaler`][ConfigSpace.hyperparameters.hp_components.UnitScaler] is preffered as it handles some edge cases +and performs some optimized routines while remaining fully within the expected API. + +### Creating the BetaIntegerHyperparameter class +Below we provide what is essentially the entire `BetaIntegerHyperparameter` in `ConfigSpace`. +Nothing else is required and you can hotswap this out with other kinds of distributions if you require +new kinds of `Hyperparameters`. Most libraries using `ConfigSpace` who do not require explicit kinds +of hyperparameters should be able to utilize these. + + +!!! note + + We use dataclasses in ConfigSpace, which means that inherting classes should also be a + dataclass. This is not a strict requirement, but it is recommended to keep the API consistent. 
+ +```python +from typing import TypeAlias, Union, Mapping, Hashable, Any +import numpy as np +from scipy.stats import beta as spbeta + +from ConfigSpace.hyperparameters import IntegerHyperparameter +from ConfigSpace.hyperparameters.distributions import DiscretizedContinuousScipyDistribution +from ConfigSpace.hyperparameters.hp_components import UnitScaler +from ConfigSpace.functional import is_close_to_integer_single + +i64 = np.int64 +f64 = np.float64 + +# We allow any kind of number to be used, we will cast as required +Number: TypeAlias = Union[int, float, np.number] + +@dataclass(init=False) # We provide our own init +class BetaIntegerHyperparamter(IntegerHyperparameter): + ORDERABLE: ClassVar[bool] = True # Let ConfigSpace know there is an order to the values + + alpha: float + """Some docstring decsription of this attribute.""" + + beta: float + lower: float + upper: float + log: bool + name: str + default_value: float + meta: Mapping[Hashable, Any] | None + + size: float = field(init=False) # This will be calculated + + def __init__( + self, + name: str, + alpha: Number, + beta: Number, + lower: Number, + upper: Number, + default_value: Number | None = None, + log: bool = False, + meta: Mapping[Hashable, Any] | None = None, + ) -> None: + if (alpha < 1) or (beta < 1): + raise ValueError( + "Please provide values of alpha and beta larger than or equal to" + "1 so that the probability density is finite.", + ) + self.alpha = float(alpha) + self.beta = float(beta) + self.lower = int(np.rint(lower)) + self.upper = int(np.rint(upper)) + self.log = bool(log) + + # Create the transformer + try: + scaler = UnitScaler(i64(self.lower), i64(self.upper), log=log, dtype=i64) + except ValueError as e: + raise ValueError(f"Hyperparameter '{name}' has illegal settings") from e + + + if default_value is None: + # Get the mode of the distribution for setting a default + if (self.alpha > 1) or (self.beta > 1): + vectorized_mode = (self.alpha - 1) / (self.alpha + self.beta - 2) 
+ else: + # If both alpha and beta are 1, we have a uniform distribution. + vectorized_mode = 0.5 + + _default_value = np.rint( + scaler.to_value(np.array([vectorized_mode]))[0], + ).astype(i64) + else: + if not is_close_to_integer_single(default_value): + raise TypeError( + f"`default_value` for hyperparameter '{name}' must be an integer." + f" Got '{type(default_value).__name__}' for {default_value=}.", + ) + + _default_value = np.rint(default_value).astype(i64) + + size = int(self.upper - self.lower + 1) + vector_dist = DiscretizedContinuousScipyDistribution( + rv=spbeta(self.alpha, self.beta), # type: ignore + steps=size, + lower_vectorized=f64(0.0), + upper_vectorized=f64(1.0), + ) + + super().__init__( + name=name, + size=size, + default_value=_default_value, + meta=meta, + transformer=scaler, + vector_dist=vector_dist, + neighborhood=vector_dist.neighborhood, + # Tell ConfigSpace we expect an `int` when giving back a single value + # For a np.ndarray of values, this will be `np.int64` + value_cast=int, + # This method comes from the IntegerHyperparameter + # you can implement this you self if you'd like + neighborhood_size=self._integer_neighborhood_size, + ) +``` diff --git a/docs/reference/serialization.md b/docs/reference/serialization.md new file mode 100644 index 00000000..d66417bd --- /dev/null +++ b/docs/reference/serialization.md @@ -0,0 +1,110 @@ +## Serialization + +ConfigSpaces overs two primary methods of serialization, namely `json` and `yaml`. +Serializing is straight forward and can be done using the methods +[`configspace.to_json()`][ConfigSpace.configuration_space.ConfigurationSpace.to_json] +and [`configspace.to_yaml()`][ConfigSpace.configuration_space.ConfigurationSpace.to_yaml]. +To deserialize, you can call the corresponding classmethods +[`ConfigurationSpace.from_json()`][ConfigSpace.configuration_space.ConfigurationSpace.from_json] +and [`ConfigurationSpace.from_yaml()`][ConfigSpace.configuration_space.ConfigurationSpace.from_yaml]. 
+ +```python +from ConfigSpace import ConfigurationSpace +cs = ConfigurationSpace({"a": (0, 10), "b": ["cat", "dog"]}) +cs.to_json("configspace.json") +cs = ConfigurationSpace.from_json("configspace.json") + +cs.to_yaml("configspace.yaml") +cs = ConfigurationSpace.from_yaml("configspace.yaml") +``` + +### Plain type dict +We also support exporting the configuration space as a dictionary with plain simple python types. +This allows for easy serialization to other formats the support dictionary formats, for example, `toml`. + +This is provided through [`to_serialized_dict()`][ConfigSpace.configuration_space.ConfigurationSpace.to_serialized_dict] +and [`from_serialized_dict()`][ConfigSpace.configuration_space.ConfigurationSpace.from_serialized_dict]. + +### Custom Encoding and Decoding +To support custom hyperparameters or various other purposes, we allow you to include custom methods +for encoding and decoding, based on the type encountered. + +#### Encoding +For example, all serializing methods accept an `encoders=` parameter, which is a dictionary of +`type: (type_name_as_str, encoder)` pairs. + +For example: +```python exec="True" source="material-block" result="python" +from typing import Any, Callable +from ConfigSpace import ConfigurationSpace, CategoricalHyperparameter + +cs = ConfigurationSpace({"a": ["cat", "dog"]}) + +def my_custom_encoder( + hp: CategoricalHyperparameter, + encoder: Callable[[Any], dict], +) -> dict: + return { + "name": hp.name, + "choices": [f"!{c}!" for c in hp.choices], + } + +without_custom_encoder = cs.to_serialized_dict() +with_custom_encoder = cs.to_serialized_dict( + # Overrides the default encoder for CategoricalHyperparameters + encoders={ + CategoricalHyperparameter: ("my_category", my_custom_encoder), + } +) +print(without_custom_encoder) +print("--------") +print(with_custom_encoder) +``` + +The second argument to the encoder is a callable that can be used to encode any nested types, +deferring to the encoder for that type. 
This is useful for types such as conditionals or forbidden clauses, +which often contain hyperparameters within them. + +#### Decoding +Decoding is quite similar with a few minor differences to specification. + +```python +def my_decoder( + # The dictionary that needs to be decoded into a type + d: dict[str, Any], + # The current state of the ConfigurationSpace being decoded + space: ConfigurationSpace, + # A callable to offload decoding of nested types + decoder: Callable +) -> Any: + ... +``` + +As things such as conditions and forbidden clauses rely on hyperparmeters to be decoded first, +you need to specify what _kind_ of thing your decoder will operate on, +namely `"hyperparameters"`, `"conditions"` or `"forbiddens"`. + +```python +my_configspace = ConfigurationSpace.from_serialized_dict( + my_serialized_dict, + # Overrides the default decoder for CategoricalHyperparameters + decoders={ + "hyperparameters": { + "my_category": my_decoder, + }, + "conditions": {}, # No need to specify, just here for completeness + "forbiddens": {}, # No need to specify, just here for completeness + } +) +``` + + +### PCS +A common format for serialization of configuration spaces used to be the `PCS` format. +For those familiar with this, we still provide this using +[`ConfigSpace.read_and_write.pcs_new.read()`][ConfigSpace.read_and_write.pcs_new.read] +and [`ConfigSpace.read_and_write.pcs_new.write()`][ConfigSpace.read_and_write.pcs_new.write]. + +However this format is no longer directly supported and will issue deprecation warnings. +Going forward, we recommend using `json` or `yaml` where possible, as newer version of +ConfigSpace may include features not supported by the `PCS` format. 
diff --git a/ConfigSpace/py.typed b/docs/reference/utils.md similarity index 100% rename from ConfigSpace/py.typed rename to docs/reference/utils.md diff --git a/docs/scripts/api_generator.py b/docs/scripts/api_generator.py new file mode 100644 index 00000000..038ed9dd --- /dev/null +++ b/docs/scripts/api_generator.py @@ -0,0 +1,42 @@ +"""Generate the code reference pages and navigation. + +# https://mkdocstrings.github.io/recipes/ +""" +from __future__ import annotations + +import logging +from pathlib import Path + +import mkdocs_gen_files + +logger = logging.getLogger(__name__) + +# Modules whose members should not include inherited attributes or methods +# NOTE: Given the current setup, we can only operate at a module level. +# Ideally we specify options (at least at a module level) and we render +# them into strings using a yaml parser. For now this is fine though +NO_INHERITS = ("sklearn.evaluation",) +TAB = " " + +for path in sorted(Path("src").rglob("*.py")): + module_path = path.relative_to("src").with_suffix("") + doc_path = path.relative_to("src").with_suffix(".md") + full_doc_path = Path("api", doc_path) + + parts = tuple(module_path.parts) + + if parts[-1] in ("__main__", "__version__", "__init__"): + continue + + if any(part.startswith("_") for part in parts): + continue + + with mkdocs_gen_files.open(full_doc_path, "w") as fd: + ident = ".".join(parts) + fd.write(f"::: {ident}") + + if ident.endswith(NO_INHERITS): + fd.write(f"\n{TAB}options:") + fd.write(f"\n{TAB}{TAB}inherited_members: false") + + mkdocs_gen_files.set_edit_path(full_doc_path, path) diff --git a/docs/scripts/cleanup_log_output.py b/docs/scripts/cleanup_log_output.py new file mode 100644 index 00000000..cb0d8188 --- /dev/null +++ b/docs/scripts/cleanup_log_output.py @@ -0,0 +1,45 @@ +"""The module is a hook which disables warnings and log messages which pollute the +doc build output. 
+
+One possible downside is if one of these modules ends up giving an actual
+error, such as OpenML failing to retrieve a dataset. I tried to make sure ERROR
+log messages are still allowed through.
+"""
+
+from __future__ import annotations
+
+import logging
+import warnings
+from typing import Any
+
+import mkdocs
+import mkdocs.plugins
+import mkdocs.structure.pages
+
+log = logging.getLogger("mkdocs")
+
+
+@mkdocs.plugins.event_priority(-50)
+def on_startup(**kwargs: Any) -> None:
+    # We get a load of deprecation warnings from SMAC
+    warnings.filterwarnings("ignore", category=DeprecationWarning)
+
+    # Silence the deprecation warnings raised by the legacy
+    # pcs/pcs_new readers and writers used in the examples
+    warnings.filterwarnings(
+        "ignore",
+        category=DeprecationWarning,
+        message=".*pcs_new.*",
+    )
+
+
+def on_pre_page(
+    page: mkdocs.structure.pages.Page,
+    config: Any,
+    files: Any,
+) -> mkdocs.structure.pages.Page | None:
+    # Quieten noisy third-party loggers while pages render,
+    # but still let their ERROR messages come through.
+    logging.getLogger("smac").setLevel(logging.ERROR)
+    logging.getLogger("openml").setLevel(logging.ERROR)
+    return page
diff --git a/docs/scripts/debug_which_page_is_being_rendered.py b/docs/scripts/debug_which_page_is_being_rendered.py
new file mode 100644
index 00000000..5f8b642f
--- /dev/null
+++ b/docs/scripts/debug_which_page_is_being_rendered.py
@@ -0,0 +1,24 @@
+"""This module is a hook that, when any page is being rendered, will
+print the path to the file being rendered.
+
+This makes it easier to identify which file is being rendered when an error happens.
+""" +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Any + +import mkdocs +import mkdocs.plugins + +if TYPE_CHECKING: + import mkdocs.structure.pages + +log = logging.getLogger("mkdocs") + +def on_pre_page( + page: mkdocs.structure.pages.Page, + config: Any, + files: Any, +) -> mkdocs.structure.pages.Page | None: + log.info(f"{page.file.src_path}") diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 00000000..bc1613ec --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,187 @@ +# This project uses mkdocs to generate the documentation. +# Specifically it uses the mkdocs-material theme, which provides a whole +# host of nice features and customization +# +# mkdocs: https://www.mkdocs.org/getting-started/#getting-started-with-mkdocs +# mkdocs-material: https://squidfunk.github.io/mkdocs-material/ +# +# Please refer to these links for more information on how to use mkdocs +# +# For serving the docs locally, you can take a look at the `justfile` at +# the root of this repository, it contains a few commands for generating the docs +# with different levels of execution. 
+#
+# Please refer to individual sections for any additional notes
+site_name: "ConfigSpace"
+repo_url: https://github.com/automl/ConfigSpace/
+repo_name: automl/ConfigSpace
+
+theme:
+  name: material
+  logo: assets/logo.png
+  favicon: assets/logo.png
+  icon:
+    repo: fontawesome/brands/github
+  features:
+    - content.code.annotate
+    - content.code.copy
+    - navigation.footer
+    - navigation.sections
+    - toc.follow
+    - toc.integrate
+    - navigation.tabs
+    - navigation.tabs.sticky
+    - header.autohide
+    - search.suggest
+    - search.highlight
+    - search.share
+  font:
+    text: Roboto
+    code: Roboto Mono
+  palette:
+    - scheme: slate
+      media: "(prefers-color-scheme: dark)"
+      primary: indigo
+      accent: deep purple
+      toggle:
+        icon: material/eye-outline
+        name: Switch to light mode
+
+    # Palette toggle for light mode
+    - scheme: default
+      media: "(prefers-color-scheme: light)"
+      primary: indigo
+      accent: deep purple
+      toggle:
+        icon: material/eye
+        name: Switch to dark mode
+
+
+# The `mike` versioning provider
+# https://github.com/jimporter/mike
+#
+# This is what allows us to create versioned docs in the github cli
+extra:
+  version:
+    provider: mike
+  social:
+    - icon: fontawesome/brands/github
+      link: https://github.com/automl
+    - icon: fontawesome/brands/twitter
+      link: https://twitter.com/automl_org
+
+watch:
+  - src/ConfigSpace
+  - docs
+
+markdown_extensions:
+  - admonition
+  - tables
+  - attr_list
+  - md_in_html
+  - toc:
+      permalink: "#"
+  - pymdownx.highlight:
+      anchor_linenums: true
+  - pymdownx.magiclink:
+      hide_protocol: true
+      repo_url_shortener: true
+      repo_url_shorthand: true
+      user: automl
+      repo: ConfigSpace
+  - pymdownx.highlight
+  - pymdownx.inlinehilite
+  - pymdownx.snippets
+  - pymdownx.details
+  - pymdownx.tabbed:
+      alternate_style: true
+  - pymdownx.superfences:
+      custom_fences:
+        - name: mermaid
+          class: mermaid
+          format: !!python/name:pymdownx.superfences.fence_code_format
+  - pymdownx.emoji:
+      emoji_index: !!python/name:material.extensions.emoji.twemoji
+      emoji_generator: !!python/name:material.extensions.emoji.to_svg
+
+# These are files that are run when serving the docs.
+hooks:
+  # This hook simply prints the page being rendered for an easier time debugging
+  # any issues with code in docs
+  - docs/scripts/debug_which_page_is_being_rendered.py
+  # Removes some deprecation warnings we maintain
+  - docs/scripts/cleanup_log_output.py
+
+plugins:
+  - search
+  - autorefs
+  - glightbox
+  - offline:
+      enabled: !ENV [AMLTK_DOCS_OFFLINE, false]
+  - markdown-exec
+  - mike:
+      version_selector: true
+      css_dir: css
+      javascript_dir: js
+      canonical_version: latest
+  - gen-files:
+      scripts:
+        - docs/scripts/api_generator.py
+  - literate-nav:
+      nav_file: SUMMARY.md
+  - mkdocstrings:
+      default_handler: python
+      enable_inventory: true
+      handlers:
+        python:
+          paths: [src]
+          # Extra objects which allow for linking to external docs
+          import:
+            - 'https://docs.python.org/3/objects.inv'
+            - 'https://numpy.org/doc/stable/objects.inv'
+            - 'https://scikit-learn.org/stable/objects.inv'
+          # Please do not try to change these without having
+          # looked at all of the documentation and seeing if it
+          # causes the API docs to look weird anywhere.
+ options: # https://mkdocstrings.github.io/python/usage/ + docstring_section_style: spacy + docstring_options: + ignore_init_summary: true + trim_doctest_flags: true + returns_multiple_items: false + show_docstring_attributes: true + show_docstring_description: true + show_root_heading: true + show_root_toc_entry: true + show_object_full_path: false + show_root_members_full_path: false + signature_crossrefs: true + merge_init_into_class: true + show_symbol_type_heading: true + show_symbol_type_toc: true + docstring_style: google + inherited_members: true + show_if_no_docstring: false + show_bases: true + show_source: true + members_order: "alphabetical" + group_by_category: true + show_signature: true + separate_signature: true + show_signature_annotations: true + filters: + - "!^_[^_]" + +nav: + - Home: "index.md" + - Quickstart: "quickstart.md" + - Guide: "guide.md" + - Reference: + - Hyperparameters: "reference/hyperparameters.md" + - Configuration Space: "reference/configuration_space.md" + - Configuration: "reference/configuration.md" + - Conditions: "reference/conditions.md" + - Forbidden Clauses: "reference/forbiddens.md" + - Serialization: "reference/serialization.md" + - Util: "reference/utils.md" + - API: "api/" diff --git a/pyproject.toml b/pyproject.toml index 533a6fc9..cfd1d4f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,12 +1,12 @@ [project] name = "ConfigSpace" -version = "0.7.2" +version = "0.8.0" description = """\ Creation and manipulation of parameter configuration spaces for \ automated algorithm configuration and hyperparameter tuning. 
\ """ license.file = "LICENSE" -requires-python = ">=3.7" +requires-python = ">=3.8" readme = "README.md" authors = [ { name = "Matthias Feurer" }, @@ -37,7 +37,6 @@ keywords = [ "box", ] classifiers = [ - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", @@ -66,28 +65,24 @@ dependencies = [ ] [project.optional-dependencies] -dev = [ - "mypy", - "pre-commit", - "build", - "ruff", - "black", -] - -test = [ - "pytest>=4.6", - "pytest-cov", -] - +dev = ["ConfigSpace[test, tooling, docs]"] +test = ["pytest>=7", "pytest-cov", "pytest_cases"] +tooling = ["mypy", "pre-commit", "ruff", "types-pyyaml"] docs = [ - "automl_sphinx_theme>=0.1.11", + "mkdocs", + "mkdocs-material", + "mkdocs-autorefs", + "mkdocs-gen-files", + "mkdocs-literate-nav", + "mkdocs-glightbox", + "mkdocstrings[python]", + "markdown-exec[ansi]", + "mike", + "pillow", + "cairosvg", + "black", # This allows mkdocstrings to format signatures in the docs ] -[build-system] -requires = ["setuptools", "wheel", "oldest-supported-numpy", "Cython"] -build-backend = "setuptools.build_meta" - - [tool.pytest.ini_options] testpaths = ["test"] minversion = "7.0" @@ -112,19 +107,17 @@ exclude_lines = [ "if TYPE_CHECKING", ] -[tool.black] -target-version = ['py37'] -line-length = 100 - # https://github.com/charliermarsh/ruff [tool.ruff] -target-version = "py37" -line-length = 100 -show-source = true -src = ["ConfigSpace", "test"] +target-version = "py38" +line-length = 88 +output-format = "full" +src = ["src", "test"] +[tool.ruff.lint] # Allow unused variables when underscore-prefixed. 
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" +extend-safe-fixes = ["ALL"] select = [ "A", @@ -176,8 +169,8 @@ select = [ ] ignore = [ + "T201", # TODO: Remove "D100", - "D101", # Missing docstring in public class "D104", # Missing docstring in public package "D105", # Missing docstring in magic mthod "D203", # 1 blank line required before class docstring @@ -194,8 +187,8 @@ ignore = [ "PLR0912", # Too many branches "PLR0913", # Too many arguments to function call "PLR2004", # Magic constants - "N999", # Invalid Module name - "N802", # Function name should be lowercase + "N999", # Invalid Module name + "N802", # Function name should be lowercase # These tend to be lighweight and confuse pyright ] @@ -220,12 +213,10 @@ exclude = [ "node_modules", "venv", "docs", - # This is vendored, ignore it - "ConfigSpace/nx/**" ] # Exclude a variety of commonly ignored directories. -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "test/*.py" = [ "S101", "D102", @@ -246,22 +237,14 @@ exclude = [ "PLR0915", "BLE001", ] -"setup.py" = ["D102"] "__init__.py" = ["I002"] -"ConfigSpace/read_and_write/pcs_new.py" = [ - "N816", - "D103", - "PLW2901", -] -"ConfigSpace/read_and_write/pcs.py" = [ - "N816", - "D103", - "PLW2901", - "T201", -] +"src/ConfigSpace/read_and_write/pcs_new.py" = ["N816", "D103", "PLW2901"] +"src/ConfigSpace/read_and_write/pcs.py" = ["N816", "D103", "PLW2901", "T201"] +"scripts/*.py" = ["D103", "INP001"] +"docs/conf.py" = ["INP001"] -[tool.ruff.isort] +[tool.ruff.lint.isort] known-first-party = ["ConfigSpace"] no-lines-before = ["future"] required-imports = ["from __future__ import annotations"] @@ -269,13 +252,15 @@ combine-as-imports = true extra-standard-library = ["typing_extensions"] force-wrap-aliases = true -[tool.ruff.pydocstyle] -convention = "numpy" +[tool.ruff.lint.pydocstyle] +convention = "google" +[tool.ruff.lint.pylint] +max-args = 10 # Changed from default of 5 [tool.mypy] -python_version = "3.7" -packages = 
["ConfigSpace", "test"] +python_version = "3.8" +packages = ["src/ConfigSpace", "test"] show_error_codes = true @@ -284,8 +269,8 @@ warn_unused_configs = true # warn about unused [tool.mypy] lines follow_imports = "normal" # Type check top level api code we use from imports ignore_missing_imports = false # prefer explicit ignores -disallow_untyped_defs = true # All functions must have types -disallow_incomplete_defs = true # ...all types +disallow_untyped_defs = true # All functions must have types +disallow_incomplete_defs = true # ...all types disallow_untyped_decorators = false # ... but not decorators no_implicit_optional = true @@ -293,15 +278,6 @@ check_untyped_defs = true warn_return_any = true -[[tool.mypy.overrides]] -module = [ - "ConfigSpace.hyperparameters.*", - "ConfigSpace.conditions.*", - "ConfigSpace.forbidden.*", - "ConfigSpace.c_util.*", -] -ignore_missing_imports = true - [[tool.mypy.overrides]] module = ["test.*"] disallow_untyped_defs = false # Sometimes we just want to ignore verbose types @@ -309,5 +285,13 @@ disallow_untyped_decorators = false # Test decorators are not properly typed disallow_incomplete_defs = false # Sometimes we just want to ignore verbose types [[tool.mypy.overrides]] -module = ["ConfigSpace.nx.*"] # This is vendored, we ignore it +module = ["scipy.*"] +ignore_missing_imports = true + +# These modules are no longer recieving support +[[tool.mypy.overrides]] +module = [ + "ConfigSpace.read_and_write.pcs", + "ConfigSpace.read_and_write.pcs_new", +] ignore_errors = true diff --git a/scripts/benchmark-is-valid.py b/scripts/benchmark-is-valid.py new file mode 100644 index 00000000..c139a268 --- /dev/null +++ b/scripts/benchmark-is-valid.py @@ -0,0 +1,75 @@ +from __future__ import annotations + +import os +import time + +import numpy as np + +from ConfigSpace.read_and_write.pcs import read as read_pcs +from ConfigSpace.util import get_one_exchange_neighbourhood + + +def run_test(configuration_space_path): + if "2017_11" not in 
configuration_space_path: + return + + with open(configuration_space_path) as fh: + cs = read_pcs(fh) + + print("###") + print(configuration_space_path, flush=True) + + configs = [] + times = [] + + # Sample a little bit + for i in range(20): + cs.seed(i) + configurations = cs.sample_configuration(size=10) + for j, c in enumerate(configurations): + neighborhood = get_one_exchange_neighbourhood( + c, + seed=i * j, + num_neighbors=4, + ) + configs.extend(list(neighborhood)) + + for c in configs: + t0 = time.time() + c.check_valid_configuration() + t1 = time.time() + times.append(t1 - t0) + + print("Average time checking one configuration", np.mean(times)) + + +this_file = os.path.abspath(__file__) +this_directory = os.path.dirname(this_file) +configuration_space_path = os.path.join( + this_directory, + "..", + "test", + "test_searchspaces", +) +configuration_space_path = os.path.abspath(configuration_space_path) +pcs_files = os.listdir(configuration_space_path) + +for pcs_file in pcs_files: + if ".pcs" in pcs_file: + full_path = os.path.join(configuration_space_path, pcs_file) + run_test(full_path) + +# ------------ +# Average time sampling 100 configurations 0.0115247011185 +# Average time retrieving a nearest neighbor 0.00251974105835 +# Average time checking one configuration 0.000194481347553 + +# is_close_integer +# Average time sampling 100 configurations 0.1998179078102112 +# Average time retrieving a nearest neighbor 0.023387917677561442 +# Average time checking one configuration 0.0012463332840478253 + +# /home/skantify/code/ConfigSpace/test/test_searchspaces/auto-sklearn_2017_11_17.pcs +# Average time sampling 100 configurations 0.05419049263000488 +# Average time retrieving a nearest neighbor 0.01149404075410631 +# Average time checking one configuration 0.0006455589667150331 diff --git a/scripts/benchmark-neighbors.py b/scripts/benchmark-neighbors.py new file mode 100644 index 00000000..d0642ddb --- /dev/null +++ b/scripts/benchmark-neighbors.py @@ -0,0 
+1,66 @@ +# /home/feurerm/projects/ConfigSpace/test/test_searchspaces/auto-sklearn_2017_11_17.pcs +# Average time sampling 100 configurations 0.0115247011185 +# Average time retrieving a nearest neighbor 0.00251974105835 +# Average time checking one configuration 0.000194481347553 +from __future__ import annotations + +import os +import time + +import numpy as np + +import ConfigSpace +import ConfigSpace.read_and_write.pcs as pcs_parser +import ConfigSpace.util + +n_configs = 100 + + +def run_test(configuration_space_path): + if "2017_11" not in configuration_space_path: + return + + with open(configuration_space_path) as fh: + cs = pcs_parser.read(fh) + + print("###") + print(configuration_space_path, flush=True) + + neighborhood_time = [] + + for i in range(3): + cs.seed(i) + rs = np.random.RandomState(i) + configurations = cs.sample_configuration(size=n_configs) + for c in configurations: + c.check_valid_configuration() + + for _j, c in enumerate(configurations): + start_time = time.time() + neighborhood = ConfigSpace.util.get_one_exchange_neighbourhood( + c, + seed=rs, + num_neighbors=4, + ) + _ns = list(neighborhood) + end_time = time.time() + neighborhood_time.append(end_time - start_time) + + print(f"Average time retrieving a nearest neighbor {np.mean(neighborhood_time):f}") + + +this_file = os.path.abspath(__file__) +this_directory = os.path.dirname(this_file) +configuration_space_path = os.path.join( + this_directory, + "..", + "test", + "test_searchspaces", +) +configuration_space_path = os.path.abspath(configuration_space_path) +pcs_files = os.listdir(configuration_space_path) + +for pcs_file in pcs_files: + if ".pcs" in pcs_file: + full_path = os.path.join(configuration_space_path, pcs_file) + run_test(full_path) diff --git a/scripts/benchmark-only-sampling.py b/scripts/benchmark-only-sampling.py new file mode 100644 index 00000000..42e578e1 --- /dev/null +++ b/scripts/benchmark-only-sampling.py @@ -0,0 +1,68 @@ +# 
/home/feurerm/projects/ConfigSpace/test/test_searchspaces/auto-sklearn_2017_11_17.pcs +# Average time sampling 100 configurations 0.0115247011185 +# Average time retrieving a nearest neighbor 0.00251974105835 +# Average time checking one configuration 0.000194481347553 +from __future__ import annotations + +import os +import time + +import numpy as np + +import ConfigSpace.read_and_write.pcs as pcs_parser + +n_configs = 100 + + +def run_test(configuration_space_path): + if "2017_11" not in configuration_space_path: + return + + print("###") + print(configuration_space_path, flush=True) + + with open(configuration_space_path) as fh: + cs = pcs_parser.read(fh) + sampling_time = [] + + # Sample a little bit + for i in range(20): + cs.seed(i) + start_time = time.time() + cs.sample_configuration(size=n_configs) + end_time = time.time() + sampling_time.append(end_time - start_time) + + print("Average time sampling %d configurations" % n_configs, np.mean(sampling_time)) + + +this_file = os.path.abspath(__file__) +this_directory = os.path.dirname(this_file) +configuration_space_path = os.path.join( + this_directory, + "..", + "test", + "test_searchspaces", +) +configuration_space_path = os.path.abspath(configuration_space_path) +pcs_files = os.listdir(configuration_space_path) + +for pcs_file in pcs_files: + if ".pcs" in pcs_file: + full_path = os.path.join(configuration_space_path, pcs_file) + run_test(full_path) + +# ------------ +# Average time sampling 100 configurations 0.0115247011185 +# Average time retrieving a nearest neighbor 0.00251974105835 +# Average time checking one configuration 0.000194481347553 + +# is_close_integer +# Average time sampling 100 configurations 0.1998179078102112 +# Average time retrieving a nearest neighbor 0.023387917677561442 +# Average time checking one configuration 0.0012463332840478253 + +### +# /home/skantify/code/ConfigSpace/test/test_searchspaces/auto-sklearn_2017_11_17.pcs +# Average time sampling 100 configurations 
0.2051920175552368 +# Average time sampling 100 configurations 0.06 diff --git a/scripts/benchmark_sampling.py b/scripts/benchmark_sampling.py index 71ed3bbc..0a95cbd0 100644 --- a/scripts/benchmark_sampling.py +++ b/scripts/benchmark_sampling.py @@ -37,6 +37,8 @@ def run_test(configuration_space_path): configurations = cs.sample_configuration(size=n_configs) end_time = time.time() sampling_time.append(end_time - start_time) + for c in configurations: + c.check_valid_configuration() for j, c in enumerate(configurations): if i > 10: @@ -50,11 +52,13 @@ def run_test(configuration_space_path): validation_time = [] for _shuffle, n in enumerate(neighborhood): v_start_time = time.time() - n.is_valid_configuration() + n.check_valid_configuration() v_end_time = time.time() validation_time.append(v_end_time - v_start_time) end_time = time.time() - neighborhood_time.append(end_time - start_time - np.sum(validation_time)) + neighborhood_time.append( + end_time - start_time - np.sum(validation_time), + ) validation_times.extend(validation_time) print("Average time sampling %d configurations" % n_configs, np.mean(sampling_time)) @@ -64,7 +68,12 @@ def run_test(configuration_space_path): this_file = os.path.abspath(__file__) this_directory = os.path.dirname(this_file) -configuration_space_path = os.path.join(this_directory, "..", "test", "test_searchspaces") +configuration_space_path = os.path.join( + this_directory, + "..", + "test", + "test_searchspaces", +) configuration_space_path = os.path.abspath(configuration_space_path) pcs_files = os.listdir(configuration_space_path) @@ -72,3 +81,18 @@ def run_test(configuration_space_path): if ".pcs" in pcs_file: full_path = os.path.join(configuration_space_path, pcs_file) run_test(full_path) + +# ------------ +# Average time sampling 100 configurations 0.0115247011185 +# Average time retrieving a nearest neighbor 0.00251974105835 +# Average time checking one configuration 0.000194481347553 + +# is_close_integer +# Average time 
sampling 100 configurations 0.1998179078102112 +# Average time retrieving a nearest neighbor 0.023387917677561442 +# Average time checking one configuration 0.0012463332840478253 + +# /home/skantify/code/ConfigSpace/test/test_searchspaces/auto-sklearn_2017_11_17.pcs +# Average time sampling 100 configurations 0.05419049263000488 +# Average time retrieving a nearest neighbor 0.01149404075410631 +# Average time checking one configuration 0.0006455589667150331 diff --git a/setup.py b/setup.py deleted file mode 100644 index 5eb9bee3..00000000 --- a/setup.py +++ /dev/null @@ -1,119 +0,0 @@ -"""Setup.py for ConfigSpace. - -# Profiling -Set the below flag to True to enable profiling of the code. This will cause some minor performance -overhead so it should only be used for debugging purposes. - -Use [`py-spy`](https://github.com/benfred/py-spy) with [speedscope.app](https://www.speedscope.app/) -```bash -pip install py-spy -py-spy record --rate 800 --format speedscope --subprocesses --native -o profile.svg -- python