Merge pull request #20 from genos/updates-20220125

lmc2179 · web-flow · commit 6daabc2959d6 · 2022-03-12T10:49:02.000-05:00
Updates 20220125
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,208 @@
+
+# Created by https://www.toptal.com/developers/gitignore/api/macos,python,vim
+# Edit at https://www.toptal.com/developers/gitignore?templates=macos,python,vim
+
+### macOS ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+### Vim ###
+# Swap (comment out the `!*.svg` line below if you don't need vector files)
+[._]*.s[a-v][a-z]
+!*.svg
+[._]*.sw[a-p]
+[._]s[a-rt-v][a-z]
+[._]ss[a-gi-z]
+[._]sw[a-p]
+
+# Session
+Session.vim
+Sessionx.vim
+
+# Temporary
+.netrwhist
+*~
+# Auto-generated tag files
+tags
+# Persistent undo
+[._]*.un~
+
+# End of https://www.toptal.com/developers/gitignore/api/macos,python,vim
diff --git a/README.md b/README.md
@@ -115,7 +115,7 @@ Users interested in accessing the base models can do so via the `base_models_` a
 
 Interested in contributing? We'd love to have your help! Please keep the following in mind:
 
-* Bug fixes are welcome! Make sure you reference the issue number that is being resolved, and that all test cases in `tests` pass on both Python 2.7 and 3.4/3.5.
+* Bug fixes are welcome! Make sure you reference the issue number that is being resolved, and that all test cases in `tests` pass.
 
 * New features are welcome as well! Any new features should include docstrings and unit tests in the `tests` directory.
 
diff --git a/bayesian_bootstrap/__init__.py b/bayesian_bootstrap/__init__.py
@@ -275,9 +275,7 @@ def central_credible_interval(samples, alpha=0.05):
 
     Returns: Left and right interval bounds (tuple)
     """
-    tail_size = int(round(len(samples) * (alpha / 2)))
-    samples_sorted = sorted(samples)
-    return samples_sorted[tail_size], samples_sorted[-tail_size - 1]
+    return np.quantile(samples, alpha / 2), np.quantile(samples, 1 - alpha / 2)
 
 
 def highest_density_interval(samples, alpha=0.05):
diff --git a/bayesian_bootstrap/tests/test_bootstrap.py b/bayesian_bootstrap/tests/test_bootstrap.py
@@ -1,7 +1,6 @@
 import unittest
 import numpy as np
 import scipy
-import random
 import bayesian_bootstrap as bb
 from bayesian_bootstrap import (
     mean,
@@ -14,6 +13,8 @@
 )
 from sklearn.linear_model import LinearRegression
 
+RNG = np.random.default_rng(1337)  # repeatable pseudorandomness
+
 
 class TestMoments(unittest.TestCase):
     def test_mean(self):
@@ -23,18 +24,18 @@ def test_mean(self):
         self.assertAlmostEqual(len([s for s in posterior_samples if s < 0]), 5000, delta=1000)
 
     def test_variance(self):
-        X = np.random.uniform(-1, 1, 500)
+        X = RNG.uniform(-1, 1, 500)
         posterior_samples = var(X, 10000)
         self.assertAlmostEqual(np.mean(posterior_samples), 1 / 3.0, delta=0.05)
 
     def test_self_covar(self):
-        X = np.random.uniform(-1, 1, 500)
+        X = RNG.uniform(-1, 1, 500)
         posterior_samples = covar(X, X, 10000)
         self.assertAlmostEqual(np.mean(posterior_samples), np.var(X), delta=0.05)
 
     def test_covar(self):
-        X = np.random.uniform(-1, 1, 500)
-        Y = np.random.uniform(-1, 1, 500)
+        X = RNG.uniform(-1, 1, 500)
+        Y = RNG.uniform(-1, 1, 500)
         posterior_samples = covar(X, Y, 10000)
         self.assertAlmostEqual(np.mean(posterior_samples), 0, delta=0.05)
 
@@ -48,25 +49,25 @@ def test_mean_resample(self):
         self.assertAlmostEqual(len([s for s in posterior_samples if s < 0]), 5000, delta=1000)
 
     def test_var_resample(self):
-        X = np.random.uniform(-1, 1, 500)
+        X = RNG.uniform(-1, 1, 500)
         posterior_samples = bayesian_bootstrap(X, np.var, 10000, 5000, low_mem=True)
         self.assertAlmostEqual(np.mean(posterior_samples), 1 / 3.0, delta=0.05)
-        X = np.random.uniform(-1, 1, 500)
+        X = RNG.uniform(-1, 1, 500)
         posterior_samples = bayesian_bootstrap(X, np.var, 10000, 5000, low_mem=False)
         self.assertAlmostEqual(np.mean(posterior_samples), 1 / 3.0, delta=0.05)
 
 
 class TestIntervals(unittest.TestCase):
     def test_central_credible_interval(self):
-        l, r = central_credible_interval(self._shuffle(list(range(10))), alpha=0.2)
-        self.assertEqual(l, 1)
-        self.assertEqual(r, 8)
-        l, r = central_credible_interval(self._shuffle(list(range(10))), alpha=0.19)
-        self.assertEqual(l, 1)
-        self.assertEqual(r, 8)
-        l, r = central_credible_interval(self._shuffle(list(range(20))), alpha=0.1)
-        self.assertEqual(l, 1)
-        self.assertEqual(r, 18)
+        l, r = central_credible_interval(self._shuffle(range(10)), alpha=0.2)
+        self.assertEqual(l, 0.9)
+        self.assertEqual(r, 8.1)
+        l, r = central_credible_interval(self._shuffle(range(10)), alpha=0.19)
+        self.assertEqual(l, 0.855)
+        self.assertEqual(r, 8.145)
+        l, r = central_credible_interval(self._shuffle(range(20)), alpha=0.1)
+        self.assertAlmostEqual(l, 0.95)
+        self.assertEqual(r, 18.05)
 
     def test_hpdi(self):
         l, r = highest_density_interval(self._shuffle([0, 10, 1] + [1.1] * 7), alpha=0.2)
@@ -78,14 +79,14 @@ def test_hpdi(self):
 
     def _shuffle(self, x):
         x = list(x)
-        random.shuffle(x)
+        RNG.shuffle(x)
         return x
 
 
 class TestRegression(unittest.TestCase):
     def test_parameter_estimation_resampling_low_memory(self):
-        X = np.random.uniform(0, 4, 1000)
-        y = X + np.random.normal(0, 1, 1000)
+        X = RNG.uniform(0, 4, 1000)
+        y = X + RNG.normal(0, 1, 1000)
         m = BayesianBootstrapBagging(LinearRegression(), 10000, 1000, low_mem=True)
         m.fit(X.reshape(-1, 1), y)
         coef_samples = [b.coef_ for b in m.base_models_]
@@ -107,8 +108,8 @@ def test_parameter_estimation_resampling_low_memory(self):
         self.assertGreater(r, 0)
 
     def test_parameter_estimation_resampling(self):
-        X = np.random.uniform(0, 4, 1000)
-        y = X + np.random.normal(0, 1, 1000)
+        X = RNG.uniform(0, 4, 1000)
+        y = X + RNG.normal(0, 1, 1000)
         m = BayesianBootstrapBagging(LinearRegression(), 10000, 1000, low_mem=False)
         m.fit(X.reshape(-1, 1), y)
         coef_samples = [b.coef_ for b in m.base_models_]
@@ -130,8 +131,8 @@ def test_parameter_estimation_resampling(self):
         self.assertGreater(r, 0)
 
     def test_parameter_estimation_bayes(self):
-        X = np.random.uniform(0, 4, 1000)
-        y = X + np.random.normal(0, 1, 1000)
+        X = RNG.uniform(0, 4, 1000)
+        y = X + RNG.normal(0, 1, 1000)
         m = BayesianBootstrapBagging(LinearRegression(), 10000, low_mem=False)
         m.fit(X.reshape(-1, 1), y)
         coef_samples = [b.coef_ for b in m.base_models_]
@@ -153,8 +154,8 @@ def test_parameter_estimation_bayes(self):
         self.assertGreater(r, 0)
 
     def test_parameter_estimation_bayes_low_memory(self):
-        X = np.random.uniform(0, 4, 1000)
-        y = X + np.random.normal(0, 1, 1000)
+        X = RNG.uniform(0, 4, 1000)
+        y = X + RNG.normal(0, 1, 1000)
         m = BayesianBootstrapBagging(LinearRegression(), 10000, low_mem=True)
         m.fit(X.reshape(-1, 1), y)
         coef_samples = [b.coef_ for b in m.base_models_]
@@ -182,12 +183,10 @@ def test_pearsonr():
     assert np.mean(bb.pearsonr(x, y, 10000)) == 1
     assert np.mean(bb.pearsonr(x, -y, 10000)) == -1
 
-    np.random.seed(1337)
     x = [0, 1, 3, 6]
     y = [1, 2, 5, 7]
     assert np.isclose(np.mean(bb.pearsonr(x, y, 10000)), scipy.stats.pearsonr(x, y)[0], atol=0.001)
 
-    np.random.seed(1337)
     x = np.linspace(-10, 10, 10000)
     y = np.abs(x)
     assert np.isclose(scipy.stats.pearsonr(x, y)[0], np.mean(bb.pearsonr(x, y, 1000)), atol=0.001)
diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,4 @@
-numpy
-scipy
-pandas
-scikit-learn
-tqdm
+numpy>=1.22.1
+scipy>=1.7.3
+scikit-learn>=1.0.2
+tqdm>=4.62.3
diff --git a/setup.py b/setup.py