diff --git a/examples/demo_optim_data_preproc.ipynb b/examples/demo_optim_data_preproc.ipynb index 16273a7c..2c8ce616 100644 --- a/examples/demo_optim_data_preproc.ipynb +++ b/examples/demo_optim_data_preproc.ipynb @@ -1,757 +1,1382 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### This notebook demonstrates the use of an optimized data pre-processing algorithm for bias mitigation\n", - "\n", - "- The debiasing function used is implemented in the `OptimPreproc` class.\n", - "- Define parameters for optimized pre-processing specific to the dataset.\n", - "- Divide the dataset into training, validation, and testing partitions.\n", - "- Learn the optimized pre-processing transformation from the training data.\n", - "- Train classifier on original training data.\n", - "- Estimate the optimal classification threshold, that maximizes balanced accuracy without fairness constraints (from the original validation set).\n", - "- Determine the prediction scores for original testing data. Using the estimated optimal classification threshold, compute accuracy and fairness metrics.\n", - "- Transform the testing set using the learned probabilistic transformation.\n", - "- Determine the prediction scores for transformed testing data. 
Using the estimated optimal classification threshold, compute accuracy and fairness metrics.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "# Load all necessary packages\n", - "import sys\n", - "sys.path.append(\"../\")\n", - "import numpy as np\n", - "from tqdm import tqdm\n", - "\n", - "from aif360.datasets import BinaryLabelDataset\n", - "from aif360.datasets import AdultDataset, GermanDataset, CompasDataset\n", - "from aif360.metrics import BinaryLabelDatasetMetric\n", - "from aif360.metrics import ClassificationMetric\n", - "from aif360.metrics.utils import compute_boolean_conditioning_vector\n", - "from aif360.algorithms.preprocessing.optim_preproc import OptimPreproc\n", - "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\\\n", - " import load_preproc_data_adult, load_preproc_data_german, load_preproc_data_compas\n", - "from aif360.algorithms.preprocessing.optim_preproc_helpers.distortion_functions\\\n", - " import get_distortion_adult, get_distortion_german, get_distortion_compas\n", - "from aif360.algorithms.preprocessing.optim_preproc_helpers.opt_tools import OptTools\n", - "from common_utils import compute_metrics\n", - "\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.preprocessing import StandardScaler\n", - "from sklearn.metrics import accuracy_score\n", - "\n", - "from IPython.display import Markdown, display\n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Load dataset and specify options" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# import dataset\n", - "dataset_used = \"adult\" # \"adult\", \"german\", \"compas\"\n", - "protected_attribute_used = 1 # 1, 2\n", - "\n", - "if dataset_used == \"adult\":\n", - " if protected_attribute_used == 1:\n", - " 
privileged_groups = [{'sex': 1}]\n", - " unprivileged_groups = [{'sex': 0}]\n", - " dataset_orig = load_preproc_data_adult(['sex'])\n", - " else:\n", - " privileged_groups = [{'race': 1}]\n", - " unprivileged_groups = [{'race': 0}]\n", - " dataset_orig = load_preproc_data_adult(['race'])\n", - " \n", - " optim_options = {\n", - " \"distortion_fun\": get_distortion_adult,\n", - " \"epsilon\": 0.05,\n", - " \"clist\": [0.99, 1.99, 2.99],\n", - " \"dlist\": [.1, 0.05, 0]\n", - " }\n", - " \n", - "elif dataset_used == \"german\":\n", - " if protected_attribute_used == 1:\n", - " privileged_groups = [{'sex': 1}]\n", - " unprivileged_groups = [{'sex': 0}]\n", - " dataset_orig = load_preproc_data_german(['sex'])\n", - " optim_options = {\n", - " \"distortion_fun\": get_distortion_german,\n", - " \"epsilon\": 0.05,\n", - " \"clist\": [0.99, 1.99, 2.99],\n", - " \"dlist\": [.1, 0.05, 0]\n", - " }\n", - " \n", - " else:\n", - " privileged_groups = [{'age': 1}]\n", - " unprivileged_groups = [{'age': 0}]\n", - " dataset_orig = load_preproc_data_german(['age'])\n", - " optim_options = {\n", - " \"distortion_fun\": get_distortion_german,\n", - " \"epsilon\": 0.1,\n", - " \"clist\": [0.99, 1.99, 2.99],\n", - " \"dlist\": [.1, 0.05, 0]\n", - " } \n", - "\n", - "elif dataset_used == \"compas\":\n", - " if protected_attribute_used == 1:\n", - " privileged_groups = [{'sex': 1}]\n", - " unprivileged_groups = [{'sex': 0}]\n", - " dataset_orig = load_preproc_data_compas(['sex'])\n", - " else:\n", - " privileged_groups = [{'race': 1}]\n", - " unprivileged_groups = [{'race': 0}]\n", - " dataset_orig = load_preproc_data_compas(['race'])\n", - " \n", - " optim_options = {\n", - " \"distortion_fun\": get_distortion_compas,\n", - " \"epsilon\": 0.05,\n", - " \"clist\": [0.99, 1.99, 2.99],\n", - " \"dlist\": [.1, 0.05, 0]\n", - " }\n", - "\n", - "#random seed\n", - "np.random.seed(1)\n", - "\n", - "# Split into train, validation, and test\n", - "dataset_orig_train, dataset_orig_vt = 
dataset_orig.split([0.7], shuffle=True)\n", - "dataset_orig_valid, dataset_orig_test = dataset_orig_vt.split([0.5], shuffle=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Display dataset attributes" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "#### Training Dataset shape" + "cells": [ + { + "cell_type": "markdown", + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Trusted-AI/AIF360/blob/master/examples/demo_optim_data_preproc.ipynb)" + ], + "metadata": { + "id": "QFkbHA0zIQ92" + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DVW01MmV9bJL" + }, + "source": [ + "#### This notebook demonstrates the use of an optimized data pre-processing algorithm for bias mitigation\n", + "\n", + "- The debiasing function used is implemented in the `OptimPreproc` class.\n", + "- Define parameters for optimized pre-processing specific to the dataset.\n", + "- Divide the dataset into training, validation, and testing partitions.\n", + "- Learn the optimized pre-processing transformation from the training data.\n", + "- Train classifier on original training data.\n", + "- Estimate the optimal classification threshold, that maximizes balanced accuracy without fairness constraints (from the original validation set).\n", + "- Determine the prediction scores for original testing data. Using the estimated optimal classification threshold, compute accuracy and fairness metrics.\n", + "- Transform the testing set using the learned probabilistic transformation.\n", + "- Determine the prediction scores for transformed testing data. 
Using the estimated optimal classification threshold, compute accuracy and fairness metrics.\n" + ] + }, + { + "cell_type": "code", + "source": [ + "# Install necessary libraries\n", + "!pip install 'aif360'\n", + "!pip install 'aif360[LawSchoolGPA]'\n", + "!pip install 'aif360[Reductions]'" + ], + "metadata": { + "id": "7EzTKEyS9h4z", + "outputId": "8ccff2fc-92db-4228-d80b-a71901342a2f", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting aif360\n", + " Downloading aif360-0.5.0-py3-none-any.whl (214 kB)\n", + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/214.1 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m112.6/214.1 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m214.1/214.1 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy>=1.16 in /usr/local/lib/python3.10/dist-packages (from aif360) (1.23.5)\n", + "Requirement already satisfied: scipy>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from aif360) (1.11.2)\n", + "Requirement already satisfied: pandas>=0.24.0 in /usr/local/lib/python3.10/dist-packages (from aif360) (1.5.3)\n", + "Requirement already satisfied: scikit-learn>=1.0 in /usr/local/lib/python3.10/dist-packages (from aif360) (1.2.2)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from aif360) (3.7.1)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.24.0->aif360) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages 
(from pandas>=0.24.0->aif360) (2023.3.post1)\n", + "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.0->aif360) (1.3.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.0->aif360) (3.2.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->aif360) (1.1.0)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->aif360) (0.11.0)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->aif360) (4.42.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->aif360) (1.4.5)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->aif360) (23.1)\n", + "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->aif360) (9.4.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->aif360) (3.1.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas>=0.24.0->aif360) (1.16.0)\n", + "Installing collected packages: aif360\n", + "Successfully installed aif360-0.5.0\n", + "Requirement already satisfied: aif360[LawSchoolGPA] in /usr/local/lib/python3.10/dist-packages (0.5.0)\n", + "Requirement already satisfied: numpy>=1.16 in /usr/local/lib/python3.10/dist-packages (from aif360[LawSchoolGPA]) (1.23.5)\n", + "Requirement already satisfied: scipy>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from aif360[LawSchoolGPA]) (1.11.2)\n", + "Requirement already satisfied: pandas>=0.24.0 in /usr/local/lib/python3.10/dist-packages (from aif360[LawSchoolGPA]) (1.5.3)\n", + 
"Requirement already satisfied: scikit-learn>=1.0 in /usr/local/lib/python3.10/dist-packages (from aif360[LawSchoolGPA]) (1.2.2)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from aif360[LawSchoolGPA]) (3.7.1)\n", + "Collecting tempeh (from aif360[LawSchoolGPA])\n", + " Downloading tempeh-0.1.12-py3-none-any.whl (39 kB)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.24.0->aif360[LawSchoolGPA]) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.24.0->aif360[LawSchoolGPA]) (2023.3.post1)\n", + "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.0->aif360[LawSchoolGPA]) (1.3.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.0->aif360[LawSchoolGPA]) (3.2.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->aif360[LawSchoolGPA]) (1.1.0)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->aif360[LawSchoolGPA]) (0.11.0)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->aif360[LawSchoolGPA]) (4.42.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->aif360[LawSchoolGPA]) (1.4.5)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->aif360[LawSchoolGPA]) (23.1)\n", + "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->aif360[LawSchoolGPA]) (9.4.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->aif360[LawSchoolGPA]) 
(3.1.1)\n", + "Collecting memory-profiler (from tempeh->aif360[LawSchoolGPA])\n", + " Downloading memory_profiler-0.61.0-py3-none-any.whl (31 kB)\n", + "Requirement already satisfied: pytest in /usr/local/lib/python3.10/dist-packages (from tempeh->aif360[LawSchoolGPA]) (7.4.1)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from tempeh->aif360[LawSchoolGPA]) (2.31.0)\n", + "Collecting shap (from tempeh->aif360[LawSchoolGPA])\n", + " Downloading shap-0.42.1-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (547 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m547.9/547.9 kB\u001b[0m \u001b[31m20.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas>=0.24.0->aif360[LawSchoolGPA]) (1.16.0)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from memory-profiler->tempeh->aif360[LawSchoolGPA]) (5.9.5)\n", + "Requirement already satisfied: iniconfig in /usr/local/lib/python3.10/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA]) (2.0.0)\n", + "Requirement already satisfied: pluggy<2.0,>=0.12 in /usr/local/lib/python3.10/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA]) (1.3.0)\n", + "Requirement already satisfied: exceptiongroup>=1.0.0rc8 in /usr/local/lib/python3.10/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA]) (1.1.3)\n", + "Requirement already satisfied: tomli>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA]) (2.0.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->tempeh->aif360[LawSchoolGPA]) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from 
requests->tempeh->aif360[LawSchoolGPA]) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->tempeh->aif360[LawSchoolGPA]) (2.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->tempeh->aif360[LawSchoolGPA]) (2023.7.22)\n", + "Requirement already satisfied: tqdm>=4.27.0 in /usr/local/lib/python3.10/dist-packages (from shap->tempeh->aif360[LawSchoolGPA]) (4.66.1)\n", + "Collecting slicer==0.0.7 (from shap->tempeh->aif360[LawSchoolGPA])\n", + " Downloading slicer-0.0.7-py3-none-any.whl (14 kB)\n", + "Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from shap->tempeh->aif360[LawSchoolGPA]) (0.56.4)\n", + "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.10/dist-packages (from shap->tempeh->aif360[LawSchoolGPA]) (2.2.1)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->shap->tempeh->aif360[LawSchoolGPA]) (0.39.1)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from numba->shap->tempeh->aif360[LawSchoolGPA]) (67.7.2)\n", + "Installing collected packages: slicer, memory-profiler, shap, tempeh\n", + "Successfully installed memory-profiler-0.61.0 shap-0.42.1 slicer-0.0.7 tempeh-0.1.12\n", + "Requirement already satisfied: aif360[Reductions] in /usr/local/lib/python3.10/dist-packages (0.5.0)\n", + "Requirement already satisfied: numpy>=1.16 in /usr/local/lib/python3.10/dist-packages (from aif360[Reductions]) (1.23.5)\n", + "Requirement already satisfied: scipy>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from aif360[Reductions]) (1.11.2)\n", + "Requirement already satisfied: pandas>=0.24.0 in /usr/local/lib/python3.10/dist-packages (from aif360[Reductions]) (1.5.3)\n", + "Requirement already satisfied: scikit-learn>=1.0 in /usr/local/lib/python3.10/dist-packages (from 
aif360[Reductions]) (1.2.2)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from aif360[Reductions]) (3.7.1)\n", + "Collecting fairlearn~=0.7 (from aif360[Reductions])\n", + " Downloading fairlearn-0.9.0-py3-none-any.whl (231 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m231.5/231.5 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.24.0->aif360[Reductions]) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.24.0->aif360[Reductions]) (2023.3.post1)\n", + "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.0->aif360[Reductions]) (1.3.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.0->aif360[Reductions]) (3.2.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->aif360[Reductions]) (1.1.0)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->aif360[Reductions]) (0.11.0)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->aif360[Reductions]) (4.42.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->aif360[Reductions]) (1.4.5)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->aif360[Reductions]) (23.1)\n", + "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->aif360[Reductions]) (9.4.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in 
/usr/local/lib/python3.10/dist-packages (from matplotlib->aif360[Reductions]) (3.1.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas>=0.24.0->aif360[Reductions]) (1.16.0)\n", + "Installing collected packages: fairlearn\n", + "Successfully installed fairlearn-0.9.0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "import urllib.request\n", + "\n", + "import aif360\n", + "\n", + "# Obtain the location where it is installed\n", + "LIB_PATH = aif360.__file__.rsplit(\"aif360\", 1)[0]\n", + "\n", + "\n", + "def check_data_or_download(destn, files, data_source_directory):\n", + "    \"\"\"Ensure the raw data files exist locally, downloading them if any is missing.\n", + "\n", + "    Args:\n", + "        destn: Local directory that must contain the files.\n", + "        files: File names expected inside `destn`.\n", + "        data_source_directory: Base URL; each file name is appended to it\n", + "            verbatim, so it must end with '/'.\n", + "\n", + "    If at least one file is missing, every file in `files` is downloaded again.\n", + "    \"\"\"\n", + "    check = all(os.path.isfile(os.path.join(destn, item)) for item in files)\n", + "    if check:\n", + "        print(\"Adult dataset is available for us\")\n", + "    else:\n", + "        print(\"Some files are missing. Downloading now.\")\n", + "        # Create the target directory first so urlretrieve can write into it.\n", + "        os.makedirs(destn, exist_ok=True)\n", + "        for data_file in files:\n", + "            urllib.request.urlretrieve(data_source_directory + data_file,\n", + "                                       os.path.join(destn, data_file))\n", + "\n", + "# Download adult dataset\n", + "data_source_directory = \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/\"\n", + "destn = os.path.join(LIB_PATH, \"aif360\", \"data\", \"raw\", \"adult\")\n", + "files = [\"adult.data\", \"adult.test\", \"adult.names\"]\n", + "\n", + "check_data_or_download(destn, files, data_source_directory)" + ], + "metadata": { + "id": "kxlfn4ioFiHQ", + "outputId": "2bce72b8-0f76-4d8e-bb47-2d378c25e2de", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Some files are missing. 
Downloading now.\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ieHTdhTE9bJM" + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "# Load all necessary packages\n", + "from collections import OrderedDict\n", + "import sys\n", + "sys.path.append(\"../\")\n", + "import numpy as np\n", + "from tqdm import tqdm\n", + "\n", + "from aif360.datasets import BinaryLabelDataset\n", + "from aif360.datasets import AdultDataset, GermanDataset, CompasDataset\n", + "from aif360.metrics import BinaryLabelDatasetMetric\n", + "from aif360.metrics import ClassificationMetric\n", + "from aif360.metrics.utils import compute_boolean_conditioning_vector\n", + "from aif360.algorithms.preprocessing.optim_preproc import OptimPreproc\n", + "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\\\n", + " import load_preproc_data_adult, load_preproc_data_german, load_preproc_data_compas\n", + "from aif360.algorithms.preprocessing.optim_preproc_helpers.distortion_functions\\\n", + " import get_distortion_adult, get_distortion_german, get_distortion_compas\n", + "from aif360.algorithms.preprocessing.optim_preproc_helpers.opt_tools import OptTools\n", + "\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.metrics import accuracy_score\n", + "\n", + "from IPython.display import Markdown, display\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "source": [ + "# Metrics function\n", + "def compute_metrics(dataset_true, dataset_pred,\n", + "                    unprivileged_groups, privileged_groups,\n", + "                    disp=True):\n", + "    \"\"\"Compute balanced accuracy and group-fairness metrics for predictions.\n", + "\n", + "    Args:\n", + "        dataset_true: Dataset passed to `ClassificationMetric` as the reference data.\n", + "        dataset_pred: Dataset passed to `ClassificationMetric` as the classified data.\n", + "        unprivileged_groups: Group definitions forwarded to `ClassificationMetric`.\n", + "        privileged_groups: Group definitions forwarded to `ClassificationMetric`.\n", + "        disp: If True, print every metric as `name = value` with four decimals.\n", + "\n", + "    Returns:\n", + "        OrderedDict mapping metric name to value, in the order they are computed.\n", + "    \"\"\"\n", + "    classified_metric_pred = ClassificationMetric(\n", + "        dataset_true,\n", + "        dataset_pred,\n", + "        unprivileged_groups=unprivileged_groups,\n", + "        privileged_groups=privileged_groups)\n",
+ "    metrics = OrderedDict()\n", + "    # Balanced accuracy is the mean of the true-positive and true-negative rates.\n", + "    metrics[\"Balanced accuracy\"] = 0.5 * (\n", + "        classified_metric_pred.true_positive_rate()\n", + "        + classified_metric_pred.true_negative_rate())\n", + "    metrics[\"Statistical parity difference\"] = classified_metric_pred.statistical_parity_difference()\n", + "    metrics[\"Disparate impact\"] = classified_metric_pred.disparate_impact()\n", + "    metrics[\"Average odds difference\"] = classified_metric_pred.average_odds_difference()\n", + "    metrics[\"Equal opportunity difference\"] = classified_metric_pred.equal_opportunity_difference()\n", + "    metrics[\"Theil index\"] = classified_metric_pred.theil_index()\n", + "\n", + "    if disp:\n", + "        for name, value in metrics.items():\n", + "            print(\"%s = %.4f\" % (name, value))\n", + "\n", + "    return metrics" ], + "metadata": { + "id": "uGmzNDyTCat9" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8-usF1IA9bJN" + }, + "source": [ + "#### Load dataset and specify options" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "(34189, 18)\n" - ] + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RrThA70U9bJO" + }, + "outputs": [], + "source": [ + "# import dataset\n", + "dataset_used = \"adult\" # \"adult\", \"german\", \"compas\"\n", + "protected_attribute_used = 1 # 1, 2\n", + "\n", + "if dataset_used == \"adult\":\n", + " if protected_attribute_used == 1:\n", + " privileged_groups = [{'sex': 1}]\n", + " unprivileged_groups = [{'sex': 0}]\n", + " dataset_orig = load_preproc_data_adult(['sex'])\n", + " else:\n", + " privileged_groups = [{'race': 1}]\n", + " unprivileged_groups = [{'race': 0}]\n", + " dataset_orig = load_preproc_data_adult(['race'])\n", + "\n", + " optim_options = {\n", + " \"distortion_fun\": get_distortion_adult,\n", + " \"epsilon\": 0.05,\n", + " \"clist\": [0.99, 1.99, 2.99],\n", + " \"dlist\": [.1, 0.05, 0]\n", + " }\n", + "\n", 
+ "elif dataset_used == \"german\":\n", + " if protected_attribute_used == 1:\n", + " privileged_groups = [{'sex': 1}]\n", + " unprivileged_groups = [{'sex': 0}]\n", + " dataset_orig = load_preproc_data_german(['sex'])\n", + " optim_options = {\n", + " \"distortion_fun\": get_distortion_german,\n", + " \"epsilon\": 0.05,\n", + " \"clist\": [0.99, 1.99, 2.99],\n", + " \"dlist\": [.1, 0.05, 0]\n", + " }\n", + "\n", + " else:\n", + " privileged_groups = [{'age': 1}]\n", + " unprivileged_groups = [{'age': 0}]\n", + " dataset_orig = load_preproc_data_german(['age'])\n", + " optim_options = {\n", + " \"distortion_fun\": get_distortion_german,\n", + " \"epsilon\": 0.1,\n", + " \"clist\": [0.99, 1.99, 2.99],\n", + " \"dlist\": [.1, 0.05, 0]\n", + " }\n", + "\n", + "elif dataset_used == \"compas\":\n", + " if protected_attribute_used == 1:\n", + " privileged_groups = [{'sex': 1}]\n", + " unprivileged_groups = [{'sex': 0}]\n", + " dataset_orig = load_preproc_data_compas(['sex'])\n", + " else:\n", + " privileged_groups = [{'race': 1}]\n", + " unprivileged_groups = [{'race': 0}]\n", + " dataset_orig = load_preproc_data_compas(['race'])\n", + "\n", + " optim_options = {\n", + " \"distortion_fun\": get_distortion_compas,\n", + " \"epsilon\": 0.05,\n", + " \"clist\": [0.99, 1.99, 2.99],\n", + " \"dlist\": [.1, 0.05, 0]\n", + " }\n", + "\n", + "#random seed\n", + "np.random.seed(1)\n", + "\n", + "# Split into train, validation, and test\n", + "dataset_orig_train, dataset_orig_vt = dataset_orig.split([0.7], shuffle=True)\n", + "dataset_orig_valid, dataset_orig_test = dataset_orig_vt.split([0.5], shuffle=True)" + ] }, { - "data": { - "text/markdown": [ - "#### Favorable and unfavorable labels" + "cell_type": "markdown", + "metadata": { + "id": "LVJsopSk9bJO" + }, + "source": [ + "#### Display dataset attributes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "voA_Ybsy9bJP", + "outputId": "1e748276-a3cc-4688-aa1d-5e0fb926353f", + "colab": { + 
"base_uri": "https://localhost:8080/", + "height": 314 + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/markdown": "#### Training Dataset shape" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(34189, 18)\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/markdown": "#### Favorable and unfavorable labels" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1.0 0.0\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/markdown": "#### Protected attribute names" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "['sex']\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/markdown": "#### Privileged and unprivileged protected attribute values" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[array([1.])] [array([0.])]\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/markdown": "#### Dataset feature names" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "['race', 'sex', 'Age (decade)=10', 'Age (decade)=20', 'Age (decade)=30', 'Age (decade)=40', 'Age (decade)=50', 'Age (decade)=60', 'Age (decade)=>=70', 'Education Years=6', 'Education Years=7', 'Education Years=8', 'Education Years=9', 'Education Years=10', 'Education Years=11', 'Education Years=12', 'Education Years=<6', 'Education Years=>12']\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "# print out some labels, names, etc.\n", + "display(Markdown(\"#### Training Dataset shape\"))\n", + "print(dataset_orig_train.features.shape)\n", + "display(Markdown(\"#### Favorable and unfavorable labels\"))\n", + "print(dataset_orig_train.favorable_label, 
dataset_orig_train.unfavorable_label)\n", + "display(Markdown(\"#### Protected attribute names\"))\n", + "print(dataset_orig_train.protected_attribute_names)\n", + "display(Markdown(\"#### Privileged and unprivileged protected attribute values\"))\n", + "print(dataset_orig_train.privileged_protected_attributes,\n", + " dataset_orig_train.unprivileged_protected_attributes)\n", + "display(Markdown(\"#### Dataset feature names\"))\n", + "print(dataset_orig_train.feature_names)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "(1.0, 0.0)\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "5Jw3_OPu9bJP" + }, + "source": [ + "#### Metric for original training data" + ] }, { - "data": { - "text/markdown": [ - "#### Protected attribute names" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iL7VrC-m9bJQ", + "outputId": "90e3ce7f-d0ab-4493-d5ce-e8bc48a55a6a", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 72 + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/markdown": "#### Original training dataset" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Difference in mean outcomes between unprivileged and privileged groups = -0.190244\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "# Metric for the original dataset\n", + "metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "display(Markdown(\"#### Original training dataset\"))\n", + "print(\"Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_orig_train.mean_difference())" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "['sex']\n" - ] + "cell_type": "markdown", + "metadata": { 
+ "id": "dYEfzlqN9bJR" + }, + "source": [ + "#### Train with and transform the original training data" + ] }, { - "data": { - "text/markdown": [ - "#### Privileged and unprivileged protected attribute values" + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false, + "id": "aB-CKPUo9bJR", + "outputId": "ae4df5f1-1e80-4897-b236-74cc0a0826fb", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/aif360/algorithms/preprocessing/optim_preproc.py:68: UserWarning: Privileged and unprivileged groups specified will not be used. The protected attributes are directly specified in the data preprocessing function. The current implementation automatically adjusts for discrimination across all groups. This can be changed by changing the optimization code.\n", + " warn(\"Privileged and unprivileged groups specified will not be \"\n", + "/usr/local/lib/python3.10/dist-packages/cvxpy/expressions/expression.py:612: UserWarning: \n", + "This use of ``*`` has resulted in matrix multiplication.\n", + "Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.\n", + " Use ``*`` for matrix-scalar and vector-scalar multiplication.\n", + " Use ``@`` for matrix-matrix and matrix-vector multiplication.\n", + " Use ``multiply`` for elementwise multiplication.\n", + "This code path has been hit 1 times so far.\n", + "\n", + " warnings.warn(msg, UserWarning)\n", + "/usr/local/lib/python3.10/dist-packages/cvxpy/expressions/expression.py:612: UserWarning: \n", + "This use of ``*`` has resulted in matrix multiplication.\n", + "Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.\n", + " Use ``*`` for matrix-scalar and vector-scalar multiplication.\n", + " Use ``@`` for matrix-matrix and matrix-vector multiplication.\n", + " Use ``multiply`` for elementwise multiplication.\n", + "This code path has been 
hit 2 times so far.\n", + "\n", + " warnings.warn(msg, UserWarning)\n", + "/usr/local/lib/python3.10/dist-packages/cvxpy/expressions/expression.py:612: UserWarning: \n", + "This use of ``*`` has resulted in matrix multiplication.\n", + "Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.\n", + " Use ``*`` for matrix-scalar and vector-scalar multiplication.\n", + " Use ``@`` for matrix-matrix and matrix-vector multiplication.\n", + " Use ``multiply`` for elementwise multiplication.\n", + "This code path has been hit 3 times so far.\n", + "\n", + " warnings.warn(msg, UserWarning)\n", + "/usr/local/lib/python3.10/dist-packages/cvxpy/expressions/expression.py:612: UserWarning: \n", + "This use of ``*`` has resulted in matrix multiplication.\n", + "Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.\n", + " Use ``*`` for matrix-scalar and vector-scalar multiplication.\n", + " Use ``@`` for matrix-matrix and matrix-vector multiplication.\n", + " Use ``multiply`` for elementwise multiplication.\n", + "This code path has been hit 4 times so far.\n", + "\n", + " warnings.warn(msg, UserWarning)\n", + "/usr/local/lib/python3.10/dist-packages/cvxpy/problems/problem.py:1387: UserWarning: Solution may be inaccurate. 
Try another solver, adjusting the solver settings, or solve with verbose=True for more information.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Optimized Preprocessing: Objective converged to 0.010890\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "OP = OptimPreproc(OptTools, optim_options,\n", + " unprivileged_groups = unprivileged_groups,\n", + " privileged_groups = privileged_groups)\n", + "\n", + "OP = OP.fit(dataset_orig_train)\n", + "\n", + "# Transform training data and align features\n", + "dataset_transf_train = OP.transform(dataset_orig_train, transform_Y=True)\n", + "dataset_transf_train = dataset_orig_train.align_datasets(dataset_transf_train)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "([array([1.])], [array([0.])])\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "4S5hoh8Y9bJR" + }, + "source": [ + "#### Metric with the transformed training data" + ] }, { - "data": { - "text/markdown": [ - "#### Dataset feature names" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aIretI9R9bJS", + "outputId": "22a76ac7-2cda-4474-a720-b68353b6b8a3", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 72 + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/markdown": "#### Transformed training dataset" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Difference in mean outcomes between unprivileged and privileged groups = -0.047833\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "metric_transf_train = BinaryLabelDatasetMetric(dataset_transf_train,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "display(Markdown(\"#### Transformed training dataset\"))\n", + "print(\"Difference in mean outcomes between unprivileged and 
privileged groups = %f\" % metric_transf_train.mean_difference())" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "['race', 'sex', 'Age (decade)=10', 'Age (decade)=20', 'Age (decade)=30', 'Age (decade)=40', 'Age (decade)=50', 'Age (decade)=60', 'Age (decade)=>=70', 'Education Years=6', 'Education Years=7', 'Education Years=8', 'Education Years=9', 'Education Years=10', 'Education Years=11', 'Education Years=12', 'Education Years=<6', 'Education Years=>12']\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "PaqQjOD09bJS" + }, + "source": [ + "Optimized preprocessing has reduced the disparity in favorable outcomes between the privileged and unprivileged\n", + "groups (training data)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dY3_u9SM9bJS" + }, + "outputs": [], + "source": [ + "### Testing\n", + "assert np.abs(metric_transf_train.mean_difference()) < np.abs(metric_orig_train.mean_difference())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4QTuHi_o9bJS" + }, + "source": [ + "#### Load, clean up original test data and compute metric" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Q2YWVVS59bJS", + "outputId": "cff26fb1-44df-4a6f-bfb6-06affe4b42e5", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 128 + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/markdown": "#### Testing Dataset shape" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(7327, 18)\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/markdown": "#### Original test dataset" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Difference in mean outcomes between unprivileged and privileged groups = -0.190984\n" + ] + } 
+ ], + "source": [ + "dataset_orig_test = dataset_transf_train.align_datasets(dataset_orig_test)\n", + "display(Markdown(\"#### Testing Dataset shape\"))\n", + "print(dataset_orig_test.features.shape)\n", + "\n", + "metric_orig_test = BinaryLabelDatasetMetric(dataset_orig_test,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "display(Markdown(\"#### Original test dataset\"))\n", + "print(\"Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_orig_test.mean_difference())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Xmzgd6lY9bJS" + }, + "source": [ + "#### Transform test data and compute metric" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "p4yx6PlK9bJT", + "outputId": "8dc98d18-9e14-42a9-b7c9-db51e051e7b1", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 72 + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/markdown": "#### Transformed test dataset" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Difference in mean outcomes between unprivileged and privileged groups = -0.050932\n" + ] + } + ], + "source": [ + "dataset_transf_test = OP.transform(dataset_orig_test, transform_Y = True)\n", + "dataset_transf_test = dataset_orig_test.align_datasets(dataset_transf_test)\n", + "\n", + "metric_transf_test = BinaryLabelDatasetMetric(dataset_transf_test,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "display(Markdown(\"#### Transformed test dataset\"))\n", + "print(\"Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_transf_test.mean_difference())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WiORE_UT9bJT" + }, + "source": [ + "Optimized preprocessing has reduced the disparity in favorable outcomes 
between the privileged and unprivileged\n", + "groups (test data)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QUbsZEhP9bJT" + }, + "outputs": [], + "source": [ + "### Testing\n", + "assert np.abs(metric_transf_test.mean_difference()) < np.abs(metric_orig_test.mean_difference())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HbPMNGJJ9bJT" + }, + "source": [ + "### Train classifier on original data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cBjtKydK9bJT" + }, + "outputs": [], + "source": [ + "# Logistic regression classifier and predictions\n", + "scale_orig = StandardScaler()\n", + "X_train = scale_orig.fit_transform(dataset_orig_train.features)\n", + "y_train = dataset_orig_train.labels.ravel()\n", + "\n", + "lmod = LogisticRegression()\n", + "lmod.fit(X_train, y_train)\n", + "y_train_pred = lmod.predict(X_train)\n", + "\n", + "# positive class index\n", + "pos_ind = np.where(lmod.classes_ == dataset_orig_train.favorable_label)[0][0]\n", + "\n", + "dataset_orig_train_pred = dataset_orig_train.copy()\n", + "dataset_orig_train_pred.labels = y_train_pred" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cpYPUzwu9bJT" + }, + "source": [ + "#### Obtain scores for the original validation and test sets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XEEAkn3a9bJT" + }, + "outputs": [], + "source": [ + "dataset_orig_valid_pred = dataset_orig_valid.copy(deepcopy=True)\n", + "X_valid = scale_orig.transform(dataset_orig_valid_pred.features)\n", + "y_valid = dataset_orig_valid_pred.labels\n", + "dataset_orig_valid_pred.scores = lmod.predict_proba(X_valid)[:,pos_ind].reshape(-1,1)\n", + "\n", + "dataset_orig_test_pred = dataset_orig_test.copy(deepcopy=True)\n", + "X_test = scale_orig.transform(dataset_orig_test_pred.features)\n", + "y_test = dataset_orig_test_pred.labels\n", + "dataset_orig_test_pred.scores =
lmod.predict_proba(X_test)[:,pos_ind].reshape(-1,1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VSaTmbhx9bJU" + }, + "source": [ + "### Find the optimal classification threshold from the validation set" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "inoHZhxy9bJU", + "outputId": "6b770c24-70bb-4ceb-8666-ca09169bf8be", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Best balanced accuracy (no fairness constraints) = 0.7463\n", + "Optimal classification threshold (no fairness constraints) = 0.2872\n" + ] + } + ], + "source": [ + "num_thresh = 100\n", + "ba_arr = np.zeros(num_thresh)\n", + "class_thresh_arr = np.linspace(0.01, 0.99, num_thresh)\n", + "for idx, class_thresh in enumerate(class_thresh_arr):\n", + "\n", + " fav_inds = dataset_orig_valid_pred.scores > class_thresh\n", + " dataset_orig_valid_pred.labels[fav_inds] = dataset_orig_valid_pred.favorable_label\n", + " dataset_orig_valid_pred.labels[~fav_inds] = dataset_orig_valid_pred.unfavorable_label\n", + "\n", + " classified_metric_orig_valid = ClassificationMetric(dataset_orig_valid,\n", + " dataset_orig_valid_pred,\n", + " unprivileged_groups=unprivileged_groups,\n", + " privileged_groups=privileged_groups)\n", + "\n", + " ba_arr[idx] = 0.5*(classified_metric_orig_valid.true_positive_rate()\\\n", + " +classified_metric_orig_valid.true_negative_rate())\n", + "\n", + "best_ind = np.where(ba_arr == np.max(ba_arr))[0][0]\n", + "best_class_thresh = class_thresh_arr[best_ind]\n", + "\n", + "print(\"Best balanced accuracy (no fairness constraints) = %.4f\" % np.max(ba_arr))\n", + "print(\"Optimal classification threshold (no fairness constraints) = %.4f\" % best_class_thresh)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-o5vmB0Y9bJU" + }, + "source": [ + "### Predictions and fairness metrics from original test set" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EM1AgxaI9bJU", + "outputId": "23b3c00a-b58a-46d5-9a3f-d366b61b133d", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 292 + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/markdown": "#### Predictions from original testing data" + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/markdown": "#### Testing set" + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/markdown": "##### Raw predictions - No fairness constraints" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 45%|████▌ | 45/100 [00:00<00:00, 135.82it/s]" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Balanced accuracy = 0.7437\n", + "Statistical parity difference = -0.3580\n", + "Disparate impact = 0.2794\n", + "Average odds difference = -0.3181\n", + "Equal opportunity difference = -0.3769\n", + "Theil index = 0.1129\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 60%|██████ | 60/100 [00:00<00:00, 138.12it/s]/usr/local/lib/python3.10/dist-packages/aif360/metrics/dataset_metric.py:82: RuntimeWarning: invalid value encountered in double_scalars\n", + " return metric_fun(privileged=False) / metric_fun(privileged=True)\n", + "100%|██████████| 100/100 [00:00<00:00, 139.93it/s]\n" + ] + } + ], + "source": [ + "display(Markdown(\"#### Predictions from original testing data\"))\n", + "\n", + "bal_acc_arr_orig = []\n", + "disp_imp_arr_orig = []\n", + "avg_odds_diff_arr_orig = []\n", + "\n", + "display(Markdown(\"#### Testing set\"))\n", + "display(Markdown(\"##### Raw predictions - No fairness constraints\"))\n", + "\n", + "for thresh in tqdm(class_thresh_arr):\n", + "\n", + " fav_inds = dataset_orig_test_pred.scores > thresh\n", + " 
dataset_orig_test_pred.labels[fav_inds] = dataset_orig_test_pred.favorable_label\n", + " dataset_orig_test_pred.labels[~fav_inds] = dataset_orig_test_pred.unfavorable_label\n", + "\n", + " if (thresh == best_class_thresh):\n", + " disp = True\n", + " else:\n", + " disp = False\n", + "\n", + " metric_test_bef = compute_metrics(dataset_orig_test, dataset_orig_test_pred,\n", + " unprivileged_groups, privileged_groups, disp=disp)\n", + "\n", + " bal_acc_arr_orig.append(metric_test_bef[\"Balanced accuracy\"])\n", + " avg_odds_diff_arr_orig.append(metric_test_bef[\"Average odds difference\"])\n", + " disp_imp_arr_orig.append(metric_test_bef[\"Disparate impact\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "60HbaG209bJU", + "outputId": "47405090-491e-4a78-af8b-257599f7014c", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 629 + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ], + "source": [ + "fig, ax1 = plt.subplots(figsize=(10,7))\n", + "ax1.plot(class_thresh_arr, bal_acc_arr_orig)\n", + "ax1.set_xlabel('Classification Thresholds', fontsize=16, fontweight='bold')\n", + "ax1.set_ylabel('Balanced Accuracy', color='b', fontsize=16, fontweight='bold')\n", + "ax1.xaxis.set_tick_params(labelsize=14)\n", + "ax1.yaxis.set_tick_params(labelsize=14)\n", + "\n", + "\n", + "ax2 = ax1.twinx()\n", + "ax2.plot(class_thresh_arr, np.abs(1.0-np.array(disp_imp_arr_orig)), color='r')\n", + "ax2.set_ylabel('abs(1-disparate impact)', color='r', fontsize=16, fontweight='bold')\n", + "ax2.axvline(np.array(class_thresh_arr)[best_ind],\n", + " color='k', linestyle=':')\n", + "ax2.yaxis.set_tick_params(labelsize=14)\n", + "ax2.grid(True)\n", + "\n", + "disp_imp_at_best_bal_acc_orig = np.abs(1.0-np.array(disp_imp_arr_orig))[best_ind]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vbRZEt0r9bJU" + }, + "source": [ + "```abs(1-disparate impact)``` must be close to zero for classifier predictions to be fair." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4zeWdeYL9bJU" + }, + "source": [ + "### Train classifier on transformed data and obtain predictions with its fairness metrics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "59LC5sXc9bJU" + }, + "outputs": [], + "source": [ + "scale_transf = StandardScaler()\n", + "X_train = scale_transf.fit_transform(dataset_transf_train.features)\n", + "y_train = dataset_transf_train.labels.ravel()\n", + "\n", + "lmod = LogisticRegression()\n", + "lmod.fit(X_train, y_train)\n", + "y_train_pred = lmod.predict(X_train)\n", + "\n", + "dataset_transf_train_pred = dataset_transf_train.copy()\n", + "dataset_transf_train_pred.labels = y_train_pred" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W8rSib0a9bJV" + }, + "source": [ + "### Predictions and fairness metrics from transformed test set" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aQUn759e9bJV" + }, + "outputs": [], + "source": [ + "dataset_transf_test_pred = dataset_transf_test.copy(deepcopy=True)\n", + "X_test = scale_transf.transform(dataset_transf_test_pred.features)\n", + "y_test = dataset_transf_test_pred.labels\n", + "dataset_transf_test_pred.scores = lmod.predict_proba(X_test)[:,pos_ind].reshape(-1,1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kUfPTk1V9bJV", + "outputId": "c7b8e892-0275-424f-f99d-0d08d632db30", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 292 + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/markdown": "#### Predictions from transformed testing data" + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/markdown": "#### Testing set" + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/markdown": "##### 
Transformed predictions - No fairness constraints" + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 44%|████▍ | 44/100 [00:00<00:00, 139.92it/s]" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Balanced accuracy = 0.7013\n", + "Statistical parity difference = -0.0722\n", + "Disparate impact = 0.7895\n", + "Average odds difference = -0.0487\n", + "Equal opportunity difference = -0.0429\n", + "Theil index = 0.1469\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\r 59%|█████▉ | 59/100 [00:00<00:00, 141.36it/s]/usr/local/lib/python3.10/dist-packages/aif360/metrics/dataset_metric.py:82: RuntimeWarning: invalid value encountered in double_scalars\n", + " return metric_fun(privileged=False) / metric_fun(privileged=True)\n", + "100%|██████████| 100/100 [00:00<00:00, 110.73it/s]\n" + ] + } + ], + "source": [ + "display(Markdown(\"#### Predictions from transformed testing data\"))\n", + "\n", + "bal_acc_arr_transf = []\n", + "disp_imp_arr_transf = []\n", + "avg_odds_diff_arr_transf = []\n", + "\n", + "display(Markdown(\"#### Testing set\"))\n", + "display(Markdown(\"##### Transformed predictions - No fairness constraints\"))\n", + "\n", + "for thresh in tqdm(class_thresh_arr):\n", + "\n", + " fav_inds = dataset_transf_test_pred.scores > thresh\n", + " dataset_transf_test_pred.labels[fav_inds] = dataset_transf_test_pred.favorable_label\n", + " dataset_transf_test_pred.labels[~fav_inds] = dataset_transf_test_pred.unfavorable_label\n", + "\n", + " if (thresh == best_class_thresh):\n", + " disp = True\n", + " else:\n", + " disp = False\n", + "\n", + " metric_test_bef = compute_metrics(dataset_transf_test, dataset_transf_test_pred,\n", + " unprivileged_groups, privileged_groups, disp=disp)\n", + "\n", + " bal_acc_arr_transf.append(metric_test_bef[\"Balanced accuracy\"])\n", + " avg_odds_diff_arr_transf.append(metric_test_bef[\"Average odds difference\"])\n", + " 
disp_imp_arr_transf.append(metric_test_bef[\"Disparate impact\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YAoaHLhS9bJV", + "outputId": "362586d4-eba2-4de6-db4b-04eec24fc60f", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 629 + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ], + "source": [ + "fig, ax1 = plt.subplots(figsize=(10,7))\n", + "ax1.plot(class_thresh_arr, bal_acc_arr_transf)\n", + "ax1.set_xlabel('Classification Thresholds', fontsize=16, fontweight='bold')\n", + "ax1.set_ylabel('Balanced Accuracy', color='b', fontsize=16, fontweight='bold')\n", + "ax1.xaxis.set_tick_params(labelsize=14)\n", + "ax1.yaxis.set_tick_params(labelsize=14)\n", + "\n", + "\n", + "ax2 = ax1.twinx()\n", + "ax2.plot(class_thresh_arr, np.abs(1.0-np.array(disp_imp_arr_transf)), color='r')\n", + "ax2.set_ylabel('abs(1-disparate impact)', color='r', fontsize=16, fontweight='bold')\n", + "ax2.axvline(np.array(class_thresh_arr)[best_ind],\n", + " color='k', linestyle=':')\n", + "ax2.yaxis.set_tick_params(labelsize=14)\n", + "ax2.grid(True)\n", + "\n", + "disp_imp_at_best_bal_acc_transf = np.abs(1.0-np.array(disp_imp_arr_transf))[best_ind]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ng19V4Hx9bJV" + }, + "source": [ + "```abs(1-disparate impact)``` must be close to zero for classifier predictions to be fair. This measure improves when the classifier is trained on the transformed data rather than on the original data.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "srNJ5EZR9bJV" + }, + "outputs": [], + "source": [ + "### testing\n", + "assert disp_imp_at_best_bal_acc_transf < disp_imp_at_best_bal_acc_orig" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9ZArXBrz9bJV" + }, + "source": [ + "# Summary of Results\n", + "We show the optimal classification thresholds, and the fairness and accuracy metrics."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p_0dSqVO9bJV" + }, + "source": [ + "### Classification Thresholds\n", + "\n", + "| Dataset |Classification threshold|\n", + "|-|-|\n", + "|Adult|0.2674|\n", + "|German|0.6732|\n", + "|Compas|0.5148|" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "amlwNuwn9bJW" + }, + "source": [ + "### Fairness Metric: Disparate impact, Accuracy Metric: Balanced accuracy\n", + "\n", + "#### Performance\n", + "\n", + "| Dataset |Sex (Acc-Bef)|Sex (Acc-Aft)|Sex (Fair-Bef)|Sex (Fair-Aft)|Race/Age (Acc-Bef)|Race/Age (Acc-Aft)|Race/Age (Fair-Bef)|Race/Age (Fair-Aft)|\n", + "|-|-|-|-|-|-|-|-|-|\n", + "|Adult (Test)|0.7417|0.7021|0.2774|0.7729|0.7417|0.7408|0.4423|0.7645|\n", + "|German (Test)|0.6524|0.5698|0.9948|1.0664|0.6524|0.6067|0.3824|0.8228|\n", + "|Compas (Test)|0.6774|0.6606|0.6631|0.8085|0.6774|0.6790|0.6600|0.8430|\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oiyAy22V9bJW" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.10" + }, + "colab": { + "provenance": [] } - ], - "source": [ - "# print out some labels, names, etc.\n", - "display(Markdown(\"#### Training Dataset shape\"))\n", - "print(dataset_orig_train.features.shape)\n", - "display(Markdown(\"#### Favorable and unfavorable labels\"))\n", - "print(dataset_orig_train.favorable_label, dataset_orig_train.unfavorable_label)\n", - "display(Markdown(\"#### Protected attribute names\"))\n", - "print(dataset_orig_train.protected_attribute_names)\n", - "display(Markdown(\"#### Privileged and unprivileged protected attribute values\"))\n", - 
"print(dataset_orig_train.privileged_protected_attributes, \n", - " dataset_orig_train.unprivileged_protected_attributes)\n", - "display(Markdown(\"#### Dataset feature names\"))\n", - "print(dataset_orig_train.feature_names)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Metric for original training data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Metric for the original dataset\n", - "metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train, \n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "display(Markdown(\"#### Original training dataset\"))\n", - "print(\"Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_orig_train.mean_difference())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Train with and transform the original training data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "OP = OptimPreproc(OptTools, optim_options,\n", - " unprivileged_groups = unprivileged_groups,\n", - " privileged_groups = privileged_groups)\n", - "\n", - "OP = OP.fit(dataset_orig_train)\n", - "\n", - "# Transform training data and align features\n", - "dataset_transf_train = OP.transform(dataset_orig_train, transform_Y=True)\n", - "dataset_transf_train = dataset_orig_train.align_datasets(dataset_transf_train)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Metric with the transformed training data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "metric_transf_train = BinaryLabelDatasetMetric(dataset_transf_train, \n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "display(Markdown(\"#### Transformed training 
dataset\"))\n", - "print(\"Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_transf_train.mean_difference())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Optimized preprocessing has reduced the disparity in favorable outcomes between the privileged and unprivileged\n", - "groups (training data)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "### Testing \n", - "assert np.abs(metric_transf_train.mean_difference()) < np.abs(metric_orig_train.mean_difference())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Load, clean up original test data and compute metric" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataset_orig_test = dataset_transf_train.align_datasets(dataset_orig_test)\n", - "display(Markdown(\"#### Testing Dataset shape\"))\n", - "print(dataset_orig_test.features.shape)\n", - "\n", - "metric_orig_test = BinaryLabelDatasetMetric(dataset_orig_test, \n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "display(Markdown(\"#### Original test dataset\"))\n", - "print(\"Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_orig_test.mean_difference())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Transform test data and compute metric" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataset_transf_test = OP.transform(dataset_orig_test, transform_Y = True)\n", - "dataset_transf_test = dataset_orig_test.align_datasets(dataset_transf_test)\n", - "\n", - "metric_transf_test = BinaryLabelDatasetMetric(dataset_transf_test, \n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - "display(Markdown(\"#### 
Transformed test dataset\"))\n", - "print(\"Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_transf_test.mean_difference())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Optimized preprocessing has reduced the disparity in favorable outcomes between the privileged and unprivileged\n", - "groups (test data)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "### Testing \n", - "assert np.abs(metric_transf_test.mean_difference()) < np.abs(metric_orig_test.mean_difference())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Train classifier on original data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Logistic regression classifier and predictions\n", - "scale_orig = StandardScaler()\n", - "X_train = scale_orig.fit_transform(dataset_orig_train.features)\n", - "y_train = dataset_orig_train.labels.ravel()\n", - "\n", - "lmod = LogisticRegression()\n", - "lmod.fit(X_train, y_train)\n", - "y_train_pred = lmod.predict(X_train)\n", - "\n", - "# positive class index\n", - "pos_ind = np.where(lmod.classes_ == dataset_orig_train.favorable_label)[0][0]\n", - "\n", - "dataset_orig_train_pred = dataset_orig_train.copy()\n", - "dataset_orig_train_pred.labels = y_train_pred" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Obtain scores original test set" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataset_orig_valid_pred = dataset_orig_valid.copy(deepcopy=True)\n", - "X_valid = scale_orig.transform(dataset_orig_valid_pred.features)\n", - "y_valid = dataset_orig_valid_pred.labels\n", - "dataset_orig_valid_pred.scores = lmod.predict_proba(X_valid)[:,pos_ind].reshape(-1,1)\n", - "\n", - "dataset_orig_test_pred = 
dataset_orig_test.copy(deepcopy=True)\n", - "X_test = scale_orig.transform(dataset_orig_test_pred.features)\n", - "y_test = dataset_orig_test_pred.labels\n", - "dataset_orig_test_pred.scores = lmod.predict_proba(X_test)[:,pos_ind].reshape(-1,1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Find the optimal classification threshold from the validation set" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "num_thresh = 100\n", - "ba_arr = np.zeros(num_thresh)\n", - "class_thresh_arr = np.linspace(0.01, 0.99, num_thresh)\n", - "for idx, class_thresh in enumerate(class_thresh_arr):\n", - " \n", - " fav_inds = dataset_orig_valid_pred.scores > class_thresh\n", - " dataset_orig_valid_pred.labels[fav_inds] = dataset_orig_valid_pred.favorable_label\n", - " dataset_orig_valid_pred.labels[~fav_inds] = dataset_orig_valid_pred.unfavorable_label\n", - " \n", - " classified_metric_orig_valid = ClassificationMetric(dataset_orig_valid,\n", - " dataset_orig_valid_pred, \n", - " unprivileged_groups=unprivileged_groups,\n", - " privileged_groups=privileged_groups)\n", - " \n", - " ba_arr[idx] = 0.5*(classified_metric_orig_valid.true_positive_rate()\\\n", - " +classified_metric_orig_valid.true_negative_rate())\n", - "\n", - "best_ind = np.where(ba_arr == np.max(ba_arr))[0][0]\n", - "best_class_thresh = class_thresh_arr[best_ind]\n", - "\n", - "print(\"Best balanced accuracy (no fairness constraints) = %.4f\" % np.max(ba_arr))\n", - "print(\"Optimal classification threshold (no fairness constraints) = %.4f\" % best_class_thresh)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Predictions and fairness metrics from original test set" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "display(Markdown(\"#### Predictions from original testing data\"))\n", - "\n", - "bal_acc_arr_orig = []\n", - 
"disp_imp_arr_orig = []\n", - "avg_odds_diff_arr_orig = []\n", - "\n", - "display(Markdown(\"#### Testing set\"))\n", - "display(Markdown(\"##### Raw predictions - No fairness constraints\"))\n", - "\n", - "for thresh in tqdm(class_thresh_arr):\n", - " \n", - " fav_inds = dataset_orig_test_pred.scores > thresh\n", - " dataset_orig_test_pred.labels[fav_inds] = dataset_orig_test_pred.favorable_label\n", - " dataset_orig_test_pred.labels[~fav_inds] = dataset_orig_test_pred.unfavorable_label\n", - "\n", - " if (thresh == best_class_thresh):\n", - " disp = True\n", - " else:\n", - " disp = False\n", - " \n", - " metric_test_bef = compute_metrics(dataset_orig_test, dataset_orig_test_pred, \n", - " unprivileged_groups, privileged_groups, disp=disp)\n", - " \n", - " bal_acc_arr_orig.append(metric_test_bef[\"Balanced accuracy\"])\n", - " avg_odds_diff_arr_orig.append(metric_test_bef[\"Average odds difference\"])\n", - " disp_imp_arr_orig.append(metric_test_bef[\"Disparate impact\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig, ax1 = plt.subplots(figsize=(10,7))\n", - "ax1.plot(class_thresh_arr, bal_acc_arr_orig)\n", - "ax1.set_xlabel('Classification Thresholds', fontsize=16, fontweight='bold')\n", - "ax1.set_ylabel('Balanced Accuracy', color='b', fontsize=16, fontweight='bold')\n", - "ax1.xaxis.set_tick_params(labelsize=14)\n", - "ax1.yaxis.set_tick_params(labelsize=14)\n", - "\n", - "\n", - "ax2 = ax1.twinx()\n", - "ax2.plot(class_thresh_arr, np.abs(1.0-np.array(disp_imp_arr_orig)), color='r')\n", - "ax2.set_ylabel('abs(1-disparate impact)', color='r', fontsize=16, fontweight='bold')\n", - "ax2.axvline(np.array(class_thresh_arr)[best_ind], \n", - " color='k', linestyle=':')\n", - "ax2.yaxis.set_tick_params(labelsize=14)\n", - "ax2.grid(True)\n", - "\n", - "disp_imp_at_best_bal_acc_orig = np.abs(1.0-np.array(disp_imp_arr_orig))[best_ind]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - 
"source": [ - "```abs(1-disparate impact)``` must be close to zero for classifier predictions to be fair." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Train classifier on transformed data and obtain predictions with its fairness metrics" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scale_transf = StandardScaler()\n", - "X_train = scale_transf.fit_transform(dataset_transf_train.features)\n", - "y_train = dataset_transf_train.labels.ravel()\n", - "\n", - "lmod = LogisticRegression()\n", - "lmod.fit(X_train, y_train)\n", - "y_train_pred = lmod.predict(X_train)\n", - "\n", - "dataset_transf_train_pred = dataset_transf_train.copy()\n", - "dataset_transf_train_pred.labels = y_train_pred" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Predictions and fairness metrics from transformed test set" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataset_transf_test_pred = dataset_transf_test.copy(deepcopy=True)\n", - "X_test = scale_transf.transform(dataset_transf_test_pred.features)\n", - "y_test = dataset_transf_test_pred.labels\n", - "dataset_transf_test_pred.scores = lmod.predict_proba(X_test)[:,pos_ind].reshape(-1,1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "display(Markdown(\"#### Predictions from transformed testing data\"))\n", - "\n", - "bal_acc_arr_transf = []\n", - "disp_imp_arr_transf = []\n", - "avg_odds_diff_arr_transf = []\n", - "\n", - "display(Markdown(\"#### Testing set\"))\n", - "display(Markdown(\"##### Transformed predictions - No fairness constraints\"))\n", - "\n", - "for thresh in tqdm(class_thresh_arr):\n", - " \n", - " fav_inds = dataset_transf_test_pred.scores > thresh\n", - " dataset_transf_test_pred.labels[fav_inds] = dataset_transf_test_pred.favorable_label\n", - " 
dataset_transf_test_pred.labels[~fav_inds] = dataset_transf_test_pred.unfavorable_label\n", - "\n", - " if (thresh == best_class_thresh):\n", - " disp = True\n", - " else:\n", - " disp = False\n", - " \n", - " metric_test_bef = compute_metrics(dataset_transf_test, dataset_transf_test_pred, \n", - " unprivileged_groups, privileged_groups, disp=disp)\n", - " \n", - " bal_acc_arr_transf.append(metric_test_bef[\"Balanced accuracy\"])\n", - " avg_odds_diff_arr_transf.append(metric_test_bef[\"Average odds difference\"])\n", - " disp_imp_arr_transf.append(metric_test_bef[\"Disparate impact\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig, ax1 = plt.subplots(figsize=(10,7))\n", - "ax1.plot(class_thresh_arr, bal_acc_arr_transf)\n", - "ax1.set_xlabel('Classification Thresholds', fontsize=16, fontweight='bold')\n", - "ax1.set_ylabel('Balanced Accuracy', color='b', fontsize=16, fontweight='bold')\n", - "ax1.xaxis.set_tick_params(labelsize=14)\n", - "ax1.yaxis.set_tick_params(labelsize=14)\n", - "\n", - "\n", - "ax2 = ax1.twinx()\n", - "ax2.plot(class_thresh_arr, np.abs(1.0-np.array(disp_imp_arr_transf)), color='r')\n", - "ax2.set_ylabel('abs(1-disparate impact)', color='r', fontsize=16, fontweight='bold')\n", - "ax2.axvline(np.array(class_thresh_arr)[best_ind], \n", - " color='k', linestyle=':')\n", - "ax2.yaxis.set_tick_params(labelsize=14)\n", - "ax2.grid(True)\n", - "\n", - "disp_imp_at_best_bal_acc_transf = np.abs(1.0-np.array(disp_imp_arr_transf))[best_ind]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```abs(1-disparate impact)``` must be close to zero for classifier predictions to be fair. 
This measure improves when the classifier is trained on the transformed data instead of the original data.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "### testing\n", - "assert disp_imp_at_best_bal_acc_transf < disp_imp_at_best_bal_acc_orig" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Summary of Results\n", - "We show the optimal classification thresholds, and the fairness and accuracy metrics." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Classification Thresholds\n", - "\n", - "| Dataset |Classification threshold|\n", - "|-|-|\n", - "|Adult|0.2674|\n", - "|German|0.6732|\n", - "|Compas|0.5148|" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Fairness Metric: Disparate impact, Accuracy Metric: Balanced accuracy\n", - "\n", - "#### Performance\n", - "\n", - "| Dataset |Sex (Acc-Bef)|Sex (Acc-Aft)|Sex (Fair-Bef)|Sex (Fair-Aft)|Race/Age (Acc-Bef)|Race/Age (Acc-Aft)|Race/Age (Fair-Bef)|Race/Age (Fair-Aft)|\n", - "|-|-|-|-|-|-|-|-|-|\n", - "|Adult (Test)|0.7417|0.7021|0.2774|0.7729|0.7417|0.7408|0.4423|0.7645|\n", - "|German (Test)|0.6524|0.5698|0.9948|1.0664|0.6524|0.6067|0.3824|0.8228|\n", - "|Compas (Test)|0.6774|0.6606|0.6631|0.8085|0.6774|0.6790|0.6600|0.8430|\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.10" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "nbformat": 4, + "nbformat_minor": 0 }