{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Auditing the COMPAS Score: Predictive Modeling and Algorithmic Fairness" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We will be using the dataset at [https://github.com/propublica/compas-analysis/raw/master/compas-scores-two-years.csv](https://github.com/propublica/compas-analysis/raw/master/compas-scores-two-years.csv). Reading it in:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "from sklearn import linear_model\n", "from sklearn import preprocessing\n", "import learningmachine as lm\n", "\n", "import warnings\n", "warnings.filterwarnings('ignore')" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Configuration: values a reader might want to tune are named here.\n", "SEED = 0\n", "COMPAS_DATA_URL = \"https://github.com/propublica/compas-analysis/raw/master/compas-scores-two-years.csv\"\n", "\n", "# Seed NumPy's global RNG so later draws from it are repeatable across runs.\n", "np.random.seed(SEED)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Fetch the CSV from COMPAS_DATA_URL and parse it into a pandas DataFrame\n", "# (this cell needs network access).\n", "df_in = pd.read_csv(COMPAS_DATA_URL)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | id | \n", "name | \n", "first | \n", "last | \n", "compas_screening_date | \n", "sex | \n", "dob | \n", "age | \n", "age_cat | \n", "race | \n", "... | \n", "v_decile_score | \n", "v_score_text | \n", "v_screening_date | \n", "in_custody | \n", "out_custody | \n", "priors_count.1 | \n", "start | \n", "end | \n", "event | \n", "two_year_recid | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "miguel hernandez | \n", "miguel | \n", "hernandez | \n", "2013-08-14 | \n", "Male | \n", "1947-04-18 | \n", "69 | \n", "Greater than 45 | \n", "Other | \n", "... | \n", "1 | \n", "Low | \n", "2013-08-14 | \n", "2014-07-07 | \n", "2014-07-14 | \n", "0 | \n", "0 | \n", "327 | \n", "0 | \n", "0 | \n", "
1 | \n", "3 | \n", "kevon dixon | \n", "kevon | \n", "dixon | \n", "2013-01-27 | \n", "Male | \n", "1982-01-22 | \n", "34 | \n", "25 - 45 | \n", "African-American | \n", "... | \n", "1 | \n", "Low | \n", "2013-01-27 | \n", "2013-01-26 | \n", "2013-02-05 | \n", "0 | \n", "9 | \n", "159 | \n", "1 | \n", "1 | \n", "
2 | \n", "4 | \n", "ed philo | \n", "ed | \n", "philo | \n", "2013-04-14 | \n", "Male | \n", "1991-05-14 | \n", "24 | \n", "Less than 25 | \n", "African-American | \n", "... | \n", "3 | \n", "Low | \n", "2013-04-14 | \n", "2013-06-16 | \n", "2013-06-16 | \n", "4 | \n", "0 | \n", "63 | \n", "0 | \n", "1 | \n", "
3 | \n", "5 | \n", "marcu brown | \n", "marcu | \n", "brown | \n", "2013-01-13 | \n", "Male | \n", "1993-01-21 | \n", "23 | \n", "Less than 25 | \n", "African-American | \n", "... | \n", "6 | \n", "Medium | \n", "2013-01-13 | \n", "NaN | \n", "NaN | \n", "1 | \n", "0 | \n", "1174 | \n", "0 | \n", "0 | \n", "
4 | \n", "6 | \n", "bouthy pierrelouis | \n", "bouthy | \n", "pierrelouis | \n", "2013-03-26 | \n", "Male | \n", "1973-01-22 | \n", "43 | \n", "25 - 45 | \n", "Other | \n", "... | \n", "1 | \n", "Low | \n", "2013-03-26 | \n", "NaN | \n", "NaN | \n", "2 | \n", "0 | \n", "1102 | \n", "0 | \n", "0 | \n", "
5 rows × 53 columns
\n", "