```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(reshape2)
```

We will be using the dataset at [https://github.com/propublica/compas-analysis/raw/master/compas-scores-two-years.csv](https://github.com/propublica/compas-analysis/raw/master/compas-scores-two-years.csv). Reading it in:

```{r}
set.seed(0)
compas <- read.csv("https://github.com/propublica/compas-analysis/raw/master/compas-scores-two-years.csv")
```

First, we'll split the data into the training, test, and validation sets we'll be working with:

```{r}
idx <- sample(1:nrow(compas))
idx.train <- idx[1:5000]
idx.test <- idx[5001:6000]
idx.valid <- idx[6001:7000]
compas.train <- compas[idx.train, ]
compas.test <- compas[idx.test, ]
compas.valid <- compas[idx.valid, ]
```

### Part 1: Comparing the scores of black and white defendants

We first explore whether white and black defendants get the same COMPAS scores.

```{r}
compas.bw <- compas.train %>% filter(race %in% c("African-American", "Caucasian"))
ggplot(compas.bw, mapping = aes(x = decile_score)) +
  geom_histogram(bins = 10, mapping = aes(y = ..density..)) +
  scale_x_continuous(breaks = 1:10) +
  facet_wrap(~race) +
  xlab("Decile score")
```

For African-American defendants, the distribution of the scores is approximately uniform. For Caucasian defendants, low scores are much more common than high scores.

### Part 2: Initial evaluation of the COMPAS scores

Here, we compute the false positive rate (FPR), false negative rate (FNR), and correct classification rate (CCR) for different populations. First, we'll define functions to compute the quantities we need; a defendant is predicted to recidivate when their decile score is at or above the threshold `thr`.

```{r}
# FPR: fraction of non-recidivists who are predicted to recidivate
get.FPR <- function(compas.set, thr){
  return(sum((compas.set$decile_score >= thr) & (compas.set$is_recid == 0)) /
           sum(compas.set$is_recid == 0))
}

# FNR: fraction of recidivists who are predicted not to recidivate
get.FNR <- function(compas.set, thr){
  return(sum((compas.set$decile_score < thr) & (compas.set$is_recid == 1)) /
           sum(compas.set$is_recid == 1))
}

# CCR: fraction of predictions that are correct
get.CCR <- function(compas.set, thr){
  return(mean((compas.set$decile_score >= thr) == compas.set$is_recid))
}
```

Compute the subsets we need:

```{r}
compas.valid.b <- compas.valid %>% filter(race == "African-American")
compas.valid.w <- compas.valid %>% filter(race == "Caucasian")
```

Now we can compute the rates:

```{r}
thr <- 5
fps <- c(get.FPR(compas.valid.b, thr), get.FPR(compas.valid.w, thr), get.FPR(compas.valid, thr))
fns <- c(get.FNR(compas.valid.b, thr), get.FNR(compas.valid.w, thr), get.FNR(compas.valid, thr))
ccr <- c(get.CCR(compas.valid.b, thr), get.CCR(compas.valid.w, thr), get.CCR(compas.valid, thr))
rates <- data.frame(FPR = fps, FNR = fns, CCR = ccr)
rownames(rates) <- c("black", "white", "all")
rates
```

We can see that the scores satisfy neither false positive parity nor false negative parity. The scores do satisfy classification parity: the correct classification rates are similar for the two groups. Demographic parity is not satisfied either.
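We can back up the demographic parity claim directly. Here is a minimal sketch that compares the fraction of each group flagged as likely to recidivate at the same threshold of 5; demographic parity would require these fractions to be roughly equal:

```{r}
# Demographic parity check: fraction of each group with decile_score >= 5
compas.valid %>%
  filter(race %in% c("African-American", "Caucasian")) %>%
  group_by(race) %>%
  summarise(frac.flagged = mean(decile_score >= 5))
```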
### Part 3: Altering the threshold

We will now see how changing the threshold influences the false positive, false negative, and correct classification rates.

```{r}
# Compute (FPR, FNR, CCR) at a given threshold
get.rates <- function(thr, compas.set){
  return(c(get.FPR(compas.set, thr), get.FNR(compas.set, thr), get.CCR(compas.set, thr)))
}

thrs <- seq(0.5, 9.5, 1)
rates.w <- sapply(thrs, FUN = get.rates, compas.valid.w)
rates.b <- sapply(thrs, FUN = get.rates, compas.valid.b)
rates.all <- sapply(thrs, FUN = get.rates, compas.valid)

plot.rates <- function(thrs, rates, caption){
  rates.df <- data.frame(threshold = thrs, FPR = rates[1, ], FNR = rates[2, ], CCR = rates[3, ])
  rates.df.tidy <- melt(rates.df, 1) %>%
    select(threshold = threshold, measure = variable, rate = value)
  ggplot(rates.df.tidy, mapping = aes(x = threshold, y = rate, color = measure)) +
    geom_smooth(se = FALSE, method = "loess") +
    labs(title = caption)
}
```

We can now produce three figures for the different demographics (rather than using facets this time):

```{r}
plot.rates(thrs, rates.w, "white defendants")
plot.rates(thrs, rates.b, "black defendants")
plot.rates(thrs, rates.all, "all defendants")
```

### Part 4: Trying to reproduce the score

Let's fit the model:

```{r}
fit <- glm(is_recid ~ age + priors_count, family = binomial, data = compas.train)
fit$coefficients
```

* An increase of 1 in the number of priors is associated with an increase of 0.17 in the log-odds of recidivism, all other things being equal.
* An increase in age of one year is associated with a decrease of 0.05 in the log-odds of recidivism, all other things being equal.
* (If we're being a bit silly and decide to extrapolate) according to the model, a newborn with no priors would have a probability of $\sigma(1.04) = 0.74$ of being re-arrested; see the sanity check below.
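A quick sanity check on the last bullet (a minimal sketch; `plogis` is base R's logistic CDF, i.e. exactly $\sigma$):

```{r}
# sigma(intercept): the predicted probability at age = 0, priors_count = 0,
# i.e. the extrapolated "newborn with no priors" case
plogis(unname(fit$coefficients["(Intercept)"]))
```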
Let's now obtain the FPR, FNR, and CCR for our model, using the threshold $0.5$.

```{r}
# Analogues of get.FPR/get.FNR/get.CCR for a fitted model: a defendant is
# predicted to recidivate when the predicted probability exceeds thr
get.FPR.fit <- function(thr, compas.set, fit){
  pred <- predict(fit, newdata = compas.set, type = "response") > thr
  return(sum((pred == 1) & (compas.set$is_recid == 0)) / sum(compas.set$is_recid == 0))
}

get.FNR.fit <- function(thr, compas.set, fit){
  pred <- predict(fit, newdata = compas.set, type = "response") > thr
  return(sum((pred == 0) & (compas.set$is_recid == 1)) / sum(compas.set$is_recid == 1))
}

get.CCR.fit <- function(thr, compas.set, fit){
  pred <- predict(fit, newdata = compas.set, type = "response") > thr
  return(mean(pred == compas.set$is_recid))
}

compas.train.b <- compas.train %>% filter(race == "African-American")
compas.train.w <- compas.train %>% filter(race == "Caucasian")
compas.valid.b <- compas.valid %>% filter(race == "African-American")
compas.valid.w <- compas.valid %>% filter(race == "Caucasian")

thr <- .5
demo.perf.dat.train <- data.frame(
  FPR = c(get.FPR.fit(thr, compas.train, fit),
          get.FPR.fit(thr, compas.train.w, fit),
          get.FPR.fit(thr, compas.train.b, fit)),
  FNR = c(get.FNR.fit(thr, compas.train, fit),
          get.FNR.fit(thr, compas.train.w, fit),
          get.FNR.fit(thr, compas.train.b, fit)),
  CCR = c(get.CCR.fit(thr, compas.train, fit),
          get.CCR.fit(thr, compas.train.w, fit),
          get.CCR.fit(thr, compas.train.b, fit)))
rownames(demo.perf.dat.train) <- c("all", "white", "black")
demo.perf.dat.train

demo.perf.dat.valid <- data.frame(
  FPR = c(get.FPR.fit(thr, compas.valid, fit),
          get.FPR.fit(thr, compas.valid.w, fit),
          get.FPR.fit(thr, compas.valid.b, fit)),
  FNR = c(get.FNR.fit(thr, compas.valid, fit),
          get.FNR.fit(thr, compas.valid.w, fit),
          get.FNR.fit(thr, compas.valid.b, fit)),
  CCR = c(get.CCR.fit(thr, compas.valid, fit),
          get.CCR.fit(thr, compas.valid.w, fit),
          get.CCR.fit(thr, compas.valid.b, fit)))
rownames(demo.perf.dat.valid) <- c("all", "white", "black")
demo.perf.dat.valid
```

The training and validation rates are very close, so there is essentially no overfitting.

Let's create the two hypothetical defendants, identical except for one additional prior, and predict the probability of re-arrest for both:

```{r}
dat <- data.frame(age = c(30, 30), priors_count = c(2, 3))
predict(fit, newdata = dat, type = "response")
```

We observe an increase of about 3 percentage points in the predicted probability.

### Part 5: Adjusting thresholds

We want to find thresholds for which the two groups' false positive rates are at parity. Let's see what the rates are for a range of thresholds:

```{r}
thr <- c(0.3, 0.4, 0.51, 0.52, 0.53, 0.54, 0.55, 0.553, 0.555, 0.558,
         0.56, 0.57, 0.58, 0.59, 0.7, 0.8)
fp.rates <- data.frame(thr = thr,
                       FPR.b = sapply(thr, get.FPR.fit, compas.train.b, fit),
                       FPR.w = sapply(thr, get.FPR.fit, compas.train.w, fit))
fp.rates
```

We only need to nudge the threshold for black defendants up a little:

```{r}
plot.FPR <- function(fp.rates){
  colnames(fp.rates) <- c("thr", "African-American", "Caucasian")
  df.melted <- melt(fp.rates, "thr")
  colnames(df.melted) <- c("thr", "demographic", "FPR")
  ggplot(df.melted) +
    geom_smooth(mapping = aes(x = thr, y = FPR, color = demographic),
                method = "loess", se = FALSE) +
    geom_vline(xintercept = 0.5, linetype = "dashed") +
    geom_vline(xintercept = 0.553, linetype = "dashed") +
    geom_hline(yintercept = 0.211, linetype = "dashed") +
    ggtitle("FPR across thresholds")
}
plot.FPR(fp.rates)
```

`thr = 0.553` for black defendants seems about right: paired with `thr = 0.5` for white defendants, the two demographics are at (approximate) false positive parity. We'll now compute the correct classification rate on the validation set under these thresholds.

```{r}
# Evaluate on the validation set (the thresholds were chosen on the training set)
n.correct <- sum(compas.valid.b$is_recid ==
                   (predict(fit, newdata = compas.valid.b, type = "response") > 0.553)) +
             sum(compas.valid.w$is_recid ==
                   (predict(fit, newdata = compas.valid.w, type = "response") > 0.5))
n.total <- nrow(compas.valid.b) + nrow(compas.valid.w)
n.correct / n.total
```

(Note that we ignored everyone who wasn't white or black. That's fine for our purposes, but including the other demographics, in any way you like, would be fine too.)
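As an aside, instead of reading the matching threshold off the table and the figure, we could search for it programmatically. A minimal sketch, reusing `get.FPR.fit` and assuming we keep the white defendants' threshold fixed at 0.5:

```{r}
# Target: the white FPR at the default threshold of 0.5
target.fpr <- get.FPR.fit(0.5, compas.train.w, fit)
# Find the threshold for black defendants whose FPR is closest to the target
grid <- seq(0.3, 0.8, by = 0.001)
fpr.b <- sapply(grid, get.FPR.fit, compas.train.b, fit)
grid[which.min(abs(fpr.b - target.fpr))]
```

This should land near the `0.553` we found by hand.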