sapply(c("ggplot2", "tidyr", "tibble", "dplyr", "purrr", "RColorBrewer"), require, character.only = TRUE)
##      ggplot2        tidyr       tibble        dplyr        purrr 
##         TRUE         TRUE         TRUE         TRUE         TRUE 
## RColorBrewer 
##         TRUE
source("utils.R") # plain R script containing the functions we wrote in the Wright-Fisher section
set.seed(23)

Here we will use our Wright-Fisher simulations to simulate the current allele frequencies among 5 different college campus squirrel populations: Boston University (B), UT-Austin (T), University of Michigan (M), Syracuse (S), and University of Florida (F).

We will then filter out alleles that fall to a frequency of 0 in all populations. This will comprise our final dataset of alleles for the classroom exercise.

Simulation Parameters:

N <- 100
g <- 200
m <- 50
p_0 <- runif(n = m, min = 0, max = 0.25)

Simulating each campus’ squirrel allele frequencies:

# Boston University
X_B <- wrightFisher(.N = N, .p = p_0, .g = g)
# UT-Austin
X_T <- wrightFisher(.N = N, .p = p_0, .g = g)
# University of Michigan
X_M <- wrightFisher(.N = N, .p = p_0, .g = g)
# Syracuse
X_S <- wrightFisher(.N = N, .p = p_0, .g = g)
# University of Florida
X_F <- wrightFisher(.N = N, .p = p_0, .g = g)
# Wild population
X_Wild <- wrightFisher(.N = N*100, .p = p_0, .g = g)

How many alleles are non-zero in at least one population?

sum((X_B[, g] > 0) | (X_T[, g] > 0) | (X_M[, g] > 0) | 
      (X_S[, g] > 0) | (X_F[, g] > 0))
## [1] 40

How many alleles are zero in all populations? (Should sum to \(m\) with the previous set)

zero_idxs <- which((X_B[, g] == 0) & (X_T[, g] == 0) & (X_M[, g] == 0) & 
                     (X_S[, g] == 0) & (X_F[, g] == 0))
length(zero_idxs)
## [1] 10

Removing all-zero alleles:

X_B <- X_B[-zero_idxs,]
X_T <- X_T[-zero_idxs,]
X_M <- X_M[-zero_idxs,]
X_S <- X_S[-zero_idxs,]
X_F <- X_F[-zero_idxs,]
X_Wild <- X_Wild[-zero_idxs,]

Here are our final datasets for the classroom exercise, colored by frequency.

nAlleles <- m - length(zero_idxs)
plotdf <- tibble(i = rep(seq(from = 10, to = 50, by = 10), each = nAlleles) + 5, 
                 j = rep(c(1:nAlleles), times = 5),
                 freq = c(X_B[, g], X_T[, g], X_M[, g], X_S[, g], X_F[, g]))
ggplot(plotdf, aes(x = i, y = j, fill = freq)) +
  geom_tile(width = 1) +
  geom_text(data = plotdf, aes(x = i + 3,
                               y = j,
                               label = round(freq, digits = 2))) +
  geom_text(data = plotdf, aes(x = i - 2,
                               y = j,
                               label = rev(j))) +
  scale_fill_distiller(palette = "Spectral") +
  theme_classic()

Saving these frequencies so they can be referenced permanently:

# save(X_B, X_T, X_M, X_S, X_F, file = "Frequencies.RData")

Visualizing example alleles in all 5 college populations:

Example of an allele that remained segregating in most populations:

sample_allele <- which(X_B[,200] == .22)
plot(x = 1:g, y = X_B[sample_allele, ], type = "l", col = "#EE2C24",
     xlab = "Generation", ylab = "allele frequency", ylim = c(0, 1),
     main = paste("Locus", nrow(X_B) - sample_allele + 1))
lines(x = 1:g, y = X_T[sample_allele, ], type = "l", col = "#CC6E28")
lines(x = 1:g, y = X_M[sample_allele, ], type = "l", col = "#F7CE0E")
lines(x = 1:g, y = X_S[sample_allele, ], type = "l", col = "#2300F6")
lines(x = 1:g, y = X_F[sample_allele, ], type = "l", col = "#007C46")

Example of an allele that was lost from all but BU:

sample_allele <- which(X_B[,200] == .16)
plot(x = 1:g, y = X_B[sample_allele, ], type = "l", col = "#EE2C24",
     xlab = "Generation", ylab = "allele frequency", ylim = c(0, 1),
     main = paste("Locus", nrow(X_B) - sample_allele + 1))
lines(x = 1:g, y = X_T[sample_allele, ], type = "l", col = "#CC6E28")
lines(x = 1:g, y = X_M[sample_allele, ], type = "l", col = "#F7CE0E")
lines(x = 1:g, y = X_S[sample_allele, ], type = "l", col = "#2300F6")
lines(x = 1:g, y = X_F[sample_allele, ], type = "l", col = "#007C46")

Random wild alleles:

Example of an allele that remained segregating in most populations:

sample_alleles <- sample(c(1:nrow(X_Wild)), size = 5)
plot(x = 1:g, y = X_Wild[sample_alleles[1], ], type = "l", col = "red",
     xlab = "Generation", ylab = "allele frequency", ylim = c(0, 1),
     main = "5 Alleles in Wild Population (N = 10,000)")
lines(x = 1:g, y = X_Wild[sample_alleles[2], ], type = "l", col = "green")
lines(x = 1:g, y = X_Wild[sample_alleles[3], ], type = "l", col = "blue")
lines(x = 1:g, y = X_Wild[sample_alleles[4], ], type = "l", col = "yellow")
lines(x = 1:g, y = X_Wild[sample_alleles[5], ], type = "l", col = "purple")

Those same 5 alleles in the BU population:

plot(x = 1:g, y = X_B[sample_alleles[1], ], type = "l", col = "red",
     xlab = "Generation", ylab = "allele frequency", ylim = c(0, 1),
     main = "5 Alleles in BU Population (N = 100)")
lines(x = 1:g, y = X_B[sample_alleles[2], ], type = "l", col = "green")
lines(x = 1:g, y = X_B[sample_alleles[3], ], type = "l", col = "blue")
lines(x = 1:g, y = X_B[sample_alleles[4], ], type = "l", col = "yellow")
lines(x = 1:g, y = X_B[sample_alleles[5], ], type = "l", col = "purple")

```