sapply(c("stringr", "RColorBrewer", "circlize", "ggridges",
         "ComplexHeatmap"), require, character.only=TRUE)
## Loading required package: stringr
## Loading required package: RColorBrewer
## Loading required package: circlize
## ========================================
## circlize version 0.4.16
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================
## Loading required package: ggridges
## Loading required package: ComplexHeatmap
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.24.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
##        stringr   RColorBrewer       circlize 
##           TRUE           TRUE           TRUE 
##       ggridges ComplexHeatmap 
##           TRUE           TRUE
source("functions_for_figure_scripts.R")
load("data_files/FinalDataframe3Disp.RData")
load("data_files/Cleaned_Counts.RData")
load("data_files/Cleaned_Counts_Allele.RData")
load("data_files/CorrelationClustering3Disp.RData")
load(file = "data_files/GO_Slim.RData")

Plotting a number-line of sampling timepoints proportional to time ellapsed (min) for the Experimental Design figure

# following bgoldst suggestion on stackoverflow: https://stackoverflow.com/questions/32051890/r-plotting-points-with-labels-on-a-single-horizontal-numberline
plotSamplingTimepoints <- function(.timepoints, .labels = FALSE,
                                   .label_height = 20) {
  max_tp <- max(.timepoints)
  min_tp <- min(.timepoints)
  xlim <- c(min_tp, max_tp)
  ylim <- c(min_tp, max_tp)
  px <- .timepoints
  py <- rep(0, times = length(.timepoints))
  lx.buf <- 5
  lx <- seq(xlim[1] + lx.buf,
            xlim[2] - lx.buf, 
            len = length(px))
  ly <- .label_height
  xlim <- c(xlim[1] - 10,
            xlim[2] + 30)
  ## create basic plot outline
  par(xaxs = 'i', yaxs = 'i', mar = c(5, 1, 1, 1))# mar = c(b, l, t, r)
  plot(NA, xlim = xlim, ylim = ylim, axes = FALSE, ann = FALSE)
  #axis(1)
  
  ## plot elements
  points(px, py, pch = 16, xpd = NA)
  if (.labels) {
    segments(px, py, lx, ly)
    text(lx, ly, px, pos = 3)
  }
}
# tests for plotSamplingTimepoints
test_tps <- info[info$experiment == "LowPi",]$time_point_num |> unique()
plotSamplingTimepoints(.timepoints = test_tps)
plotSamplingTimepoints(.timepoints = test_tps, .labels = TRUE)

plotSamplingTimepoints(.timepoints = test_tps, .label_height = 100, .labels = TRUE)

label_lookup <- tibble(experiment = ExperimentNames,
                       label_height = c(150, 10, 40, 20, 5, 5))
# generating plots
for (e in ExperimentNames) {
  e_tps <- info[info$experiment == e,]$time_point_num |> unique()
  labh <- label_lookup[label_lookup$experiment == e,]$label_height |> as.numeric()
  pdf(file = paste0("paper_figures/ExperimentOverview/timepoints_",
                    e, ".pdf"), width = 5, height = 2)
  print(plotSamplingTimepoints(.timepoints = e_tps, .label_height = labh))
  dev.off()
}
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL

Visualizing cluster average expression for all environments:

display.brewer.all()

Color palettes we’ll use for the 6 environments:

palettedf <- tibble(experiment = ExperimentNames,
                    palette = c("Greys", "YlGn", "Greens", "Purples", "YlOrBr", "BuPu"),
                    long_name = LongExperimentNames)

Wrapping cluster plots into a function so we don’t have to repeatedly make the same plot

plotClusterPatternByExperiment <- function(.df, .experiment, .title = NULL) {
  plotdf <- summarise(group_by(.df, time_point_num, label),
                      mean_expr = mean(expr, na.rm = TRUE))
  plotdf$label <- as.factor(plotdf$label)
  color_plt <- palettedf |> filter(experiment == .experiment) |> dplyr::select(palette) |> pull()
  if (is.null(.title)) {
    .title <- palettedf |> filter(experiment == .experiment) |> dplyr::select(long_name) |> pull()
  }
  ggplot(plotdf, aes(x = time_point_num, y = log2(mean_expr + 1))) +
    geom_line(aes(group = label, color = label), linewidth = 4) +
    geom_point(color = "black", alpha = 0.4) +
    xlab("timepoint (min)") +
    ylab("expression (log2)") +
    scale_color_brewer(palette = color_plt, name = "cluster",
                       direction = -1) +
    theme_classic() +
    theme(legend.position = "right") +
    ggtitle(.title)
}
p_blank <- ggplot() + theme_void()
pdf("paper_figures/ExperimentOverview/cluster_ref.pdf",
    width = 8, height = 8)
ggarrange(plotClusterPatternByExperiment(clusterdf_list$HAP4_2$df, .experiment = "HAP4"),
          p_blank,
          plotClusterPatternByExperiment(clusterdf_list$CC_2$df, .experiment = "CC"),
          plotClusterPatternByExperiment(clusterdf_list$LowN_2$df, .experiment = "LowN"),
          p_blank,
          plotClusterPatternByExperiment(clusterdf_list$LowPi_2$df, .experiment = "LowPi"),
          plotClusterPatternByExperiment(clusterdf_list$Heat_2$df, .experiment = "Heat"),
          p_blank,
          plotClusterPatternByExperiment(clusterdf_list$Cold_2$df, .experiment = "Cold"),
          nrow = 3, ncol = 3, common.legend = FALSE, widths = c(2, 0.5, 2))
## `summarise()` has grouped output by 'time_point_num'.
## You can override using the `.groups` argument.
## `summarise()` has grouped output by 'time_point_num'.
## You can override using the `.groups` argument.
## `summarise()` has grouped output by 'time_point_num'.
## You can override using the `.groups` argument.
## `summarise()` has grouped output by 'time_point_num'.
## You can override using the `.groups` argument.
## `summarise()` has grouped output by 'time_point_num'.
## You can override using the `.groups` argument.
## `summarise()` has grouped output by 'time_point_num'.
## You can override using the `.groups` argument.
dev.off()
## quartz_off_screen 
##                 2

Similar proportions of genes are diverging in each environment

Stacked bars of 4 divergence categories in each environment:

plotdf <- finaldf |> dplyr::select(gene_name, experiment, group4) |> 
  pivot_wider(id_cols = c("gene_name"), names_from = "experiment",
              values_from = "group4") |> 
  pivot_longer(cols = ExperimentNames, names_to = "experiment",
               values_to = "group5") # to add NA values for low expression
## Warning: Using an external vector in selections was deprecated in
## tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(ExperimentNames)
## 
##   # Now:
##   data %>% select(all_of(ExperimentNames))
## 
## See
## <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where
## this warning was generated.
plotdf$group5 <- if_else(is.na(plotdf$group5), true = "lowly expressed",
                         false = plotdf$group5) |> 
  factor(levels = c("conserved level and plasticity",
                    "conserved level, diverged plasticity",
                    "diverged level, conserved plasticity",
                    "diverged level and plasticity",
                    "lowly expressed"))
plotdf$experiment <- factor(plotdf$experiment, levels = ExperimentNames)
p <- ggplot(plotdf, aes(x = experiment)) + 
  geom_bar_pattern(aes(fill = group5, pattern = group5), pattern_fill = "black") +
  scale_fill_discrete(type = colordf[colordf$scheme == "group4",]$type,
                      limits = colordf[colordf$scheme == "group4",]$limits) +
  scale_pattern_discrete(choices = colordf[colordf$scheme == "group4",]$pattern,
                         limits = colordf[colordf$scheme == "group4",]$limits) +
  scale_x_discrete(limits = ExperimentNames, labels = LongExperimentNames) +
  theme_classic() +
  theme(legend.title = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        legend.position = "bottom") +
  ylab("number of genes") +
  xlab("") +
  guides(fill = guide_legend(nrow = 3))
pdf("paper_figures/EnvironmentalPatterns/stacked_bars.pdf",
    width = 5, height = 6)
p
dev.off()
## quartz_off_screen 
##                 2

Ridgeline plot of log2 fold changes in each environment:

plotdf <- filter(finaldf, level == "diverged")
filter(finaldf, plasticity == "conserved" & level == "conserved") |> group_by(experiment) |> 
  summarise(n = n())
## # A tibble: 6 × 2
##   experiment     n
##   <chr>      <int>
## 1 CC          1329
## 2 Cold        1416
## 3 HAP4        2018
## 4 Heat        1586
## 5 LowN        1920
## 6 LowPi       1658
# accompanying percentages of upScer and upSpar genes
pctdf <- plotdf |> group_by(experiment) |> 
  summarise(pct_upcer = round(sum(effect_size_species > 0)/length(effect_size_species), digits = 2)*100,
            pct_uppar = round(sum(effect_size_species < 0)/length(effect_size_species), digits = 2)*100,
            n = length(effect_size_species)) |> 
  pivot_longer(cols = c("pct_upcer", "pct_uppar"),
               names_to = "direction", values_to = "pct") |> 
  mutate(x_pos = if_else(direction == "pct_upcer",
                         true = 5, false = -5))
pctdf$y_pos <- sapply(pctdf$experiment, \(e) which(ExperimentNames == e) + 0.5)
p <- ggplot(data = plotdf, aes(x = effect_size_species, y = experiment, fill = experiment)) +
  geom_density_ridges(data = plotdf) +
  geom_text(data = pctdf, aes(x = x_pos, y = y_pos, label = paste0(pct, "%"))) +
  theme_classic() +
  theme(legend.position = "none") +
  ylab("") +
  xlab("log2 fold change") +
  scale_fill_discrete(limits = colordf[colordf$scheme == "experiment",]$limits,
                      type = colordf[colordf$scheme == "experiment",]$type) +
  scale_y_discrete(limits = ExperimentNames, labels = LongExperimentNames)
p
## Picking joint bandwidth of 0.265

pdf("paper_figures/EnvironmentalPatterns/ridgeline.pdf",
    width = 4, height = 3)
p
## Picking joint bandwidth of 0.265
dev.off()
## quartz_off_screen 
##                 2

Heatmap of species-specific and reversals of expression plasticity

plotdf <- finaldf |> # filter(plasticity == "diverged") |> 
  group_by(experiment, cer, par) |> 
  summarise(n = n())
## `summarise()` has grouped output by 'experiment', 'cer'.
## You can override using the `.groups` argument.
plotdf$long_experiment <- map(plotdf$experiment, \(e) {
  LongExperimentNames[which(ExperimentNames == e)]
}) |> factor(levels = LongExperimentNames)
plotdf$cer <- factor(plotdf$cer, levels = c("0", "1", "2"))
plotdf$par <- factor(plotdf$par, levels = c("2", "1", "0"))
plotdf$type <- map2(plotdf$cer, plotdf$par, \(x, y) {
  if (x == y) {
    if (x == 0 & y == 0) {
      return("conserved static")
    }
    return("conserved plastic")
  }
  if (x == 0) {
    return("Spar-unique")
  }
  if (y == 0) {
    return("Scer-unique")
  }
  else {
    return("reversal")
  }
}) |> unlist()
p <- ggplot(plotdf, aes(x = cer, y = par, fill = type)) +
  geom_tile() +
  geom_text(aes(label = n,
                color = grepl(pattern = "conserved", x = type))) +
  scale_color_discrete(limits = c(TRUE, FALSE),
                       type = c("grey40", "white")) +
  scale_fill_discrete(limits = colordf[colordf$scheme == "plasticity",]$limits,
                      type = colordf[colordf$scheme == "plasticity",]$type) +
   facet_wrap(~long_experiment) +
  theme_classic() +
  xlab("Scer plasticity cluster") +
  ylab("Spar plasticity cluster") +
  guides(fill=guide_legend(title="number of genes")) +
  theme(legend.position = "none")
pdf("paper_figures/EnvironmentalPatterns/plasticity_counts_heatmap.pdf",
    width = 4, height = 3)
p
dev.off()
## quartz_off_screen 
##                 2

Divergence category in one environment is a poor predictor of other environments

plotdf <- finaldf |> dplyr::select(gene_name, experiment, group4) |> 
  # pivoting wider and longer again to add the genes that have been removed for low expression
  pivot_wider(id_cols = "gene_name", values_from = "group4", names_from = "experiment") |> 
  pivot_longer(cols = all_of(ExperimentNames),
               names_to = "experiment", values_to = "group4")
plotdf$group4[is.na(plotdf$group4)] <- "lowly expressed"
plot_mat <- plotdf |> 
  pivot_wider(id_cols = "gene_name",
              names_from = "experiment", 
              values_from = "group4")
colnames_plotmat <- plot_mat$gene_name
rownames_plotmat <- colnames(plot_mat)
plot_mat <- data.frame(plot_mat) |> t()
colnames(plot_mat) <- colnames_plotmat
rownames(plot_mat) <- rownames_plotmat
plot_mat <- plot_mat[rownames(plot_mat) != "gene_name",]
plot_mat <- plot_mat |> as.matrix()
clrs <- structure(colordf[colordf$scheme == "group4",]$type, 
                  names = colordf[colordf$scheme == "group4",]$limits)
majority_NA <- apply(plot_mat, 2, \(x) {return(sum(is.na(x)) > 2)})
sum(majority_NA)
## [1] 0
plot_mat <- plot_mat[,!majority_NA]
# sorting cols by HAP4's order
col_order_vec <- order(factor(plot_mat["HAP4",], levels = colordf[colordf$scheme == "group4",]$limits))

# # tests for OrderGenesByGroup (moved to function script b/c we use it in multiple scripts now)
# # subset of plotmat
# orderGenesByGroup(.mat = plot_mat[, 1:100]) |> dim()

# ordering full plotmat
ordered_plot_mat <- orderGenesByGroup(.mat = plot_mat[ExperimentNames,])
rownames(ordered_plot_mat) <- LongExperimentNames
plotdf <- map(LongExperimentNames, \(e) {
  idx <- which(LongExperimentNames == e)
  return(tibble(gene_name = colnames(ordered_plot_mat),
                environment = e,
                group4 = ordered_plot_mat[idx,],
                x = c(1:ncol(ordered_plot_mat)),
                y = c(length(LongExperimentNames):1)[idx])) # descending from top
}) |> purrr::reduce(.f = bind_rows)
p <- ggplot(plotdf, aes(x = x, y = y, fill = group4)) +
  geom_tile_pattern(aes(fill = group4, pattern = group4), pattern_fill = "black",
                    pattern_density = 0.05, pattern_size = 0.05) +
  scale_fill_discrete(limits = colordf[colordf$scheme == "group4",]$limits,
                      type = colordf[colordf$scheme == "group4",]$type) +
  scale_pattern_discrete(limits = colordf[colordf$scheme == "group4",]$limits,
                         choices = colordf[colordf$scheme == "group4",]$pattern) +
  theme_void()
p

# plotting
pdf("paper_figures/EnvironmentalPatterns/discrete_heatmapLines.pdf",
    width = 14, height = 2)
p
dev.off()
## quartz_off_screen 
##                 2

Level divergence is consistent across environments and mostly between strains

Genes with higher Log2 fold change in Scer:

plot_mat <- matrix(nrow = length(ExperimentNames),
                   ncol = length(ExperimentNames))
# up in Scer
for (e_row in ExperimentNames) {
  for (e_col in ExperimentNames) {
    e_gene_idxs <- finaldf |> filter(experiment == e_row &
                                       level == "diverged" &
                                       sign(effect_size_species) == 1) |> 
      dplyr::select(gene_name) |> pull()
    avg_lfc <- finaldf |> filter(experiment == e_col & 
                                   gene_name %in% e_gene_idxs) |> 
      dplyr::select(effect_size_species) |> pull() |> mean()
    plot_mat[which(ExperimentNames == e_row),
             which(ExperimentNames == e_col)] <- avg_lfc
  }
}
colnames(plot_mat) <- LongExperimentNames
rownames(plot_mat) <- LongExperimentNames

col_fun = colorRamp2(c(-1, 0, 1), c("blue2", "white", "orange1"))
p <- Heatmap(plot_mat, col = col_fun,
        row_order = LongExperimentNames, column_order = LongExperimentNames,
        row_names_side = "left", heatmap_legend_param = list(title = ""),
        cell_fun = function(j, i, x, y, width, height, fill) {
          grid.text(sprintf("%.2f", plot_mat[i, j]), x, y, gp = gpar(fontsize = 10))
        })
p

pdf("paper_figures/EnvironmentalPatterns/level_divergence_heatmap_up_scer.pdf",
    width = 5, height = 3)
p
dev.off()
## quartz_off_screen 
##                 2

Genes with higher Log2 fold change in Spar:

plot_mat <- matrix(nrow = length(ExperimentNames),
                   ncol = length(ExperimentNames))
for (e_row in ExperimentNames) {
  for (e_col in ExperimentNames) {
    e_gene_idxs <- finaldf |> filter(experiment == e_row &
                                       level == "diverged" & 
                                       sign(effect_size_species) == -1) |> 
      dplyr::select(gene_name) |> pull()
    avg_lfc <- finaldf |> filter(experiment == e_col & 
                                   gene_name %in% e_gene_idxs) |> 
      dplyr::select(effect_size_species) |> pull() |> mean()
    plot_mat[which(ExperimentNames == e_row),
             which(ExperimentNames == e_col)] <- avg_lfc
  }
}
colnames(plot_mat) <- LongExperimentNames
rownames(plot_mat) <- LongExperimentNames

col_fun = colorRamp2(c(-1, 0, 1), c("blue2", "white", "orange1"))
p <- Heatmap(plot_mat, col = col_fun,
        row_order = LongExperimentNames, column_order = LongExperimentNames,
        row_names_side = "left", heatmap_legend_param = list(title = ""),
        cell_fun = function(j, i, x, y, width, height, fill) {
          grid.text(sprintf("%.2f", plot_mat[i, j]), x, y, gp = gpar(fontsize = 10))
        })
p

pdf("paper_figures/EnvironmentalPatterns/level_divergence_heatmap_up_spar.pdf",
    width = 5, height = 3)
p
dev.off()
## quartz_off_screen 
##                 2

Plasticity divergence is unique to each environment

Y axis: environment those 2-1 or 1-2 divergers were ID’d in X axis: how their plasticity divergence looks in other environments How do we measure plasticity divergence? Correlation of avg expr between species

# Given gene idxs and experiment, 
# returns between-species correlation
# of average expression across timepoints
getplasticityCorr <- function(.gene_idxs, .experiment_name) {
  condition_vec <- info |> filter(experiment == .experiment_name) |> 
    dplyr::select(condition) |> pull()
  cer_vec <- collapsed$cer[.gene_idxs, condition_vec] |> 
    colMeans(na.rm = TRUE)
  par_vec <- collapsed$par[.gene_idxs, condition_vec] |> 
    colMeans(na.rm = TRUE)
  return(cor(cer_vec, par_vec))
}
# tests for getplasticity Corr
# HAP4 2-1, should be very uncorrelated in Sat Growth
gene_idxs <- finaldf |> filter(experiment == "HAP4" &
                                 cer == 2 & par == 1) |> 
  dplyr::select(gene_name) |> pull()
getplasticityCorr(gene_idxs, .experiment_name = "HAP4")
##            [,1]
## [1,] -0.9359643
getplasticityCorr(gene_idxs, .experiment_name = "LowN")
##          [,1]
## [1,] 0.906296
getplasticityCorr(gene_idxs, .experiment_name = "LowPi")
##           [,1]
## [1,] 0.9128942
getplasticityCorr(gene_idxs, .experiment_name = "CC")
##           [,1]
## [1,] 0.6486633
getplasticityCorr(gene_idxs, .experiment_name = "Heat")
##           [,1]
## [1,] 0.9786642
getplasticityCorr(gene_idxs, .experiment_name = "Cold")
##            [,1]
## [1,] -0.4895706

Heatmap of species correlations for genes increasing in Spar and decreasing in Scer:

# 2-1 divergers
plot_mat <- matrix(nrow = length(ExperimentNames),
                   ncol = length(ExperimentNames))
for (e_row in ExperimentNames) {
  for (e_col in ExperimentNames) {
    e_gene_idxs <- finaldf |> filter(experiment == e_row &
                                       cer == 2 & par == 1) |> 
      dplyr::select(gene_name) |> pull()
    e_cor <- getplasticityCorr(e_gene_idxs, .experiment_name = e_col)
    plot_mat[which(ExperimentNames == e_row),
             which(ExperimentNames == e_col)] <- e_cor
  }
}
colnames(plot_mat) <- LongExperimentNames
rownames(plot_mat) <- LongExperimentNames

# plotting
col_fun = colorRamp2(c(-1, 0, 1), c("red2", "white", "skyblue"))
p <- Heatmap(plot_mat, col = col_fun,
        row_order = LongExperimentNames, column_order = LongExperimentNames,
        row_names_side = "left", heatmap_legend_param = list(title = ""),
        cell_fun = function(j, i, x, y, width, height, fill) {
          grid.text(sprintf("%.2f", plot_mat[i, j]), x, y, gp = gpar(fontsize = 10))
        })
p

pdf("paper_figures/EnvironmentalPatterns/heatmap_21.pdf",
    width = 5, height = 3)
p
dev.off()
## quartz_off_screen 
##                 2

Heatmap of species correlations for genes decreasing in Spar and increasing in Scer:

# 1-2 divergers
plot_mat <- matrix(nrow = length(ExperimentNames),
                   ncol = length(ExperimentNames))
for (e_row in ExperimentNames) {
  for (e_col in ExperimentNames) {
    e_gene_idxs <- finaldf |> filter(experiment == e_row &
                                       cer == 1 & par == 2) |> 
      dplyr::select(gene_name) |> pull()
    e_cor <- getplasticityCorr(e_gene_idxs, .experiment_name = e_col)
    plot_mat[which(ExperimentNames == e_row),
             which(ExperimentNames == e_col)] <- e_cor
  }
}
colnames(plot_mat) <- LongExperimentNames
rownames(plot_mat) <- LongExperimentNames

# plotting
col_fun = colorRamp2(c(-1, 0, 1), c("red2", "white", "skyblue"))
p <- Heatmap(plot_mat, col = col_fun,
        row_order = LongExperimentNames, column_order = LongExperimentNames,
        row_names_side = "left", heatmap_legend_param = list(title = ""),
        cell_fun = function(j, i, x, y, width, height, fill) {
          grid.text(sprintf("%.2f", plot_mat[i, j]), x, y, gp = gpar(fontsize = 10))
        })
p

pdf("paper_figures/EnvironmentalPatterns/heatmap_12.pdf",
    width = 5, height = 3)
p
dev.off()
## quartz_off_screen 
##                 2

All plasticity-divergers grouped together:

getplasticityCorrAllClusters <- function(.gene_idxs, .experiment_name) {
  condition_vec <- info |> filter(experiment == .experiment_name) |> 
    dplyr::select(condition) |> pull()
  
  clust_pairs <- finaldf |> filter(gene_name %in% .gene_idxs &
                                     experiment == .experiment_name) |> 
    group_by(cer, par) |> summarise(n = n()) |> ungroup()
  cors <- map2(clust_pairs$cer, clust_pairs$par, \(x, y) {
    clust_idxs <- finaldf |> filter(gene_name %in% .gene_idxs &
                                      experiment == .experiment_name &
                                      cer == x & par == y) |> 
      dplyr::select(gene_name) |> pull()
    cer_vec <- collapsed$cer[clust_idxs, condition_vec, drop = FALSE] |> 
      colMeans(na.rm = TRUE)
    par_vec <- collapsed$par[clust_idxs, condition_vec, drop = FALSE] |> 
      colMeans(na.rm = TRUE)
    return(cor(cer_vec, par_vec))
  }) |> unlist()
  return(as.numeric(sum(cors*clust_pairs$n)/sum(clust_pairs$n))) # weighted average
}

# tests for getplasticityCorrBoth
# LowPi 1-2 alone:
gene_idxs <- finaldf |> filter(experiment == "LowPi" &
                                 plasticity == "diverged" &
                                 cer == 1 & par == 2) |> 
  dplyr::select(gene_name) |> pull()
testcor12 <- getplasticityCorrAllClusters(gene_idxs, .experiment_name = "LowPi")
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
testcor12
## [1] -0.9021212
getplasticityCorr(gene_idxs, .experiment_name = "LowPi") # should be same number
##            [,1]
## [1,] -0.9021212
# LowPi 2-1 alone:
gene_idxs <- finaldf |> filter(experiment == "LowPi" &
                                 plasticity == "diverged" &
                                 cer == 2 & par == 1) |> 
  dplyr::select(gene_name) |> pull()
testcor21 <- getplasticityCorrAllClusters(gene_idxs, .experiment_name = "LowPi")
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
testcor21
## [1] -0.9543984
getplasticityCorr(gene_idxs, .experiment_name = "LowPi") # should be same number
##            [,1]
## [1,] -0.9543984
# weights
finaldf |> filter(experiment == "LowPi" &
                    plasticity == "diverged" &
                    cer != 0 & par != 0) |> 
  dplyr::select(cer, par) |> table()
##    par
## cer   1   2
##   1   0 375
##   2 803   0
# what both should be:
sum(testcor12*372, testcor21*795)/(372 + 795) # with weighted average
## [1] -0.9377342
mean(c(testcor12, testcor21)) # without weighted average
## [1] -0.9282598
# Both 1-2 and 2-1:
gene_idxs <- finaldf |> filter(experiment == "LowPi" &
                                 plasticity == "diverged" &
                                 cer != 0 & par != 0) |> 
  dplyr::select(gene_name) |> pull()
getplasticityCorrAllClusters(gene_idxs, .experiment_name = "LowPi") # should be same as manual calculation
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## [1] -0.9377567
# test2: HAP4/CC failed until we added drop = FALSE
gene_idxs <- finaldf |> filter(experiment == "HAP4" &
                                 plasticity == "diverged" &
                                 cer != 0 & par != 0) |> 
  dplyr::select(gene_name) |> pull()
finaldf |> filter(experiment == "CC" & gene_name %in% gene_idxs) |> 
  dplyr::select(cer, par) |> table() # b/c there's only one 1-0 gene
##    par
## cer   0   1   2
##   0   6  12   5
##   1  31 148  32
##   2   9  42  50
getplasticityCorrAllClusters(gene_idxs, .experiment_name = "CC")
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## [1] 0.5749308
# plotting
plot_mat <- matrix(nrow = length(ExperimentNames),
                   ncol = length(ExperimentNames))
for (e_row in ExperimentNames) {
  for (e_col in ExperimentNames) {
    cat(e_row, e_col, "\n")
    e_gene_idxs <- finaldf |> filter(experiment == e_row &
                                         cer != 0 & par != 0 &
                                       plasticity == "diverged") |> 
      dplyr::select(gene_name) |> pull()
    e_cor <- getplasticityCorrAllClusters(.gene_idxs = e_gene_idxs,
                                        .experiment_name = e_col)
    plot_mat[which(ExperimentNames == e_row),
             which(ExperimentNames == e_col)] <- e_cor
  }
}
## HAP4 HAP4
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## HAP4 CC
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## HAP4 LowN
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## HAP4 LowPi
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## HAP4 Heat
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## HAP4 Cold
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## CC HAP4
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## CC CC
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## CC LowN
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## CC LowPi
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## CC Heat
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## CC Cold
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## LowN HAP4
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## LowN CC
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## LowN LowN
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## LowN LowPi
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## LowN Heat
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## LowN Cold
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## LowPi HAP4
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## LowPi CC
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## LowPi LowN
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## LowPi LowPi
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## LowPi Heat
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## LowPi Cold
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## Heat HAP4
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## Heat CC
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## Heat LowN
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## Heat LowPi
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## Heat Heat
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## Heat Cold
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## Cold HAP4
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## Cold CC
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## Cold LowN
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## Cold LowPi
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## Cold Heat
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
## Cold Cold
## `summarise()` has grouped output by 'cer'. You can
## override using the `.groups` argument.
colnames(plot_mat) <- LongExperimentNames
rownames(plot_mat) <- LongExperimentNames

# plotting
col_fun = colorRamp2(c(-1, 0, 1), c("red", "white", "skyblue"))
p <- Heatmap(plot_mat, col = col_fun,
        row_order = LongExperimentNames, column_order = LongExperimentNames,
        row_names_side = "left", heatmap_legend_param = list(title = ""),
        cell_fun = function(j, i, x, y, width, height, fill) {
          grid.text(sprintf("%.2f", plot_mat[i, j]), x, y, gp = gpar(fontsize = 10))
        })
p

pdf("paper_figures/EnvironmentalPatterns/heatmap_plasticity_all_clusters.pdf",
    width = 5, height = 4)
p
dev.off()
## quartz_off_screen 
##                 2

Example expression profiles for figure

Genes increasing in Scer, decreasing in Spar, identified in Low Phosphate:

gene_idxs <- finaldf |> filter(experiment == "LowPi" & cer == 1 & par == 2) |> 
  dplyr::select(gene_name) |> pull()
p <- plotExpressionProfilePair(.cts1 = collapsed$cer[gene_idxs,],
                          .cts2 = collapsed$par[gene_idxs,],
                          .info1 = info,
                          .info2 = info,,
                          .method = "line",
                          .show_points = FALSE,
                          .show_confidence_intervals = TRUE,
                          .normalization = "log2")
## `summarise()` has grouped output by 'group_id',
## 'gene_name', 'experiment'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'time_point_num',
## 'experiment'. You can override using the `.groups`
## argument.
## Adding missing grouping variables: `time_point_num`,
## `experiment`
## Adding missing grouping variables: `time_point_num`,
## `experiment`
## Adding missing grouping variables: `time_point_num`,
## `experiment`
p

pdf("paper_figures/EnvironmentalPatterns/LowPi12.pdf",
    width = 12, height = 2)
p
dev.off()
## quartz_off_screen 
##                 2

Genes with higher expression in Scer, identified in the Low Nitrogen environment:

gene_idxs <- finaldf |> filter(experiment == "LowN" & level == "diverged" &
                                 sign(effect_size_species) == 1) |> 
  dplyr::select(gene_name) |> pull()
p <- plotExpressionProfilePair(.cts1 = collapsed$cer[gene_idxs,],
                          .cts2 = collapsed$par[gene_idxs,],
                          .info1 = info,
                          .info2 = info,,
                          .method = "line",
                          .show_points = FALSE,
                          .show_confidence_intervals = TRUE,
                          .normalization = "log2")
## `summarise()` has grouped output by 'group_id',
## 'gene_name', 'experiment'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'time_point_num',
## 'experiment'. You can override using the `.groups`
## argument.
## Adding missing grouping variables: `time_point_num`,
## `experiment`
## Adding missing grouping variables: `time_point_num`,
## `experiment`
## Adding missing grouping variables: `time_point_num`,
## `experiment`
pdf("paper_figures/EnvironmentalPatterns/LowN_upcer.pdf",
    width = 12, height = 2)
p
dev.off()
## quartz_off_screen 
##                 2

Following up on additional examples flagged in heatmap

# following up on additional examples from heatmap
# Low Nitrogen 1-2, plasticity divergers have strongest negative correlation in Cold? -.97?
gene_idxs <- finaldf |> filter(experiment == "LowN" & cer == 1 & par == 2) |> 
  dplyr::select(gene_name) |> pull()
plotExpressionProfilePair(.cts1 = collapsed$cer[gene_idxs,],
                          .cts2 = collapsed$par[gene_idxs,],
                          .info1 = info,
                          .info2 = info,,
                          .method = "line",
                          .show_points = FALSE,
                          .show_confidence_intervals = TRUE,
                          .normalization = "log2")
## `summarise()` has grouped output by 'group_id',
## 'gene_name', 'experiment'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'time_point_num',
## 'experiment'. You can override using the `.groups`
## argument.
## Adding missing grouping variables: `time_point_num`,
## `experiment`
## Adding missing grouping variables: `time_point_num`,
## `experiment`
## Adding missing grouping variables: `time_point_num`,
## `experiment`

length(gene_idxs) # yes, but LowN has the fewest reversals
## [1] 52
# following up on additional examples from heatmap
# Diauxic Shift 2-1, plasticity divergers also have strongest negative correlation in Cold?
gene_idxs <- finaldf |> filter(experiment == "HAP4" & cer == 2 & par == 1) |> 
  dplyr::select(gene_name) |> pull()
plotExpressionProfilePair(.cts1 = collapsed$cer[gene_idxs,],
                          .cts2 = collapsed$par[gene_idxs,],
                          .info1 = info,
                          .info2 = info,,
                          .method = "line",
                          .show_points = FALSE,
                          .show_confidence_intervals = TRUE,
                          .normalization = "log2")
## `summarise()` has grouped output by 'group_id',
## 'gene_name', 'experiment'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'time_point_num',
## 'experiment'. You can override using the `.groups`
## argument.
## Adding missing grouping variables: `time_point_num`,
## `experiment`
## Adding missing grouping variables: `time_point_num`,
## `experiment`
## Adding missing grouping variables: `time_point_num`,
## `experiment`

length(gene_idxs)
## [1] 289
# following up on additional examples from heatmap
# HUShock 2-1, plasticity divergers also have strongest negative correlation in LowPi?
gene_idxs <- finaldf |> filter(experiment == "CC" & cer == 2 & par == 1) |> 
  dplyr::select(gene_name) |> pull()
plotExpressionProfilePair(.cts1 = collapsed$cer[gene_idxs,],
                          .cts2 = collapsed$par[gene_idxs,],
                          .info1 = info,
                          .info2 = info,,
                          .method = "line",
                          .show_points = FALSE,
                          .show_confidence_intervals = TRUE,
                          .normalization = "log2")
## `summarise()` has grouped output by 'group_id',
## 'gene_name', 'experiment'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'time_point_num',
## 'experiment'. You can override using the `.groups`
## argument.
## Adding missing grouping variables: `time_point_num`,
## `experiment`
## Adding missing grouping variables: `time_point_num`,
## `experiment`
## Adding missing grouping variables: `time_point_num`,
## `experiment`

length(gene_idxs)
## [1] 567

Finally as a contrast, a pair with high correlation:

# following up on additional examples from heatmap
# HAP4 1-2, plasticity divergers have strong positive correlation Low Nitrogen.
gene_idxs <- finaldf |> filter(experiment == "HAP4" & cer == 1 & par == 2) |> 
  dplyr::select(gene_name) |> pull()
plotExpressionProfilePair(.cts1 = collapsed$cer[gene_idxs,],
                          .cts2 = collapsed$par[gene_idxs,],
                          .info1 = info,
                          .info2 = info,,
                          .method = "line",
                          .show_points = FALSE,
                          .show_confidence_intervals = TRUE,
                          .normalization = "log2")
## `summarise()` has grouped output by 'group_id',
## 'gene_name', 'experiment'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'time_point_num',
## 'experiment'. You can override using the `.groups`
## argument.
## Adding missing grouping variables: `time_point_num`,
## `experiment`
## Adding missing grouping variables: `time_point_num`,
## `experiment`
## Adding missing grouping variables: `time_point_num`,
## `experiment`

length(gene_idxs)
## [1] 55

Supplemental figures

Exploration: Diagonal is still the most uncorrelated when taking average cor of each ortholog pair

# (as opposed to one cor of average expr of all divergent genes)
# But now the correlations are much weaker magnitude b/c most single genes are weakly correlated moving the average towards 0
getplasticityCorrPerGene <- function(.gene_idxs, .experiment_name) {
  # filter for genes expressed high enough in this environment
  good_expr_genes <- finaldf |> filter(experiment == .experiment_name) |> 
    dplyr::select(gene_name) |> pull()
  .gene_idxs <- .gene_idxs[.gene_idxs %in% good_expr_genes]
  # calculating correlation of each gene btwn parental orthologs
  condition_vec <- info |> filter(experiment == .experiment_name) |> 
    dplyr::select(condition) |> pull()
  cer_mat <- collapsed$cer[.gene_idxs, condition_vec]
  par_mat <- collapsed$par[.gene_idxs, condition_vec]
  cor_vec <- map(.gene_idxs, \(g) {
    return(cor(cer_mat[g,], par_mat[g,]))
  }) |> unlist()
  return(mean(cor_vec, na.rm = TRUE)) # NAs can arise from one species' counts being all 0s
}
# tests for getplasticityCorrPerGene
# HAP4 all diverged plasticity
gene_idxs <- finaldf |> filter(experiment == "HAP4" &
                                 plasticity == "diverged" & 
                                 cer != 0 & par != 0) |> 
  dplyr::select(gene_name) |> pull()
getplasticityCorrPerGene(gene_idxs, .experiment_name = "HAP4")
## [1] -0.4089101
getplasticityCorrPerGene(gene_idxs, .experiment_name = "LowN")
## [1] 0.3788549
getplasticityCorrPerGene(gene_idxs, .experiment_name = "LowPi")
## [1] 0.2569421
getplasticityCorrPerGene(gene_idxs, .experiment_name = "CC")
## [1] 0.3012489
getplasticityCorrPerGene(gene_idxs, .experiment_name = "Heat")
## [1] 0.3936355
getplasticityCorrPerGene(gene_idxs, .experiment_name = "Cold")
## [1] 0.3931001
# all plasticity divergers
plot_mat <- matrix(nrow = length(ExperimentNames),
                   ncol = length(ExperimentNames))
for (e_row in ExperimentNames) {
  for (e_col in ExperimentNames) {
    e_gene_idxs <- finaldf |> filter(experiment == e_row &
                                       plasticity == "diverged" & 
                                       cer != 0 & par != 0) |> 
      dplyr::select(gene_name) |> pull()
    e_cor <- getplasticityCorrPerGene(e_gene_idxs, .experiment_name = e_col)
    plot_mat[which(ExperimentNames == e_row),
             which(ExperimentNames == e_col)] <- e_cor
  }
}
colnames(plot_mat) <- LongExperimentNames
rownames(plot_mat) <- LongExperimentNames

# plotting
col_fun = colorRamp2(c(0, 0.25, 0.5), c("red2", "white", "skyblue"))
Heatmap(plot_mat, col = col_fun,
        row_order = LongExperimentNames, column_order = LongExperimentNames,
        row_names_side = "left", heatmap_legend_param = list(title = ""),
        cell_fun = function(j, i, x, y, width, height, fill) {
          grid.text(sprintf("%.2f", plot_mat[i, j]), x, y, gp = gpar(fontsize = 10))
        })

Do genes that are diverging in plasticity but not level tend to criss-cross their expression? I.e. begin higher in one species and end higher in the other?

# example gene first, 2-1 HAP4
gene_idx <- finaldf |> filter(experiment == "HAP4" & 
                                level == "conserved" &
                                cer == 2 & par == 1) |> 
  dplyr::select(gene_name) |> pull() |> sample(1)
# plotEnvironments(.gene_idxs = "YMR221C") # first example we found, strong CC level divergence
plotEnvironments(.gene_idxs = gene_idx)
## `summarise()` has grouped output by 'group_id',
## 'gene_name', 'experiment'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'time_point_num',
## 'experiment'. You can override using the `.groups`
## argument.
## Adding missing grouping variables: `time_point_num`,
## `experiment`
## Adding missing grouping variables: `time_point_num`,
## `experiment`
## Adding missing grouping variables: `time_point_num`,
## `experiment`

# Figure: plasticity-diverging genes have higher expression at TP0 in decreasing species

# ...in environment they were diverging in plasticity in:
getTP0AvgExpr <- function(.gene_name, .experiment, .organism,
                          .first_or_last = "first") {
  if (.organism == "cer") {
    cts_mat <- collapsed$cer
    info_df <- info
  }
  if (.organism == "par") {
    cts_mat <- collapsed$par
    info_df <- info
  }
  if (.first_or_last == "first") {
    tp0_condition <- info_df |> filter(experiment %in% .experiment) |> 
      filter(time_point_num == min(time_point_num)) |> 
      dplyr::select(condition) |> pull()
    mean_expr <- cts_mat[.gene_name, tp0_condition] |> mean()
    return(mean_expr)
  }
  if (.first_or_last == "last") {
    tpLast_condition <- info_df |> filter(experiment == .experiment) |> 
      filter(time_point_num == max(time_point_num)) |> 
      dplyr::select(condition) |> pull()
    mean_expr <- cts_mat[.gene_name, tpLast_condition] |> mean()
    return(mean_expr)
  }
}
# tests for getTP0AvgExpr
getTP0AvgExpr(.gene_name = "YGR192C", .experiment = "HAP4",
              .organism = "cer")
## [1] 19578.67
getTP0AvgExpr(.gene_name = "YGR192C", .experiment = "HAP4",
              .organism = "par")
## [1] 4457
getTP0AvgExpr(.gene_name = "YGR192C", .experiment = setdiff(ExperimentNames, "HAP4"),
              .organism = "cer")
## [1] 18328.67
getTP0AvgExpr(.gene_name = "YGR192C", .experiment = setdiff(ExperimentNames, "HAP4"),
              .organism = "par")
## [1] 3913
getTP0AvgExpr(.gene_name = "YGR192C", .experiment = "HAP4",
              .organism = "cer", .first_or_last = "last")
## [1] 8529.5
getTP0AvgExpr(.gene_name = "YGR192C", .experiment = "HAP4",
              .organism = "par", .first_or_last = "last")
## [1] 623

Getting average expression of each plasticity-diverging gene in the environment they were detected in:

plotdf <- finaldf |> filter(plasticity == "diverged" & level == "conserved")
plotdf$tp0_cer <- purrr::map(c(1:nrow(plotdf)), .f = \(i) {
  getTP0AvgExpr(.gene_name = plotdf$gene_name[i],
                .experiment = plotdf$experiment[i],
                .organism = "cer",
                .first_or_last = "first")
}) |> unlist()
plotdf$tp0_par <- purrr::map(c(1:nrow(plotdf)), .f = \(i) {
  getTP0AvgExpr(.gene_name = plotdf$gene_name[i],
                .experiment = plotdf$experiment[i],
                .organism = "par",
                .first_or_last = "first")
}) |> unlist()
plotdf$tplast_cer <- purrr::map(c(1:nrow(plotdf)), .f = \(i) {
  getTP0AvgExpr(.gene_name = plotdf$gene_name[i],
                .experiment = plotdf$experiment[i],
                .organism = "cer",
                .first_or_last = "last")
}) |> unlist()
plotdf$tplast_par <- purrr::map(c(1:nrow(plotdf)), .f = \(i) {
  getTP0AvgExpr(.gene_name = plotdf$gene_name[i],
                .experiment = plotdf$experiment[i],
                .organism = "par",
                .first_or_last = "last")
}) |> unlist()

Calling which species was expressed higher at tp0:

plotdf$higher_tp0 <- if_else(plotdf$tp0_cer > plotdf$tp0_par,
                             true = plotdf$tp0_cer, 
                             false = plotdf$tp0_par)
plotdf$lower_tp0 <- if_else(plotdf$tp0_cer > plotdf$tp0_par,
                             true = plotdf$tp0_par, 
                             false = plotdf$tp0_cer)
# NOTE: we're still calling higher or lower relative to tp0
plotdf$higher_tplast <- if_else(plotdf$tp0_cer > plotdf$tp0_par,
                             true = plotdf$tplast_cer, 
                             false = plotdf$tplast_par)
plotdf$lower_tplast <- if_else(plotdf$tp0_cer > plotdf$tp0_par,
                             true = plotdf$tplast_par, 
                             false = plotdf$tplast_cer)
plotdf_full <- left_join(x = pivot_longer(dplyr::select(plotdf, gene_name, experiment, 
                                                 higher_tp0, lower_tp0,
                                                 higher_tplast, lower_tplast), 
                                          cols = c("higher_tp0", "lower_tp0"),
                                          names_to = "higher_or_lower",
                                          values_to = "tp0") |> 
                           mutate(higher_or_lower = gsub("_tp0", "", higher_or_lower)),
                         y = pivot_longer(dplyr::select(plotdf, gene_name, experiment, 
                                                 higher_tp0, lower_tp0,
                                                 higher_tplast, lower_tplast), 
                                          cols = c("higher_tplast", "lower_tplast"),
                                          names_to = "higher_or_lower",
                                          values_to = "tplast") |> 
                           mutate(higher_or_lower = gsub("_tplast", "", higher_or_lower)),
                         by = c("gene_name", "experiment", "higher_or_lower")) |> 
  dplyr::select(gene_name, experiment, higher_or_lower, tp0, tplast) |> 
  mutate(scaled_tp0 = scale(tp0),
         scaled_tplast = scale(tplast)) |> 
  pivot_longer(cols = c("scaled_tp0", "scaled_tplast"),
               names_to = "timepoint",
               values_to = "scaled_expr") |> 
  mutate(timepoint = gsub("scaled_", "", timepoint))

Plotting

plotdf_means <- group_by(plotdf_full, timepoint, higher_or_lower) |>
  summarise(mean_scaled_expr = mean(scaled_expr))
## `summarise()` has grouped output by 'timepoint'. You can
## override using the `.groups` argument.
p_home <- ggplot(plotdf_means, aes(x = factor(timepoint,
                                              levels = c("tp0", "tplast"),
                                              labels = c("first", "last")),
                                   y = mean_scaled_expr)) +
  #geom_boxplot(aes(color = higher_or_lower), position = "identity") +
  geom_line(aes(color = higher_or_lower, group = higher_or_lower)) +
  geom_point(aes(color = higher_or_lower)) +
  scale_color_manual(values = c("red", "black"),
                     limits = c("higher", "lower"),
                     name = "Species had [higher or lower]\nexpression at first timepoint") +
  ylab("Expression (centered and scaled)") +
  xlab("timepoint") +
  theme_classic() +
  ggtitle("Home environment")
p_home

Repeating for non-home environment

gene_env_list <- finaldf |> filter(plasticity == "diverged" & level == "conserved") |> 
  dplyr::select(gene_name, experiment)
plotdf <- purrr::map2(gene_env_list$gene_name, gene_env_list$experiment, \(g, e) {
  finaldf |> filter(gene_name == g & experiment != e)
}) |> purrr::reduce(.f = bind_rows)
plotdf$tp0_cer <- purrr::map(c(1:nrow(plotdf)), .f = \(i) {
  getTP0AvgExpr(.gene_name = plotdf$gene_name[i],
                .experiment = plotdf$experiment[i],
                .organism = "cer",
                .first_or_last = "first")
}) |> unlist()
plotdf$tp0_par <- purrr::map(c(1:nrow(plotdf)), .f = \(i) {
  getTP0AvgExpr(.gene_name = plotdf$gene_name[i],
                .experiment = plotdf$experiment[i],
                .organism = "par",
                .first_or_last = "first")
}) |> unlist()
plotdf$tplast_cer <- purrr::map(c(1:nrow(plotdf)), .f = \(i) {
  getTP0AvgExpr(.gene_name = plotdf$gene_name[i],
                .experiment = plotdf$experiment[i],
                .organism = "cer",
                .first_or_last = "last")
}) |> unlist()
plotdf$tplast_par <- purrr::map(c(1:nrow(plotdf)), .f = \(i) {
  getTP0AvgExpr(.gene_name = plotdf$gene_name[i],
                .experiment = plotdf$experiment[i],
                .organism = "par",
                .first_or_last = "last")
}) |> unlist()

Calling which species was expressed higher at tp0:

plotdf$higher_tp0 <- if_else(plotdf$tp0_cer > plotdf$tp0_par,
                             true = plotdf$tp0_cer, 
                             false = plotdf$tp0_par)
plotdf$lower_tp0 <- if_else(plotdf$tp0_cer > plotdf$tp0_par,
                             true = plotdf$tp0_par, 
                             false = plotdf$tp0_cer)
# NOTE: we're still calling higher or lower relative to tp0
plotdf$higher_tplast <- if_else(plotdf$tp0_cer > plotdf$tp0_par,
                             true = plotdf$tplast_cer, 
                             false = plotdf$tplast_par)
plotdf$lower_tplast <- if_else(plotdf$tp0_cer > plotdf$tp0_par,
                             true = plotdf$tplast_par, 
                             false = plotdf$tplast_cer)
plotdf_full <- left_join(x = pivot_longer(dplyr::select(plotdf, gene_name, experiment, 
                                                 higher_tp0, lower_tp0,
                                                 higher_tplast, lower_tplast), 
                                          cols = c("higher_tp0", "lower_tp0"),
                                          names_to = "higher_or_lower",
                                          values_to = "tp0") |> 
                           mutate(higher_or_lower = gsub("_tp0", "", higher_or_lower)),
                         y = pivot_longer(dplyr::select(plotdf, gene_name, experiment, 
                                                 higher_tp0, lower_tp0,
                                                 higher_tplast, lower_tplast), 
                                          cols = c("higher_tplast", "lower_tplast"),
                                          names_to = "higher_or_lower",
                                          values_to = "tplast") |> 
                           mutate(higher_or_lower = gsub("_tplast", "", higher_or_lower)),
                         by = c("gene_name", "experiment", "higher_or_lower"),
                         relationship = "many-to-many") |> 
  dplyr::select(gene_name, experiment, higher_or_lower, tp0, tplast) |> 
  mutate(scaled_tp0 = scale(tp0),
         scaled_tplast = scale(tplast)) |> 
  pivot_longer(cols = c("scaled_tp0", "scaled_tplast"),
               names_to = "timepoint",
               values_to = "scaled_expr") |> 
  mutate(timepoint = gsub("scaled_", "", timepoint)) |> 
  unique()

Plotting

plotdf_means <- group_by(plotdf_full, timepoint, higher_or_lower) |>
  summarise(mean_scaled_expr = mean(scaled_expr))
## `summarise()` has grouped output by 'timepoint'. You can
## override using the `.groups` argument.
p_other <- ggplot(plotdf_means, aes(x = factor(timepoint,
                                               levels = c("tp0", "tplast"),
                                               labels = c("first", "last")),
                                    y = mean_scaled_expr)) +
  #geom_boxplot(aes(color = higher_or_lower), position = "identity") +
  geom_line(aes(color = higher_or_lower, group = higher_or_lower)) +
  geom_point(aes(color = higher_or_lower)) +
  scale_color_manual(values = c("red", "black"),
                     limits = c("higher", "lower"),
                     name = "Species had [higher or lower]\nexpression at first timepoint") +
  ylab("Expression (centered and scaled)") +
  xlab("timepoint") +
  theme_classic() +
  ggtitle("Other 5 environments")
p_other

Outputting plots

pdf("paper_figures/Supplement/tp0_lines.pdf",
    width = 6, height = 3)
ggarrange(p_home, p_other, nrow = 1, ncol = 2, common.legend = TRUE)
dev.off()
## quartz_off_screen 
##                 2

Now comparing this to random samples of genes

getTP0NormDifference <- function(.gene_name, .experiment, .decreasing_species) {
  avg_expr_decreasing <- getTP0AvgExpr(.gene_name = .gene_name, 
                                       .experiment = .experiment,
                                       .organism = .decreasing_species)
  avg_expr_increasing <- getTP0AvgExpr(.gene_name = .gene_name, 
                                       .experiment = .experiment,
                                       .organism = setdiff(c("cer", "par"), .decreasing_species))
  return((avg_expr_decreasing-avg_expr_increasing)/
           (avg_expr_decreasing + avg_expr_increasing))
}
# tests for getTP0AvgExpr
getTP0NormDifference(.gene_name = "YGR192C", .experiment = "HAP4",
                     .decreasing_species = "cer")
## [1] 0.6291345
# avg Normdiff for groups of genes
getTP0NormDifferenceAverage <- function(.gene_vec, .experiment_vec, .decreasing_species_vec,
                                        .home_experiment = TRUE) {
  stopifnot(all.equal(length(.gene_vec),
                      length(.experiment_vec),
                      length(.decreasing_species_vec)))
  if (.home_experiment) {
    output <- map(c(1:length(.gene_vec)), .f = \(i) {
      getTP0NormDifference(.gene_name = .gene_vec[i],
                           .experiment = .experiment_vec[i],
                           .decreasing_species = .decreasing_species_vec[i])
    }) |> unlist() |> mean(na.rm = TRUE)
  }
  if (!.home_experiment) {
    output <- map(c(1:length(.gene_vec)), .f = \(i) {
      getTP0NormDifference(.gene_name = .gene_vec[i],
                           .experiment = setdiff(ExperimentNames, .experiment_vec[i]),
                           .decreasing_species = .decreasing_species_vec[i])
    }) |> unlist() |> mean(na.rm = TRUE)
  }
  return(output)
}
# tests for getTP0NormDifferenceAverage
getTP0NormDifferenceAverage(.gene_vec = sample(finaldf$gene_name, 3),
                            .experiment_vec = sample(finaldf$experiment, 3),
                            .decreasing_species_vec = sample(c("cer", "par"), replace = TRUE, 3))
## [1] -0.2220307

Random simulation with 100 iterations:

tp0df <- finaldf |> filter(plasticity == "diverged" &
                             level == "conserved" &
                             cer %in% c(1, 2) &
                             par %in% c(1, 2)) |>
  mutate("decreasing_species" = if_else(cer == 1,
                                        true = "par",
                                        false = "cer")) |>
  dplyr::select(gene_name, experiment, cer, par, decreasing_species)

# observed avg norm diff for home environment
obs_home <- getTP0NormDifferenceAverage(.gene_vec = tp0df$gene_name,
                                        .experiment_vec = tp0df$experiment,
                                        .decreasing_species_vec = tp0df$decreasing_species)

# observed avg norm diff for other environments
obs_other <- getTP0NormDifferenceAverage(.gene_vec = tp0df$gene_name,
                                         .experiment_vec = tp0df$experiment,
                                         .decreasing_species_vec = tp0df$decreasing_species,
                                         .home_experiment = FALSE)

# random gene samples (with replacement) in home or other environment
nIter <- 100
randomdf <- map(1:nIter, \(iter) {
  cat(iter, "/", nIter, "\n")
  random_genes <- map(tp0df$experiment, \(e) {
    finaldf |> filter(experiment == e) |> dplyr::select(gene_name) |> pull() |> sample(1)
  }) |> unlist()
  random_org <- sample(c("cer", "par"), length(random_genes), replace = TRUE)
  home <- getTP0NormDifferenceAverage(.gene_vec = random_genes,
                                      .experiment_vec = tp0df$experiment,
                                      .decreasing_species_vec = random_org)
  other <- getTP0NormDifferenceAverage(.gene_vec = random_genes,
                                       .experiment_vec = tp0df$experiment,
                                       .decreasing_species_vec = random_org,
                                       .home_experiment = FALSE)
  return(list(home = home, other = other))
}) |> purrr::reduce(.f = bind_rows)
## 1 / 100 
## 2 / 100 
## 3 / 100 
## 4 / 100 
## 5 / 100 
## 6 / 100 
## 7 / 100 
## 8 / 100 
## 9 / 100 
## 10 / 100 
## 11 / 100 
## 12 / 100 
## 13 / 100 
## 14 / 100 
## 15 / 100 
## 16 / 100 
## 17 / 100 
## 18 / 100 
## 19 / 100 
## 20 / 100 
## 21 / 100 
## 22 / 100 
## 23 / 100 
## 24 / 100 
## 25 / 100 
## 26 / 100 
## 27 / 100 
## 28 / 100 
## 29 / 100 
## 30 / 100 
## 31 / 100 
## 32 / 100 
## 33 / 100 
## 34 / 100 
## 35 / 100 
## 36 / 100 
## 37 / 100 
## 38 / 100 
## 39 / 100 
## 40 / 100 
## 41 / 100 
## 42 / 100 
## 43 / 100 
## 44 / 100 
## 45 / 100 
## 46 / 100 
## 47 / 100 
## 48 / 100 
## 49 / 100 
## 50 / 100 
## 51 / 100 
## 52 / 100 
## 53 / 100 
## 54 / 100 
## 55 / 100 
## 56 / 100 
## 57 / 100 
## 58 / 100 
## 59 / 100 
## 60 / 100 
## 61 / 100 
## 62 / 100 
## 63 / 100 
## 64 / 100 
## 65 / 100 
## 66 / 100 
## 67 / 100 
## 68 / 100 
## 69 / 100 
## 70 / 100 
## 71 / 100 
## 72 / 100 
## 73 / 100 
## 74 / 100 
## 75 / 100 
## 76 / 100 
## 77 / 100 
## 78 / 100 
## 79 / 100 
## 80 / 100 
## 81 / 100 
## 82 / 100 
## 83 / 100 
## 84 / 100 
## 85 / 100 
## 86 / 100 
## 87 / 100 
## 88 / 100 
## 89 / 100 
## 90 / 100 
## 91 / 100 
## 92 / 100 
## 93 / 100 
## 94 / 100 
## 95 / 100 
## 96 / 100 
## 97 / 100 
## 98 / 100 
## 99 / 100 
## 100 / 100
# plotting
plotdf <- randomdf |> pivot_longer(cols = c("home", "other"),
                                            names_to = "environment",
                                            values_to = "norm_diff")
obsdf <- tibble(environment = c("home", "other"),
                norm_diff = c(obs_home, obs_other))
p <- ggplot(plotdf, aes(x = norm_diff)) +
  geom_density(fill = "grey") +
  facet_wrap(~factor(environment, levels = c("home", "other"),
                     labels = c("home environment", "all 5 other environments"))) +
  geom_vline(data = obsdf, color = "red",
             aes(xintercept = norm_diff)) +
  xlab("decreasing expression at tp0 - increasing expression at tp0,\nnormalized by expression level, averaged across all genes") +
  theme_classic()
p

pdf("paper_figures/Supplement/tp0-simulations.pdf",
    width = 5, height = 2)
p
dev.off()
## quartz_off_screen 
##                 2
# nRandoms with greater norm_diff than observed
# home
sum(randomdf$home > obs_home)
## [1] 0
# other
sum(randomdf$home > obs_other)
## [1] 0