version: April 29, 2021

DOI


This is the analysis pipeline to analyze data generated from 3D models of fate tracked Montastraea cavernosa infected with stony coral tissue loss disease. For the manuscript **Combs IR, Studivan MS, Voss JD. (2020)


All analyses performed with R version 4.0.3

Basic setup of R environment


Loading required packages

For the following analyses we will require the use of a number of different R packages. Most of which can be sourced from CRAN, but some must be downloaded from GitHub. We can use the following code to load in the packages and install any packages not previously installed in the R console.

if (!require("pacman")) install.packages("pacman")
pacman::p_load("ggplot2", "FSA", "officer", "gridExtra", "ggpubr", "Rmisc", "rcompanion", "RColorBrewer", "vegan", "nparcomp", "RVAideMemoire", "pairwiseAdonis", "PMCMR", "PMCMRplus", "patchwork", "magrittr","reshape2", "stringr", "dplyr", "flextable", "permuco")
pacman::p_load_gh("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis")



Loading and subsetting


Loading the dataset into R, and subsetting as needed, we will be making multiple subsets, subsetting by time point and by site to make things easier to workwith.

corr <- read.csv("../data/S2_Dataset.csv", header = T)
str(corr)
head(corr)

# Changing values to cm^2
corr$total.colony.size <- corr$total.colony.size*10000
corr$healthy.area <- corr$healthy.area*10000
corr$disease.area <- corr$disease.area*10000

# I am reordering the dates in my data set to ensure 
# that they display chronologically.
corr$site = factor(corr$site, levels=unique(corr$site)) 
class(corr$date)
corr$date = as.factor(corr$date)
levels(corr$date)
corr$date = factor(corr$date, levels(corr$date)[c(3, 4, 1, 2)])
levels(corr$date)

# Subsetting by timepoint, and then resetting the factor level for each timepoint
corr.t1 <- subset(corr, date == '8.24.18')
droplevels(corr.t1$date)

corr.t2 <- subset(corr, date == '9.11.18')
droplevels(corr.t2$date)

corr.t3 <- subset(corr, date == '11.8.18')
droplevels(corr.t3$date)

corr.t4 <- subset(corr, date == '12.17.18')
droplevels(corr.t4$date)

# This will be used when we analyze rates of loss and disease progression
corr.loss <- subset(corr, date != '8.24.18')
droplevels(corr.loss$date)



# Subsetting by Site 
# (you can reset the factor for each site if needed from the code above)
corr.bc1 <- subset(corr, site == 'BC1')
corr.t328 <- subset(corr, site == 'T328')
corr.ftl4 <- subset(corr, site == 'FTL4')

# Average colony size from each site
mean(corr.bc1$total.colony.size)
mean(corr.t328$total.colony.size)
mean(corr.ftl4$total.colony.size)

# Subsetting the first time point from each location
corr.t1.bc1 <- subset(corr, site == "BC1")
corr.t1.t328 <- subset(corr, site == 'T328')
corr.t1.ftl4 <- subset(corr, site == "FTL4")

# Initial average colony size for each site
mean(corr.t1.bc1$total.colony.size)
mean(corr.t1.t328$total.colony.size)
mean(corr.t1.ftl4$total.colony.size)



Assessing normality


A Shapiro-Wilk Normality Test will be run on our 3 main variables to assess normality

shapiro.total <- shapiro.test(corr$total.colony.size)
shapiro.total
## 
##  Shapiro-Wilk normality test
## 
## data:  corr$total.colony.size
## W = 0.68843, p-value = 5.694e-13
shapiro.healthy <-  shapiro.test(corr$healthy.area)
shapiro.healthy
## 
##  Shapiro-Wilk normality test
## 
## data:  corr$healthy.area
## W = 0.6942, p-value = 7.679e-13
shapiro.disease <- shapiro.test(corr$disease.area)
shapiro.disease
## 
##  Shapiro-Wilk normality test
## 
## data:  corr$disease.area
## W = 0.6443, p-value = 6.517e-14



Friedman Rank Sum Test


Performs a Friedman rank sum test with unreplicated blocked data on the three major area measurements: total colony area, healthy tissue area, and lesion area.

fried.total <- friedmanTest(y=corr$total.colony.size, groups = corr$date, blocks = corr$colony.id)
fried.total 

fried.disease<- friedmanTest(y=corr$disease.area, groups = corr$date, blocks = corr$colony.id)
fried.disease

fried.healthy <- friedmanTest(y=corr$healthy.area, groups = corr$date, blocks = corr$colony.id)
fried.healthy



Pairwise Comparisons using the Nemenyi test


Pairwise comparisons of significant Friedman’s tests and manually adjusting the p values using a Bonferroni correction

# Total colony size
post.total <- posthoc.friedman.nemenyi.test(total.colony.size ~ date | colony.id, data = corr)
total.p.adjust <- p.adjust(post.total$p.value, method = "bonferroni")

# Making a string of the test statistic and melting 
# it into a column for making tables later
post.total.statistic = c(4.269075, 7.589466,9.012491, 3.320392, 4.743416, 1.423025)
post.total.statistic.melt = melt(post.total.statistic, id = 1)

#taking pvalues from Nemenyi test and manually correcting them
total.val <- c(0.0135,4.8e-07, 1.1e-09, 0.0875, 0.0044, 0.7458 )

# Using a bonferroni correction
adjust.total.val <- p.adjust(total.val, method = 'bonferroni')

# Now we are melting this vector (changing it from a row to a column) 
# so we can incorporate into a table later on
p.total.melt <- melt(adjust.total.val, id=1)



############################################
# Healthy tissue area
post.healthy <- posthoc.friedman.nemenyi.test(healthy.area ~ date | colony.id, data = corr)
post.healthy.statistic = c(4.743416, 7.115125,8.380036, 2.371708, 3.636619, 1.264911)
post.healthy.statistic.melt = melt(post.total.statistic, id = 1)

#taking pvalues from Nemenyi test and manually correcting them

healthy.val <- c(0.0044, 2.9e-06,  1.9e-08, 0.3358,0.0497, 0.8078 )

# Using a bonferroni correction
adjust.healthy.val <- p.adjust(healthy.val, method = 'bonferroni')

# Now we are melting this vector (changing it from a row to a column) 
# so we can incorporate into a table later on
p.healthy.melt <- melt(adjust.healthy.val, id=1)



############################################
#Lesion area
post.disease <- posthoc.friedman.nemenyi.test(disease.area ~ date | colony.id, data = corr)
post.disease.statistic = c(1.8973666, 0.6324555,3.1622777, 1.264911, 5.059644, 3.794733)
post.disease.statistic.melt = melt(post.total.statistic, id = 1)

#taking pvalues from Nemenyi test and manually correcting them
disease.val <- c(0.536, 0.970,  0.114, 0.808,0.002,0.037 )



# Using a bonferroni correction
adjust.disease.val <- p.adjust(disease.val, method = 'bonferroni')

# Now we are melting this vector (changing it from a row to a column) so we can incorporate into a table later on
p.disease.melt <- melt(adjust.disease.val, id=1)



Now we are building a table and exporting it as a .docx file for use in publications, reporting, etc.

friedTab = data.frame("Test" = "Total Colony Size", "Comparison" = " ", "Statistic" = fried.total[["statistic"]][["Friedman chi-squared"]], "p.value" = fried.total$p.value[1]) %>% 
  add_row("Test" = " ", "Comparison" = c("T1 - T2", "T1 - T3", "T1 - T4", "T2 - T3", "T2 - T4", "T3 - T4"), "Statistic" = post.total.statistic.melt$value, "p.value" = p.total.melt$value) %>% 
  add_row("Test" = "Healthy Tissue Area", "Comparison" = " ", "Statistic" =  fried.healthy[["statistic"]][["Friedman chi-squared"]], "p.value" = fried.healthy$p.value[1]) %>% 
  add_row("Test" = " ", "Comparison" = c("T1 - T2", "T1 - T3", "T1 - T4", "T2 - T3", "T2 - T4", "T3 - T4"), "Statistic" = post.healthy.statistic.melt$value, "p.value" = p.healthy.melt$value) %>% 
  add_row("Comparison" = " ", "Test" = "Disease Lesion Area", "Statistic" =  fried.disease[["statistic"]][["Friedman chi-squared"]], "p.value" = fried.disease$p.value[1]) %>% 
  add_row("Test" = " ", "Comparison" = c("T1 - T2", "T1 - T3", "T1 - T4", "T2 - T3", "T2 - T4", "T3 - T4"), "Statistic" = post.disease.statistic.melt$value, "p.value" = p.disease.melt$value) %>% 

mutate_if(is.numeric, round, digits = 3) %>%     
mutate(p.value = replace(p.value, p.value >= 0.05, "ns")) %>%
mutate(p.value = replace(p.value, p.value < 0.001, "< 0.001")) %>% 
mutate_if(is.character, str_replace_all, pattern = "-", replacement = "–") %>%
flextable() %>%
set_header_labels(Data.set = "Data set") %>% 
flextable::compose(part = "header", j = "Statistic", value = as_paragraph("Test Statistic")) %>%
flextable::compose(part = "header", j = "p.value", value = as_paragraph(as_i("p"), "-value")) %>% 
autofit() %>%
font(fontname = "Times New Roman", part = "all") %>%
fontsize(size = 12, part = "all") %>%
bold(part = "header") %>% 
colformat_num(j = "Statistic", digits = 2) %>%
colformat_num(j = "p.value", digits = 4, na_str = "ns") %>% 
align_nottext_col(align = "center", header = TRUE, footer = TRUE) %>% 
align(align = "center", j = "p.value")

friedDoc = read_docx()
friedDoc = body_add_flextable(friedDoc, value = friedTab)
print(friedDoc, target = "../tables/Table2.docx")
friedTab

Test

Comparison

Test Statistic

p-value

Total Colony Size

48.55

< 0.001

T1 – T2

4.27

ns

T1 – T3

7.59

< 0.001

T1 – T4

9.01

< 0.001

T2 – T3

3.32

ns

T2 – T4

4.74

0.026

T3 – T4

1.42

ns

Healthy Tissue Area

41.29

< 0.001

T1 – T2

4.27

0.026

T1 – T3

7.59

< 0.001

T1 – T4

9.01

< 0.001

T2 – T3

3.32

ns

T2 – T4

4.74

ns

T3 – T4

1.42

ns

Disease Lesion Area

14.02

0.003

T1 – T2

4.27

ns

T1 – T3

7.59

ns

T1 – T4

9.01

ns

T2 – T3

3.32

ns

T2 – T4

4.74

0.012

T3 – T4

1.42

ns



Correlation Analyses


Multiple correlation analyses were carried out to see how different colony metrics affected disease manifestation and progress.

# Looking at correlation between total colony size and disease lesion area

colonysize.vs.lesionarea <- cor.test(corr$total.colony.size,corr$disease.area, method="spearman", use="complete.obs")
colonysize.vs.lesionarea
## 
##  Spearman's rank correlation rho
## 
## data:  corr$total.colony.size and corr$disease.area
## S = 125793, p-value = 0.1535
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.1468166



Colony size was not correlated with disease lesion area or the number of disease lesions. So, bigger colonies have no more (or less) disease than smaller colonies.

Plotting


We are now going to make a number of box plots and correlation plots to visualize the data we just analyzed. I will display the code for one of each (as the code is the same just replacing variable names) and then I will compile the plots using the R package patchwork.

Box Plots

# This forces y axis labels to have 1 decimal place.
scale <- function(x) sprintf("%.0f", x)
# Plotting a boxplot of mean total colony size for each site at each timepoint. 
total.colony.box.1 <-
  ggplot(data = corr, aes(x = date, y = total.colony.size))+
  geom_boxplot(aes(fill = site), alpha = 1, outlier.shape = NA, color = "black") +
  geom_point(aes(fill = site), color = "black", size = 1.75, position = position_jitterdodge()) +
  scale_fill_manual(values = c("#7BA46C", "#EACF9E", "#008D91")) +
  scale_x_discrete(labels = c(expression('T'[1]), expression('T'[2]), expression('T'[3]), expression('T'[4])))+
  labs(x = "Time", y = bquote("Total Colony Area" ~ (cm^2)),fill = 'Site') +
  scale_y_continuous(labels = scale)+
  theme(legend.title = element_blank(),
        legend.text = element_text(color = "black", size = 18),
        legend.background = element_blank(),
        legend.position = "bottom")+
  facet_grid(.~site,scales="free")

total.colony.box <- total.colony.box.1 + theme(
  panel.grid.major = element_line(size = 0.5, linetype = 'solid', colour = "white"),
  panel.background = element_rect(fill = '#F5F5F5'),
  plot.title = element_text(hjust = 0.5), 
  axis.line = element_line(colour = "black"), 
  axis.ticks = element_line(color="black"), 
  text = element_text(size = 22, color="black"), 
  axis.text.x=element_text(size = 18, color="black"), 
  axis.text.y=element_text(size = 18, color="black"),
  legend.position = "none")

total.colony.box

We repeat this using healthy tissue area, disease lesion area and lesion count.

# This forces y axis labels to have 1 decimal place.
scale <- function(x) sprintf("%.0f", x)
# Plotting a boxplot of mean healthy tissue area for each site at each timepoint. 
healthy.colony.box.1 <-
  ggplot(data = corr, aes(x = date, y = healthy.area))+
  geom_boxplot(aes(fill = site), alpha = 1, outlier.shape = NA, color = "black") +
  geom_point(aes(fill = site), color = "black", size = 1.75, position = position_jitterdodge()) +
  scale_fill_manual(values = c("#7BA46C", "#EACF9E", "#008D91")) +
  scale_x_discrete(labels = c(expression('T'[1]), expression('T'[2]), expression('T'[3]), expression('T'[4])))+
  labs(x = "Time", y = bquote("Healthy Tissue Area" ~ (cm^2)),fill = 'Site') +
  scale_y_continuous(labels = scale)+
  theme(legend.title = element_blank(),
        legend.text = element_text(color = "black", size = 18),
        legend.background = element_blank(),
        legend.position = "bottom")+
  facet_grid(.~site,scales="free")

healthy.colony.box <- healthy.colony.box.1 + theme(
  panel.grid.major = element_line(size = 0.5, linetype = 'solid', colour = "white"),
  panel.background = element_rect(fill = '#F5F5F5'),
  plot.title = element_text(hjust = 0.5), 
  axis.line = element_line(colour = "black"), 
  axis.ticks = element_line(color="black"), 
  text = element_text(size = 22, color="black"), 
  axis.text.x=element_text(size = 18, color="black"), 
  axis.text.y=element_text(size = 18, color="black"),
  legend.position = "none")

healthy.colony.box

# Plotting box plot of mean disease lesion area for each site at each timepoint.
disease.area.box.1 <-
  ggplot(data = corr, aes(x = date, y = disease.area))+
  geom_boxplot(aes(fill = site), alpha = 1, outlier.shape = NA, color = "black") +
  geom_point(aes(fill = site), color = "black", size = 1.75, position = position_jitterdodge()) +
  scale_fill_manual(values = c("#7BA46C", "#EACF9E", "#008D91")) +
  scale_x_discrete(labels = c(expression('T'[1]), expression('T'[2]), expression('T'[3]), expression('T'[4])))+
  labs(x = "Time", y = bquote('Disease Lesion Area'~(cm^2)), fill = 'Site') + 
  scale_y_continuous(labels = scale)+
  theme(legend.title = element_blank(),
        legend.text = element_text(color = "black", size = 18),
        legend.background = element_blank(),
        legend.position = "bottom")+
  facet_grid(.~site,scales="free")

disease.area.box <- disease.area.box.1 + theme(
  panel.grid.major = element_line(size = 0.5, linetype = 'solid', colour = "white"),
  panel.background = element_rect(fill = '#F5F5F5'),
  plot.title = element_text(hjust = 0.5), 
  axis.line = element_line(colour = "black"), 
  axis.ticks = element_line(color="black"), 
  text = element_text(size = 22, color="black"), 
  axis.text.x=element_text(size = 18, color="black"), 
  axis.text.y=element_text(size = 18, color="black"),
  legend.position = "none")

disease.area.box

box.plots.patch <- (total.colony.box | healthy.colony.box | disease.area.box) + plot_layout(guides = 'collect') + plot_annotation(tag_levels = 'a') & 
  theme(plot.tag = element_text(size = 28))
ggsave("../figures/Fig4.eps", plot = box.plots.patch, width = 15, height = 5, units = "in", dpi = 600)
ggsave("../figures/Fig4.png", plot = box.plots.patch, width = 15, height = 5, units = "in", dpi = 600)



Correlation Plots

Here I will be showing the code for a significant and a non-significant correlation plot as there will be slight difference in the code showing the correlation trend.

colony.size.disease.area.1 <-ggplot(corr, aes(x = total.colony.size, y = disease.area, color = site))+
  geom_point(size = 5)+
  scale_color_manual(values = c("#7BA46C", "#EACF9E", "#008D91"))+
  labs(x = bquote('Total Colony Area'~(cm^2)), y = bquote('Disease Lesion Area'~(cm^2)), color = "Site") +
  scale_y_continuous(labels = scale)

colony.size.disease.area <- colony.size.disease.area.1 + 
  theme(panel.grid.major = element_line(size = 0.5, linetype = 'solid', colour = "white"),                 
        panel.background = element_rect(fill = '#F5F5F5'),
        plot.title = element_text(hjust = 0.5), 
        axis.line = element_line(colour = "black"), 
        axis.ticks = element_line(color="black"), 
        text = element_text(size=20, color="black"), 
        axis.text.x=element_text(size=14, color="black"), 
        axis.text.y=element_text(size=14, color="black"),
        axis.title.y = element_text(size = 18, color = 'black'),
       legend.position = "bottom")
colony.size.disease.area 



correlation.plots.patch <- (colony.size.disease.area / guide_area() + plot_layout(nrow = 2, guides = 'collect', heights = c(5,1)))
ggsave("../figures/Fig6.png", plot = correlation.plots.patch, width = 5, height = 5, units = "in", dpi = 600)
ggsave("../figures/Fig6.eps", plot = correlation.plots.patch, width = 5, height = 5, units = "in", dpi = 600)




Rates of Loss


Rates of loss were calculated between timepoints, looking at correlations. Kruskal-Wallis Rank Sum Tests were used to determine if rates of loss varied by site, and correlation analyses were used to determine if colony metrics were related to rates of tissue loss.

#subset
corr.loss <- subset(corr, date != "8.24.18")
corr.loss[]<- lapply(corr.loss, function(x) if(is.factor(x)) factor(x) else x)
levels(corr.loss$date)
fried.rate <- friedmanTest(y=corr.loss$rate, groups = corr.loss$date, blocks = corr.loss$colony.id)
fried.rate 
## 
##  Friedman rank sum test
## 
## data:  y, groups and blocks
## Friedman chi-squared = 2.5833, df = 2, p-value = 0.2748
fried.loss <- friedmanTest(y=corr.loss$loss, groups = corr.loss$date, blocks = corr.loss$colony.id)
fried.loss 
## 
##  Friedman rank sum test
## 
## data:  y, groups and blocks
## Friedman chi-squared = 6.25, df = 2, p-value = 0.04394
loss.vs.total.colony.size <- cor.test(corr.loss$loss,corr.loss$total.colony.size, method="spearman", use="complete.obs")
loss.vs.total.colony.size
## 
##  Spearman's rank correlation rho
## 
## data:  corr.loss$loss and corr.loss$total.colony.size
## S = 65028, p-value = 0.704
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##         rho 
## -0.04554116
rate.l1.kw <- kruskal.test(loss ~ site, data = corr.t2)
rate.l1.kw
## 
##  Kruskal-Wallis rank sum test
## 
## data:  loss by site
## Kruskal-Wallis chi-squared = 3.0704, df = 2, p-value = 0.2154
rate.l2.kw <- kruskal.test(loss ~ site, data=corr.t3)
rate.l2.kw
## 
##  Kruskal-Wallis rank sum test
## 
## data:  loss by site
## Kruskal-Wallis chi-squared = 1.9647, df = 2, p-value = 0.3744
rate.l3.kw <- kruskal.test(loss ~ site, data=corr.t4)
rate.l3.kw
## 
##  Kruskal-Wallis rank sum test
## 
## data:  loss by site
## Kruskal-Wallis chi-squared = 2.6535, df = 2, p-value = 0.2653
lossKwTab = data.frame("Data" = "Rate of Tissue Loss", "Test" = "Kruskal-Wallis", "Comparison" = "T1 v T2", "TestStatistic" = rate.l1.kw[["statistic"]][["Kruskal-Wallis chi-squared"]], "p.value" = rate.l1.kw[["p.value"]]) %>% 
  add_row(data.frame("Data" = " ", "Test" =  " ", "Comparison" =  "T2 v T3 ", "TestStatistic" = rate.l2.kw[["statistic"]][["Kruskal-Wallis chi-squared"]], "p.value" = rate.l2.kw[["p.value"]])) %>% 
add_row(data.frame("Data" = " ", "Test" =  " ", "Comparison" =  "T3 v T4 ", "TestStatistic" = rate.l3.kw[["statistic"]][["Kruskal-Wallis chi-squared"]], "p.value" = rate.l3.kw[["p.value"]])) %>% 
        
mutate_if(is.numeric, round, digits = 3) %>%     
mutate(p.value = replace(p.value, p.value >= 0.05, NA)) %>%
mutate(p.value = replace(p.value, p.value < 0.001, "< 0.001")) %>% 
mutate_if(is.character, str_replace_all, pattern = "vs", replacement = "–") %>% 
flextable() %>%
set_header_labels(Data.set = "Data") %>% 
flextable::compose(part = "header", j = "TestStatistic", value = as_paragraph("Test Statistic")) %>%
flextable::compose(part = "header", j = "p.value", value = as_paragraph(as_i("p"), "-value")) %>% 
autofit() %>%
font(fontname = "Times New Roman", part = "all") %>%
fontsize(size = 12, part = "all") %>%
bold(part = "header") %>% 
colformat_num(j = "TestStatistic", digits = 2) %>%
colformat_num(j = "p.value", digits = 4, na_str = "ns") %>% 
align(align = "center", j = "p.value") %>% 
align(align = "center", j = "TestStatistic")
lossKwDoc = read_docx()
lossKwDoc = body_add_flextable(lossKwDoc, value = lossKwTab)
print(lossKwDoc, target = "../tables/TableS1.docx")
lossKwTab

Data

Test

Comparison

Test Statistic

p-value

Rate of Tissue Loss

Kruskal-Wallis

T1 v T2

3.07

ns

T2 v T3

1.97

ns

T3 v T4

2.65

ns



Correlation analyses


Correlation analyses were run on rates of loss and various colony metrics.

corr.loss.spear <- cor.test(corr.loss$loss, corr.loss$total.colony.size, method = 'spearman', use = 'complete.obs')

Visualizing rates of loss


Now using same types plots as before, but for loss data. I am excluding the first time point, and will subset accordingly. And use the same code as above.

#------------box plot for L1---------------------------
scale <- function(x) sprintf("%.0f", x)
# boxplots comparing shape areas to template
loss.box.1 <- ggplot(data = corr.loss, aes(x = date, y = loss))+
  geom_boxplot(aes(fill = date), alpha = 1, outlier.shape = NA, color = "black") +
  geom_point(aes(fill = date), color = "black", size = 3, position = position_jitterdodge()) +
  scale_fill_manual(values = c("#A4A4BF", "#888C46", "#A3586D")) +
  scale_x_discrete(labels = c(expression('T'[1]-'T'[2]), expression('T'[2]-'T'[3]), expression('T'[3]-'T'[4])))+
  labs(x = "Time Point ", y = bquote('Tissue Loss'~(cm^2/wk)), fill = 'Site') +
  scale_y_continuous(labels = scale)+
  theme(legend.title = element_blank(),
        legend.text = element_text(color = "black", size = 25),
        legend.background = element_blank())+
  facet_grid(.~site,scales="free")
loss.box.1

loss.box <- loss.box.1 + theme(
  panel.grid.major = element_line(size = 0.5, linetype = 'solid', colour = "white"),
  panel.background = element_rect(fill = '#F5F5F5'),
  plot.title = element_text(hjust = 0.5), 
  axis.line = element_line(colour = "black"), 
  axis.ticks = element_line(color="black"), 
  text = element_text(size=15, color="black"), 
  axis.text.x=element_text(size=15, color="black", angle = 90), 
  axis.text.y=element_text(size=20, color="black"),
  axis.title.y = element_text(size =25, color = 'black'),
  axis.title.x = element_text(size = 25, color = 'black'))+
  rremove('legend')
loss.box

#Correlation plot for colony size and rate of tissue loss

colony.size.loss.1 <-ggplot(corr.loss, aes(x = total.colony.size, y = loss, color = site))+
  geom_point(size = 5)+
  scale_color_manual(values = c("#7BA46C", "#EACF9E", "#008D91"))+
  labs(x = bquote('Total Colony Size'~(cm^2)), y = bquote('Tissue  Loss'~(cm^2/wk)), color = "Site") +
  scale_y_continuous(labels = scale)

colony.size.loss <- colony.size.loss.1 + 
  theme(panel.grid.major = element_line(size = 0.5, linetype = 'solid', colour = "white"),                 
        panel.background = element_rect(fill = '#F5F5F5'),
        plot.title = element_text(hjust = 0.5), 
        axis.line = element_line(colour = "black"), 
        axis.ticks = element_line(color="black"), 
        text = element_text(size=15, color="black"), 
  axis.text.x=element_text(size=20, color="black"), 
  axis.text.y=element_text(size=20, color="black"),
  axis.title.y = element_text(size =25, color = 'black'),
  axis.title.x = element_text(size = 25, color = 'black'))

colony.size.loss 

loss.panel <- loss.box + colony.size.loss + guide_area()+ plot_layout(nrow = 1, guides = 'collect', widths = c(5,5,1)) + plot_annotation(tag_levels = 'a')

ggsave("../figures/Fig5.png", plot= loss.panel, width=15, height=10, units="in", dpi=600)
ggsave("../figures/Fig5.eps", plot= loss.panel, width=15, height=10, units="in", dpi=600)

# Proportion of Loss *** Proportions of loss were calculated for each time point and for total colony area, healthy tissue, and disease area, to understand the average proportion of tissue lost throughout the study.

prop <- read.csv("../data/prop.melt.csv", header = TRUE)
prop$site = factor(prop$site, levels=unique(prop$site)) 
class(prop$t1)
## [1] "character"
prop$t1 = as.factor(prop$t1)
levels(prop$t1)
## [1] "11.8.18"  "12.17.18" "8.24.18"  "9.11.18"
prop$t1 = factor(prop$t1, levels(prop$t1)[c(3, 4, 1, 2)])
levels(prop$t1)
## [1] "8.24.18"  "9.11.18"  "11.8.18"  "12.17.18"
prop$loss <- 1-prop$proportion.of.total



prop.loss <- subset(prop, t1 != "8.24.18")


prop.loss$loss <- 1-prop.loss$proportion.of.total

prop.losst4 <- subset(prop.loss, t1 == "12.17.18")

prop.losst2 <- subset(prop.loss, t1 == "9.11.18")
prop.losst3 <- subset(prop.loss, t1 == "11.8.18")
prop.losst4 <- subset(prop.loss, t1 == "12.17.18")


mean(prop.losst2$loss)
## [1] 0.1079532
mean(prop.losst3$loss)
## [1] 0.2677326
mean(prop.losst4$loss)
## [1] 0.370928