title: "BUSCO plots"
author: "zagor"
date: "09 10 2019"
output: html_document
editor_options:
  chunk_output_type: console
# Chunk defaults: echo the code and use 12 x 9 figures.
knitr::opts_chunk$set(echo = TRUE, fig.width = 12, fig.height = 9)

# Read one headerless, tab-separated BUSCO summary table.
# Quoting is disabled, strings stay character, and short rows are padded
# (fill = TRUE).
read_busco_tsv <- function(path) {
  read.table(
    file = path,
    header = FALSE,
    sep = "\t",
    quote = "",
    stringsAsFactors = FALSE,
    fill = TRUE
  )
}

myTable <- read_busco_tsv("../intermediate/BUSCOall_embryophyta_odb9.tsv")
head(myTable)

myTable2 <- read_busco_tsv("../intermediate/evg_out.tsv")

# The evg_out rows go first; the hard-coded row ranges used further down
# depend on this ordering.
myTable <- rbind(myTable2, myTable)
nrow(myTable)
# Mark rows that carry no BUSCO class (column 3) or no count (column 2) with
# the placeholder "/". Such rows are removed later, before plotting.
# NA classes are treated like empty ones so the data-frame assignment cannot
# fail on an NA subscript.
missing_class <- is.na(myTable[, 3]) | myTable[, 3] == ""
myTable[missing_class, 3] <- "/"
myTable[is.na(myTable[, 2]), 2] <- "/"

# Forward-fill the data set label (column 1): a blank label inherits the
# nearest non-blank label above it. Leading blanks, which have nothing above
# them to inherit from, are left unchanged. This is vectorised, so it also
# works for tables with fewer than two rows and for NA labels.
labels <- myTable[, 1]
has_label <- !is.na(labels) & labels != ""
last_label <- cumsum(has_label)  # index of the most recent non-blank label
fillable <- last_label > 0
labels[fillable] <- labels[has_label][last_label[fillable]]
myTable[, 1] <- labels
# Strip the file-name boilerplate from the data set labels.
# fixed = TRUE matches the text literally; as regular expressions the leading
# "." in the suffixes would match any character.
label_affixes <- c(
  "short_summary_BUSCO_",
  ".fasta_embryophyta_odb9",
  ".aa_embryophyta_odb9",
  "stCuSTr-"
)
for (affix in label_affixes) {
  myTable[, 1] <- gsub(affix, "", myTable[, 1], fixed = TRUE)
}
# Overwrite the labels of rows 16-105 with short, readable data set names.
# Each name covers five consecutive rows, in D / P / R order within every
# processing step.
# NOTE(review): the row ranges are hard-coded and assume the exact row order
# produced by the two input files - confirm whenever the inputs change.

# Assigning past the last row of a data frame silently appends NA rows, so
# fail early if the table is shorter than the ranges below expect.
stopifnot(nrow(myTable) >= 105)

myTable[16:30, 1] <- rep(
  c(
    "D_pre_cdhit-2d_rep+alt",
    "P_pre_cdhit-2d_rep+alt",
    "R_pre_cdhit-2d_rep+alt"
  ),
  each = 5
)

myTable[31:45, 1] <- rep(
  c(
    "D_utrorf_separated",
    "P_utrorf_separated",
    "R_utrorf_separated"
  ),
  each = 5
)

myTable[46:60, 1] <- rep(
  c(
    "D_eliminated_alt",
    "P_eliminated_alt",
    "R_eliminated_alt"
  ),
  each = 5
)

myTable[61:105, 1] <- rep(
  c(
    "D_post_cdhit-2d_rep+alt",
    "P_post_cdhit-2d_rep+alt",
    "R_post_cdhit-2d_rep+alt",
    "D_post_cdhit-2d_rep",
    "P_post_cdhit-2d_rep",
    "R_post_cdhit-2d_rep",
    "D_post_cdhit-2d_alt",
    "P_post_cdhit-2d_alt",
    "R_post_cdhit-2d_alt"
  ),
  each = 5
)
# Drop the placeholder rows (count or class equal to "/").
# A negative subscript with an empty index vector selects NO rows, so only
# subset when there is something to remove.
ind <- union(which(myTable[, 2] == "/"), which(myTable[, 3] == "/"))
data <- if (length(ind) > 0) myTable[-ind, ] else myTable
colnames(data) <- c("DataSet", "BUSCOs", "BUSCOclass")

# Ordered factors keep classes and data sets in order of first appearance.
data$BUSCOclass <- ordered(data$BUSCOclass, levels = unique(data$BUSCOclass))
data$DataSet <- ordered(data$DataSet, levels = unique(data$DataSet))
data$BUSCOs <- as.numeric(data$BUSCOs)
# Creates the perc column in this position; the values are recomputed from
# BUSCOs_y below.
data$perc <- format(as.numeric(data$BUSCOs)/1440*100, digits = 0, nsmall = 1)

library(plyr)

# Reverse the rows inside every consecutive block of four (4,3,2,1,8,7,6,5,...)
# so the cumulative sums below are taken in the reverse of the row order.
# The index is derived from the row count instead of a fixed 84.
stopifnot(nrow(data) %% 4 == 0)
row_id <- seq_len(nrow(data))
yy <- 4L * ((row_id - 1L) %/% 4L) + 4L - (row_id - 1L) %% 4L
data$BUSCOs_y <- data$BUSCOs[yy]
data$BUSCOclass_y <- data$BUSCOclass[yy]

# Per data set, the running total of the reversed counts is used as the
# y position of each text label.
data <- ddply(data, "DataSet", transform, label_ypos = cumsum(BUSCOs_y))
data$perc <- format(as.numeric(data$BUSCOs_y)/1440*100, digits = 0, nsmall = 1)
library(ggplot2)

# Stacked bar chart of BUSCO counts per data set, one colour per BUSCO class,
# with the perc text drawn at label_ypos (both columns are computed earlier in
# the script).
# `ylab` and `font.axis` are not ggplot() arguments and were silently ignored;
# the axis title is now set explicitly with labs().
ggplot(data = data, aes(x = DataSet, y = BUSCOs, fill = BUSCOclass)) +
  geom_bar(stat = "identity") +
  labs(y = "BUSCOs") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = -0.3, size = 10),
    legend.position = "bottom"
  ) +
  geom_text(
    aes(y = label_ypos, label = perc),
    vjust = 1.0, color = "white", size = 3.5
  )
# Subset for the first comparison plot: every row whose data set name contains
# "_evigene_initial", followed by rows 13-24 and 49-60.
# The original glob-style pattern ("*_evigene_initial_*") only worked because
# the regex engine tolerates a leading "*"; a literal match states the intent.
# NOTE(review): the numeric row ranges are hard-coded - confirm they still
# match the intended data sets if the inputs change.
selection <- data[
  c(
    grep("_evigene_initial", data[, 1], fixed = TRUE),
    13:24,
    49:60
  ),
]

# Plot the subset directly, so `data` never has to be swapped out and restored.
# `ylab` / `font.axis` were dropped: ggplot() silently ignores them.
ggplot(data = selection, aes(x = DataSet, y = BUSCOs, fill = BUSCOclass)) +
  geom_bar(stat = "identity") +
  labs(y = "BUSCOs") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = -0.3, size = 10),
    legend.position = "right"
  ) +
  geom_text(
    aes(y = label_ypos, label = perc),
    vjust = 1.0, color = "white", size = 3.5
  )
# Subset for the second comparison plot: rows whose data set name contains
# "_evigene_initial", then rows 13-24, 49-60 and 25-48, in that order.
# A literal match replaces the glob-style pattern that was being interpreted
# as a regular expression.
# NOTE(review): the numeric row ranges are hard-coded - confirm they still
# match the intended data sets if the inputs change.
selection <- data[
  c(
    grep("_evigene_initial", data[, 1], fixed = TRUE),
    13:24,
    49:60,
    25:48
  ),
]
# Re-level so the x axis follows the order of the selection.
selection$DataSet <- ordered(selection$DataSet, levels = unique(selection$DataSet))

# Plot the subset directly, so `data` never has to be swapped out and restored.
# `ylab` / `font.axis` were dropped: ggplot() silently ignores them.
ggplot(data = selection, aes(x = DataSet, y = BUSCOs, fill = BUSCOclass)) +
  geom_bar(stat = "identity") +
  labs(y = "BUSCOs") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = -0.3, size = 10),
    legend.position = "right"
  ) +
  geom_text(
    aes(y = label_ypos, label = perc),
    vjust = 1.0, color = "white", size = 3.5
  )
# Same subset as the previous plot ("_evigene_initial" rows, then rows 13-24,
# 49-60 and 25-48), drawn without text labels and with the legend on top.
# A literal match replaces the glob-style pattern that was being interpreted
# as a regular expression.
selection <- data[
  c(
    grep("_evigene_initial", data[, 1], fixed = TRUE),
    13:24,
    49:60,
    25:48
  ),
]
# Re-level so the x axis follows the order of the selection.
selection$DataSet <- ordered(selection$DataSet, levels = unique(selection$DataSet))

# Keep a copy of the full table; later code reads `data.0` after `data` has
# been overwritten with another table.
data.0 <- data

# Plot the subset directly, so `data` never has to be swapped out and restored.
# `ylab` / `font.axis` were dropped: ggplot() silently ignores them.
ggplot(data = selection, aes(x = DataSet, y = BUSCOs, fill = BUSCOclass)) +
  geom_bar(stat = "identity") +
  labs(y = "BUSCOs") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = -0.3, size = 10),
    legend.position = "top"
  )
# Read the stPanTr BUSCO summary (headerless, tab-separated, ragged rows padded).
myTable3 <- read.table(
  file = "../intermediate/BUSCO_stPanTr_embryophyta_odb9.tsv",
  header = FALSE,
  sep = "\t",
  quote = "",
  stringsAsFactors = FALSE,
  fill = TRUE
)
colnames(myTable3) <- c("DataSet", "BUSCOs", "BUSCOclass")

# Discard the first five rows, name the three remaining five-row groups, and
# reverse the order of the groups (rep+alt, rep, alt).
# NOTE(review): the row positions are hard-coded - confirm them against the
# input file if it is regenerated.
myTable3 <- myTable3[-c(1:5), ]
myTable3$DataSet[c(1, 6, 11)] <- c("stPanTr_alt", "stPanTr_rep", "stPanTr_rep+alt")
myTable3 <- myTable3[c(11:15, 6:10, 1:5), ]

# Mark rows without a class (column 3) or without a count (column 2) with "/".
# NA classes are treated like empty ones so the data-frame assignment cannot
# fail on an NA subscript.
missing_class <- is.na(myTable3[, 3]) | myTable3[, 3] == ""
myTable3[missing_class, 3] <- "/"
myTable3[is.na(myTable3[, 2]), 2] <- "/"

# Forward-fill the data set label: a blank label inherits the nearest
# non-blank label above it; leading blanks are left unchanged. This is
# vectorised, so it also works for short tables and NA labels.
labels <- myTable3[, 1]
has_label <- !is.na(labels) & labels != ""
last_label <- cumsum(has_label)  # index of the most recent non-blank label
fillable <- last_label > 0
labels[fillable] <- labels[has_label][last_label[fillable]]
myTable3[, 1] <- labels
# Drop the placeholder rows (count or class equal to "/").
# A negative subscript with an empty index vector selects NO rows, so only
# subset when there is something to remove.
ind <- union(which(myTable3[, 2] == "/"), which(myTable3[, 3] == "/"))
data <- if (length(ind) > 0) myTable3[-ind, ] else myTable3
colnames(data) <- c("DataSet", "BUSCOs", "BUSCOclass")

# Ordered factors keep classes and data sets in order of first appearance.
data$BUSCOclass <- ordered(data$BUSCOclass, levels = unique(data$BUSCOclass))
data$DataSet <- ordered(data$DataSet, levels = unique(data$DataSet))
data$BUSCOs <- as.numeric(data$BUSCOs)
# Creates the perc column in this position; the values are recomputed from
# BUSCOs_y below.
data$perc <- format(as.numeric(data$BUSCOs)/1440*100, digits = 0, nsmall = 1)

# Reverse the rows inside every consecutive block of four (4,3,2,1,8,7,6,5,...)
# so the cumulative sums below are taken in the reverse of the row order.
# Computed in one step; it no longer errors on an empty table or indexes past
# the end when the row count is not a multiple of four.
stopifnot(nrow(data) %% 4 == 0)
row_id <- seq_len(nrow(data))
yy <- 4L * ((row_id - 1L) %/% 4L) + 4L - (row_id - 1L) %% 4L
data$BUSCOs_y <- data$BUSCOs[yy]
data$BUSCOclass_y <- data$BUSCOclass[yy]

# Per data set, the running total of the reversed counts is used as the
# y position of each text label.
data <- ddply(data, "DataSet", transform, label_ypos = cumsum(BUSCOs_y))
data$perc <- format(as.numeric(data$BUSCOs_y)/1440*100, digits = 0, nsmall = 1)

# Stacked bar chart of the stPanTr data sets.
# `ylab` / `font.axis` were dropped: ggplot() silently ignores them.
ggplot(data = data, aes(x = DataSet, y = BUSCOs, fill = BUSCOclass)) +
  geom_bar(stat = "identity") +
  labs(y = "BUSCOs") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = -0.3, size = 10),
    legend.position = "bottom"
  ) +
  geom_text(
    aes(y = label_ypos, label = perc),
    vjust = 1.0, color = "white", size = 3.5
  )
Plots for the article
# Assemble the table for the article figures: rows 1-24 and 49-60 of the full
# first table (data.0), regrouped by cultivar, followed by the table currently
# held in `data`.
data.all <- data.0[c(1:24, 49:60), ]

# Expand the one-letter prefixes to full names, in D, P, R order.
full_names <- c("D_" = "Desiree_", "P_" = "PW363_", "R_" = "Rywal_")
for (prefix in names(full_names)) {
  data.all$DataSet <- gsub(prefix, full_names[[prefix]], data.all$DataSet)
}

# Reorder the rows so that each cultivar's data sets are contiguous.
i1 <- grep("Desiree", data.all$DataSet)
i2 <- grep("PW363", data.all$DataSet)
i3 <- grep("Rywal", data.all$DataSet)
data.all <- rbind(data.all[c(i1, i2, i3), ], data)

data <- as.data.frame(data.all)

# Rename the two cd-hit stages, then fix the x-axis order to order of appearance.
data$DataSet <- gsub("pre_cdhit-2d", "1stFiltering", data$DataSet)
data$DataSet <- gsub("post_cdhit-2d", "2ndFiltering", data$DataSet)
data$DataSet <- ordered(data$DataSet, levels = unique(data$DataSet))
library(ggthemes)
# 1440 = (2*2*2*2*2*3*3*5)

# Build the article-style stacked bar chart of BUSCO counts.
#
# plot_data       data frame with the columns DataSet, BUSCOs, BUSCOclass,
#                 label_ypos and BUSCOs_y.
# legend_position value for theme(legend.position = ...).
# legend_columns  number of columns in the fill legend.
#
# Returns the ggplot object; calling it at top level prints the figure.
# The y axis is fixed to 0-1440 (the total also used as the percentage
# denominator earlier in the script) with a break every 240.
# `ylab` / `font.axis` (silently ignored by ggplot()) and the empty trailing
# argument in element_text() have been removed.
busco_article_plot <- function(plot_data, legend_position, legend_columns) {
  ggplot(data = plot_data, aes(x = DataSet, y = BUSCOs, fill = BUSCOclass)) +
    theme_bw() +
    coord_cartesian(ylim = c(0, 1440)) +
    scale_y_continuous(breaks = seq(0, 1440, 240)) +
    geom_bar(stat = "identity", alpha = 0.9, width = 0.66) +
    labs(y = "BUSCOs") +
    theme(aspect.ratio = 1.0) + # geom_rangeframe() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = -0.3, size = 10)) +
    theme(legend.position = legend_position) +
    geom_text(
      aes(y = label_ypos, label = BUSCOs_y),
      vjust = 1.0, color = "white", size = 3.5
    ) +
    theme(
      panel.border = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank()
    ) +
    theme(
      axis.title.x = element_text(
        color = "blue", size = 14, face = "bold", hjust = NULL, vjust = -1.25
      ),
      axis.title.y = element_text(color = "#993333", size = 14, face = "bold")
    ) +
    guides(fill = guide_legend(ncol = legend_columns))
}

# The same figure with three legend layouts.
busco_article_plot(data, legend_position = "right", legend_columns = 1)
busco_article_plot(data, legend_position = "bottom", legend_columns = 2)
busco_article_plot(data, legend_position = "top", legend_columns = 2)
Supplementary
# i = c(grep("_utrorf_separated", data.0$DataSet), grep("_eliminated_", data.0$DataSet))
# data = rbind(data,data.0[i,])
# data$DataSet = gsub("D_", "Desiree_", data$DataSet)
# data$DataSet = gsub("P_", "PW363_", data$DataSet)
# data$DataSet = gsub("R_", "Rywal_", data$DataSet)
# data$DataSet = gsub("_all", "_rep+alt", data$DataSet)