title: "merge all wc"
author: "zagor"
date: "9 October 2019"
output: html_document
knitr::opts_chunk$set(echo = TRUE)
library(plyr)
library(dplyr)
library(data.table)
stCusTr = read.table(file = "../../../_S_03_stCuSTr/_A_03.2_components/output/3cv_cdhit-2d_weak-components.txt",
header = TRUE,
sep = "\t",
quote = NULL,
dec = ".",
stringsAsFactors = FALSE,
na.strings = "NA", fill = TRUE,
comment.char = "#")
stCusTr = stCusTr[, c(6,2,4,7)]
colnames(stCusTr) = c("wcID1", "cvID1", "geneID", "classID1")
ind = c(which(stCusTr$classID == "representative"), which(stCusTr$classID == "alternative"))
stCusTr = stCusTr[ind , ]
stPanTr = read.table(file = "../../_A_02_components_1_3cvs-gffmerged/intermediate_2_prepare-network/pantr_evigene-header-network_unfiltered_weak-components_perWC_with-rep.txt",
header = FALSE,
sep = "\t",
quote = NULL,
dec = ".",
stringsAsFactors = FALSE,
na.strings = "NA", fill = TRUE,
comment.char = "#")
stPanTr = stPanTr[, c(1,2,4,5)]
colnames(stPanTr) = c("wcID2", "cvID2", "geneID", "classID2")
nrow(stCusTr) + length(grep("Sotub", stPanTr$geneID)) + length(grep("PGSC", stPanTr$geneID))
combo = NULL
stPanTr.1 <- data.table(stPanTr[, c(3,1,2,4)], key = "geneID")
stCusTr.1 <- data.table(stCusTr[, c(3,1,2,4)], key = "geneID")
combo = join(stPanTr.1, stCusTr.1, type = "full", match = "all", by = "geneID")
nrow(combo)
head(combo)
ind = which(combo$classID1 == "alternative")
head(ind)
combo[head(ind), ]
combo$classID2[is.na(combo$classID2)] = "cv_alternative"
ind1 = grep("Sotub", combo$geneID)
ind2 = grep("PGSC", combo$geneID)
gffmerge.wcID2 = combo$wcID2[c(ind1, ind2)]
gffmerge.cvID2 = combo$cvID2[c(ind1, ind2)]
comboTranslate = NULL
translate = cbind(combo$wcID1, combo$wcID2)
colnames(translate) = c("wcID1", "wcID2.1")
translate = data.table(translate, key = "wcID1")
translate = translate[!is.na(translate$wcID1),]
translate = translate[!is.na(translate$wcID2.1),]
nrow(translate) == nrow(stPanTr) - length(grep("Sotub", stPanTr$geneID)) - length(grep("PGSC", stPanTr$geneID))
comboTranslate = join(combo, translate, type = "full", match = "all", by = "wcID1")
length(which((is.na(comboTranslate$wcID2.1)))) == length(grep("Sotub", stPanTr$geneID)) + length(grep("PGSC", stPanTr$geneID))
head(comboTranslate)
combo$wcID2 = comboTranslate$wcID2.1
combo$wcID2[c(ind1, ind2)] = gffmerge.wcID2
any(is.na(combo$wcID2))
comboTranslate = NULL
translate = cbind(combo$wcID1, combo$cvID2)
colnames(translate) = c("wcID1", "cvID2.1")
translate = data.table(translate, key = "wcID1")
translate = translate[!is.na(translate$wcID1),]
translate = translate[!is.na(translate$cvID2.1),]
nrow(translate) == nrow(stPanTr) - length(grep("Sotub", stPanTr$geneID)) - length(grep("PGSC", stPanTr$geneID))
comboTranslate = join(combo, translate, type = "full", match = "all", by = "wcID1")
length(which((is.na(comboTranslate$cvID2.1)))) == length(grep("Sotub", stPanTr$geneID)) + length(grep("PGSC", stPanTr$geneID))
combo$cvID2 = comboTranslate$cvID2.1
combo$cvID2[c(ind1, ind2)] = gffmerge.cvID2
any(is.na(combo$wcID2))
ind = grep("Sotub", combo$geneID)
combo$cvID1[ind] = "Sotub"
combo$wcID1[ind] = "gffmerge"
combo$classID1[ind] = "gffmerge"
ind = grep("PGSC", combo$geneID)
combo$cvID1[ind] = "PGSC"
combo$wcID1[ind] = "gffmerge"
combo$classID1[ind] = "gffmerge"
unique(combo$cvID1)
unique(combo$cvID2)
combo = combo[with(combo, order(wcID2, wcID1, geneID)), ]
comboTranslate = setDT(combo)
comboTranslate$geneCount = 1
comboTranslate = comboTranslate[, list(sum(geneCount, na.rm = FALSE)), by = wcID2]
dim(comboTranslate)
colnames(comboTranslate) = c("wcID2", "geneCount")
tmp = combo[, c(1:2)]
tmp = join(tmp, comboTranslate, type = "full", match = "all", by = "wcID2")
head(tmp)
nrow(tmp) == nrow(combo)
ind = match(tmp$geneID, combo$geneID)
any(ind != seq(1,nrow(combo),1))
combo$geneCount = tmp$geneCount
head(combo)
write.table(x = combo,
file = "../output/5cv_weak-components.txt",
append = FALSE,
quote = FALSE,
sep = "\t",
eol = "\n", na = "NA", dec = ".",
row.names = FALSE,
col.names = TRUE)
combo$classID2[combo$classID2 == "cv_alternative"] = "alternative1"
combo$classID2[combo$classID2 == "alternative"] = "alternative2"
interesting = combo[which(combo$cvID2 == "D|||P|||R|||g"),]
range(interesting$geneCount)
interesting = interesting[which(interesting$geneCount %in% seq(8, 16)), ]
length(unique(interesting$wcID2))
dir.create("../output/D_P_R_g")
myWC = sort(unique(interesting$wcID2))
for (i in myWC) {
subset = interesting[which( i == interesting$wcID2),]
subset = arrange(subset, desc(classID2), desc(cvID1), desc(classID1))
fname = i
write.table(x = subset$geneID,
file = paste0("../output/D_P_R_g/", i, ".txt"),
append = FALSE,
quote = FALSE,
sep = "\t",
eol = "\n", na = "NA", dec = ".",
row.names = FALSE,
col.names = FALSE)
}
count = read.table(file = "../output/5cv_weak-components.txt",
header = TRUE,
sep = "\t",
quote = NULL,
dec = ".",
stringsAsFactors = FALSE,
na.strings = "NA", fill = TRUE,
comment.char = "#")
count = count[count$classID2 == "representative" | count$classID2 == "alternative",]
library(VennDiagram)
A <- count$geneID[grep("D", count$cvID2)]
B <- count$geneID[grep("P", count$cvID2)]
C <- count$geneID[grep("R", count$cvID2)]
D <- count$geneID[grep("g", count$cvID2)]
DD = count[grep("g", count$cvID2), ]
table(DD$cvID2)
venn.plot <- venn.diagram(
x = list(
Phureja = D,
Desiree = A,
PW363 = B,
Rywal = C
),
filename = "../reports/stPanTr_geneCount_weak-components.tiff",
col = "transparent",
fill = c("cornflowerblue", "green", "yellow", "darkorchid1"),
alpha = 0.50,
label.col = c("orange", "white", "darkorchid4", "white",
"white", "white", "white", "white", "darkblue", "white",
"white", "white", "white", "darkgreen", "white"),
cex = 1.25,
fontfamily = "serif",
fontface = "bold",
cat.col = c("darkblue", "darkgreen", "orange", "darkorchid4"),
cat.cex = 1.5,
cat.pos = 15,# 30,
cat.dist = 0.05,# 0.07,
cat.fontfamily = "serif",
rotation.degree = 180,
margin = 0.1
)
sessionInfo()