hNAD = read.table("data/hNAD.bed",stringsAsFactors = F,as.is = T)
hNAD[, 4] <- hNAD[, 3] - hNAD[, 2]
colnames(hNAD) <- c('chr', 'start', 'end', 'length')
hNAD_boundary <- data.frame(rbind(as.matrix(hNAD[, c(1, 2)]), as.matrix(hNAD[, c(1, 3)])))
other = read.table("/lustre/user/liclab/pengt/projects/nucleus/new/NAIR/other.bed",stringsAsFactors = F,as.is = T)
other[, 4] <- as.numeric(other[, 3]) - as.numeric(other[, 2])
colnames(other) <- c('chr', 'start', 'end', 'length')
other_boundary <- data.frame(rbind(as.matrix(other[, c(1, 2)]), as.matrix(other[, c(1, 3)])))
TAD_boundary = read.table("data/all.boundary",as.is = T)
TAD_boundary$V5 = TAD_boundary$V2-1
TAD_boundary = TAD_boundary[,c(1,5)]
TAD_boundary = as.matrix(TAD_boundary)
insulationScore <- read.table("data/all.score",stringsAsFactors = F)
colnames(insulationScore) <- c('chr', 'start', 'end', 'insulation_score')
generate_boundary_insulatin_score_matrix <- function( dataframe, insulationScore, window = '2mb' ){
# 生成一个矩阵,每一行都是一个boundary,包括了其上下游个1Mb的insulationScore的信息
boundary_insulationScore <- matrix(nrow = dim(dataframe)[1], ncol = 101 )
# 每一个循环处理一个TAD boundary
# 第一步:取出insulationScore
# 第二步:放入新的matrix中
# 分三种情况:boundary在N, C末端,或者在中间位置。
for(i in seq(1, dim(dataframe)[1])){
chr <- dataframe[i, 1] # chromosome name
sub_insulationScore <- insulationScore[insulationScore$chr == chr, ] # 此boundary所在的染色体的score矩阵
index = which( as.numeric(dataframe[i, 2]) <= as.numeric(sub_insulationScore$end) )[1] # 此boundary所在的bin的位置
if(index < 51){ # N terminal
score <- sub_insulationScore[ ( 1 : (index + 50)) , ]$insulation_score
boundary_insulationScore[i, ( 51 - index + 1) : 101 ] <- score
}else if( (dim(sub_insulationScore)[1] - index) < 50){ # C terminal
score <- sub_insulationScore[ ( (index - 50) : dim(sub_insulationScore)[1] ) , ]$insulation_score
boundary_insulationScore[i, 1: ( 51 + (dim(sub_insulationScore)[1] - index) )] <- score
}else{ # In the middle
boundary_insulationScore[i,] <- sub_insulationScore[ ((index - 50) : (index + 50)), ]$insulation_score
}
}
return(boundary_insulationScore)
}
TAD_boundary_insulationScore <- generate_boundary_insulatin_score_matrix(TAD_boundary, insulationScore)
hNAD_boundary_insulationScore <- generate_boundary_insulatin_score_matrix(as.matrix(hNAD_boundary) , insulationScore)
other_boundary_insulationScore <- generate_boundary_insulatin_score_matrix(as.matrix(other_boundary) , insulationScore)
plot.new()
plot.window(xlim = c(0, 110), ylim =c(-0.6, 0.3) )
lines(1:101, apply(TAD_boundary_insulationScore, 2, median), col = '#00685A', lwd = 2)
lines(1:101, apply(na.omit(hNAD_boundary_insulationScore), 2, median), col = '#4074BA', lwd = 2)
lines(1:101, apply(na.omit(other_boundary_insulationScore), 2, median), col = '#FF7304', lwd = 2)
axis(side = 1, lwd = 2,at = c(0,25,50,75,100), tck= -0.05,labels = F)
text(0,-0.6,'-1Mb')
text(50,-0.6,'0')
text(100,-0.6,'1Mb')
axis(side = 2, lwd = 2, at = c(-0.6, -0.3, 0, 0.3), tck= -0.05,labels = T,las = 1)
legend("topleft", legend = c("TAD",'hNADs',"Other NADs"),
col = c('#00685A', '#4074BA','#FF7304'
), lwd=2,bty = "n")