Home | Blog | Software | Publications | GitHub


Visualize chromatin state transitions

A chromatin state transition matrix shows how much the chromatin state in the genome has been changed from e.g. one sample to another. In our recent paper we demonstrated how chromatin states change between smoking people and non-smoking people by means of Chord Diagram. In this post, I will demonstrate how to make such plot by the circlize package and enhance it by adding methylation information.

The data demonstrated in this post is processed from Roadmap data. The chromatin states are learned from five core chromatin marks. Roadmap samples are separated into two groups based on expression profile. In each group, a chromatin state is assigned to the corresponding genomic bin if it is recurrent in at least half of the samples.

The processed data is stored as chromatin_transition.RData.

library(circlize)
## Warning in .doLoadActions(where, attach): trying to execute load actions
## without 'methods' package
load("../data/chromatin_transition.RData")

There are three matrix: mat, meth_mat_1 and meth_mat_2 which are:

mat[1:4, 1:4]
##            TssA TssAFlnk TxFlnk     Tx
## TssA     497200    79600  13400   1800
## TssAFlnk  56400   233200   5000    800
## TxFlnk        0      400  43000   1800
## Tx          800      200    200 166400
meth_mat_1[1:4, 1:4]
##               TssA  TssAFlnk    TxFlnk        Tx
## TssA     0.1647232 0.1580874 0.1917435 0.2690045
## TssAFlnk 0.2591677 0.2689880 0.3616242 0.3411387
## TxFlnk          NA 0.3697514 0.3360386 0.4752722
## Tx       0.8268626 0.7822987 0.5799682 0.6595322

Normally, majority in the genome are unchanged states, thus, we should only look at the regions in which their states are changed.

# proportion of the unchanges states in the genome
sum(diag(mat))/sum(mat)
## [1] 0.6192262
# remove the unchanged states
diag(mat) = 0

When making the plot, actually rows and columns are different (because one is from group 1 and the other is from group 2), thus we give them different names and the original names are stored in all_states.

all_states = rownames(mat)
n_states = nrow(mat)

rownames(mat) = paste0("R_", seq_len(n_states))
colnames(mat) = paste0("C_", seq_len(n_states))

dimnames(meth_mat_1) = dimnames(mat)
dimnames(meth_mat_2) = dimnames(mat)

Next we set the colors. colmat is the color of the links and the colors are represent as hex code. Links have more transparent (A0) if they contain few transitions (< 70th percentile) because we don't want it to disturb the visualization of the major transitions.

state_col = c("TssA" = "#E41A1C",
              "TssAFlnk" = "#E41A1C",
              "TxFlnk" = "#E41A1C",
              "Tx" = "#E41A1C",
              "TxWk" = "#E41A1C",
              "EnhG" = "#E41A1C",
              "Enh" = "#E41A1C",
              "ZNF/Rpts" = "#E41A1C",
              "Het" = "#377EB8",
              "TssBiv" = "#377EB8",
              "BivFlnk" = "#377EB8",
              "EnhBiv" = "#377EB8",
              "ReprPC" = "#377EB8",
              "ReprPCWk" = "#377EB8",
              "Quies" = "black")

# one for rows and one for columns
state_col2 = c(state_col, state_col)
names(state_col2) = c(rownames(mat), colnames(mat))

colmat = rep(state_col2[rownames(mat)], n_states)
colmat = rgb(t(col2rgb(colmat)), maxColorValue = 255)

qati = quantile(mat, 0.7)
colmat[mat > qati] = paste0(colmat[mat > qati], "A0")
colmat[mat <= qati] = paste0(colmat[mat <= qati], "20")
dim(colmat) = dim(mat)

Now we can use chordDiagram() function to make the plot. Here we set one pre-allocated track in which the methylation information will be put.

chordDiagram() returns a data frame which contains coordinates for all links which will be used later.

de is the degree for the “gap” between group 1 and group 2.

de = 360 - (360 - 20 - 30) - 30
circos.par(start.degree = -de/4, gap.degree = c(rep(1, n_states-1), de/2, rep(1, n_states-1), de/2))

cdm_res = chordDiagram(mat, col = colmat, grid.col = state_col2,
    directional = TRUE, annotationTrack = "grid", preAllocateTracks = list(track.height = 0.1))

plot of chunk unnamed-chunk-6

If the degree for a sector is larger than 3 degrees, the index for the state and axis is added. Note since there is already one pre-allocated track, the circular rectangles are in the second track (track.index = 2).

for(sn in get.all.sector.index()) {
    if(abs(get.cell.meta.data("cell.start.degree", sector.index = sn) - 
           get.cell.meta.data("cell.end.degree", sector.index = sn)) > 3) {
        xcenter = get.cell.meta.data("xcenter", sector.index = sn, track.index = 2)
        ycenter = get.cell.meta.data("ycenter", sector.index = sn, track.index = 2)
        i_state = as.numeric(gsub("(C|R)_", "", sn))
        circos.text(xcenter, ycenter, i_state, col = "white", font = 2, cex = 0.7, 
            sector.index = sn, track.index = 2, adj = c(0.5, 0.5), niceFacing = TRUE)
        circos.axis(sector.index = sn, track.index = 2, major.tick.percentage = 0.2, 
            labels.away.percentage = 0.2, labels.cex = 0.5)
    }
}

plot of chunk unnamed-chunk-7

On the top half, it is easy to see the proportion of different transitions in group 1 that come to every state in group 2. However, it is not straightforward for the states in the bottom half to see the proportion of different states in group 2 they transite to. This can be solved by adding small circular rectangles. In following example, the newly added circular rectangles in the bottom half shows e.g. how much the state 15 in group 1 has been transited to different states in group 2.

for(i in seq_len(nrow(cdm_res))) {
    if(cdm_res$value[i] > 0) {
        circos.rect(cdm_res[i, "x1"], -0.5, cdm_res[i, "x1"] - abs(cdm_res[i, "value"]), -0.7, 
            col = state_col2[cdm_res$cn[i]], border = state_col2[cdm_res$cn[i]],
            sector.index = cdm_res$rn[i], track.index = 2)
    }
}

plot of chunk unnamed-chunk-8

Methylation in each category is put on the most outside of the circle. On this track, we will put two paralle rectangles which are mean methylation and methylation difference between group 1 and group 2. Basically, on the bottom, we show meth_mat_2 - meth_mat_1 and on the top we show meth_mat_1 - meth_mat_2.

abs_max = quantile(abs(c(meth_mat_1, meth_mat_2) - 0.5), 0.95, na.rm = TRUE)
col_fun = colorRamp2(c(0.5 - abs_max, 0.5, 0.5 + abs_max), c("blue", "white", "red"))
col_fun2 = colorRamp2(c(-abs_max, 0, abs_max), c("green", "white", "orange"))

ylim = get.cell.meta.data("ylim", sector.index = rownames(mat)[1], track.index = 1)
y1 = ylim[1] + (ylim[2] - ylim[1])*0.4
y2 = ylim[2]
for(i in seq_len(nrow(cdm_res))) {
    if(cdm_res$value[i] > 0) {
        circos.rect(cdm_res[i, "x1"], y1, cdm_res[i, "x1"] - abs(cdm_res[i, "value"]), y1 + (y2-y1)*0.45, 
            col = col_fun(meth_mat_1[cdm_res$rn[i], cdm_res$cn[i]]), 
            border = col_fun(meth_mat_1[cdm_res$rn[i], cdm_res$cn[i]]),
            sector.index = cdm_res$rn[i], track.index = 1)

        circos.rect(cdm_res[i, "x1"], y1 + (y2-y1)*0.55, cdm_res[i, "x1"] - abs(cdm_res[i, "value"]), y2, 
            col = col_fun2(meth_mat_2[cdm_res$rn[i], cdm_res$cn[i]] - meth_mat_1[cdm_res$rn[i], cdm_res$cn[i]]), 
            border = col_fun2(meth_mat_2[cdm_res$rn[i], cdm_res$cn[i]] - meth_mat_1[cdm_res$rn[i], cdm_res$cn[i]]),
            sector.index = cdm_res$rn[i], track.index = 1)

        circos.rect(cdm_res[i, "x2"], y1, cdm_res[i, "x2"] - abs(cdm_res[i, "value"]), y1 + (y2-y1)*0.45, 
            col = col_fun(meth_mat_2[cdm_res$rn[i], cdm_res$cn[i]]), 
            border = col_fun(meth_mat_2[cdm_res$rn[i], cdm_res$cn[i]]),
            sector.index = cdm_res$cn[i], track.index = 1)

        circos.rect(cdm_res[i, "x2"], y1 + (y2-y1)*0.55, cdm_res[i, "x2"] - abs(cdm_res[i, "value"]), y2, 
            col = col_fun2(meth_mat_1[cdm_res$rn[i], cdm_res$cn[i]] - meth_mat_2[cdm_res$rn[i], cdm_res$cn[i]]), 
            border = col_fun2(meth_mat_1[cdm_res$rn[i], cdm_res$cn[i]] - meth_mat_2[cdm_res$rn[i], cdm_res$cn[i]]),
            sector.index = cdm_res$cn[i], track.index = 1)
    }
}

circos.clear()

plot of chunk unnamed-chunk-9

A complete plot with legends looks like:

## Warning: replacing previous import by 'magrittr::%>%' when loading
## 'dendextend'