Skip to content

sirusb/HMMDomainCaller

Repository files navigation

Load required libraries

require(mhsmm)
require(BSgenome.Mmusculus.UCSC.mm10)
require(rtracklayer)
require(dplyr)
require(glue)

# for plotting genome browswer view (optional)
require(Gviz)
require(TxDb.Mmusculus.UCSC.mm10.knownGene) 
require(org.Mm.eg.db)
# just for colorful plotting
require(crayon)

Load the scripts

source("utils.R")

Define the list of bw files

bw_file_paths = list(H2K27me3_MII = "bigWig_examples/H3K27me3_MII_mm10.sorted.Q10.dedup.sorted.bw",
                H2K27me3_2C = "bigWig_examples/H3K27me3_2C_mm10.sorted.Q10.dedup.sorted.bw",
                H2K27me3_4C = "bigWig_examples/H3K27me3_4C_mm10.sorted.Q10.dedup.sorted.bw"
                )

Load the mm10 blacklist regions (optional)

mm10_blacklist = "mm10-blacklist.v2.bed"
mm10_blacklist_gr = data.table::fread(mm10_blacklist)
colnames(mm10_blacklist_gr) = c("seqnames","start","end","Type")
mm10_blacklist_gr = GRanges(mm10_blacklist_gr)
mm10_blacklist_gr = subset(mm10_blacklist_gr, Type == "High Signal Region")

head(mm10_blacklist_gr)
## GRanges object with 6 ranges and 1 metadata column:
##       seqnames          ranges strand |               Type
##          <Rle>       <IRanges>  <Rle> |        <character>
##   [1]    chr10       0-3135400      * | High Signal Region
##   [2]    chr10 4613500-4615400      * | High Signal Region
##   [3]    chr10 4761300-4763900      * | High Signal Region
##   [4]    chr10 6281200-6286700      * | High Signal Region
##   [5]    chr10 6740200-6742100      * | High Signal Region
##   [6]    chr10 7396300-7429800      * | High Signal Region
##   -------
##   seqinfo: 21 sequences from an unspecified genome; no seqlengths

Call Domains

H3K27me3_domains <- list()

for(bw in names(bw_file_paths)){
  bw_file= bw_file_paths[[bw]]
  H3K27me3_domains[[bw]] = CallDomains(bw_file = bw_file,
                                       winsize = 5000,
                                       stepsize = 5000,
                                       training.chrom = glue("chr{1:4}"),
                                       chromsToUse = glue("chr{1:19}"),
                                       genome = 'mm10',
                                       smooth = FALSE,
                                       mm10_blacklist_gr = mm10_blacklist_gr,
                                       saveProbs = FALSE,
                                       plot.model = TRUE,
                                       outDir = "HMMDomains"
                                       )
}
## 
## ******** Processing H3K27me3_MII_mm10.sorted.Q10.dedup.sorted.bw ********
## *) Training model
## Estimating the initial distribution P0

## Initialization parameters

## $P0
## [1] 0.09994937 0.90005063
## 
## $mu
## [1] 19.982045  1.578732
## 
## $sigma
## [1] 9.991023 1.578732
## 
## defining model

## Fitting model
## *) Detecting domains:
## Generating bed file
##  ====================
## Saved in :HMMDomains/H3K27me3_MII_mm10.sorted.Q10.dedup.sorted_5_domains.bed
## 
## 
## ******** Processing H3K27me3_2C_mm10.sorted.Q10.dedup.sorted.bw ********
## *) Training model
## Estimating the initial distribution P0

## Initialization parameters
## $P0
## [1] 0.2079394 0.7920606
## 
## $mu
## [1] 9.181240 1.643701
## 
## $sigma
## [1] 4.590620 1.643701
## 
## defining model

## Fitting model
## *) Detecting domains:
## Generating bed file
##  ====================
## Saved in :HMMDomains/H3K27me3_2C_mm10.sorted.Q10.dedup.sorted_5_domains.bed
## 
## 
## ******** Processing H3K27me3_4C_mm10.sorted.Q10.dedup.sorted.bw ********
## *) Training model
## Estimating the initial distribution P0

## Initialization parameters


## $P0
## [1] 0.3265698 0.6734302
## 
## $mu
## [1] 6.389107 1.593652
## 
## $sigma
## [1] 2.880374 1.499496
## 
## defining model

## Fitting model
## *) Detecting domains:
## Generating bed file
##  ====================
## Saved in :HMMDomains/H3K27me3_4C_mm10.sorted.Q10.dedup.sorted_5_domains.bed

Check the output folder

list.files("HMMDomains/",full.names = T)
## [1] "HMMDomains/H3K27me3_2C_mm10.sorted.Q10.dedup.sorted_5_domains.bed" 
## [2] "HMMDomains/H3K27me3_4C_mm10.sorted.Q10.dedup.sorted_5_domains.bed" 
## [3] "HMMDomains/H3K27me3_MII_mm10.sorted.Q10.dedup.sorted_5_domains.bed"

Visualize the detected domains

Let’s display the Hoxc locus as an example

hoxc.gr = GRanges("chr15:100859671-104043685")
txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene

TxByGns <- genes(txdb)
TxByGns = subset(TxByGns, seqnames == seqlevels(hoxc.gr))

TxByGns$gene_name <- mapIds(org.Mm.eg.db,
                            keys = as.character(TxByGns$gene_id),
                            column = "SYMBOL",
                            keytype = "ENTREZID",
                            multiVals = "first")

TxDbTrack <- GeneRegionTrack(TxByGns,chromosome=seqlevels(hoxc.gr),gene = TxByGns$gene_name) 

gtrack <- GenomeAxisTrack()
bw_tracks <- c(gtrack, TxDbTrack)

seqlen = seqlengths(BSgenome.Mmusculus.UCSC.mm10)[seqlevels(hoxc.gr)]
region <- GRanges(seqlevels(hoxc.gr), IRanges(1,seqlen))

for(bw in names(bw_file_paths)){
  
  bw_file= bw_file_paths[[bw]]
  coverage <- import.bw(bw_file,which=region)
  dt <- DataTrack(coverage,chomosome=seqlevels(hoxc.gr),name = glue("{bw} signal")) 
  bw_tracks = c(bw_tracks, dt)
  
  atrack <- AnnotationTrack(H3K27me3_domains[[bw]], name = glue("{bw} domains"),chromosome = seqlevels(hoxc.gr))
  bw_tracks = c(bw_tracks, atrack)
}

plotTracks(bw_tracks, 
             chromosome=seqlevels(hoxc.gr),
             from = start(hoxc.gr),
             to = end(hoxc.gr),
             type="h",
           transcriptAnnotation="gene") 

Releases

No releases published

Packages

No packages published

Languages