Background

Libraries and test data

Packages

  • circlize can be installed from within R
  • other packages used in this tutorial are tidyverse, Biostrings, GenomicFeatures, GenomicRanges, and rtracklayer
# install circlize from the configured package repository (only needed once)
install.packages("circlize")
  • you can also use conda/mamba or pixi to install the dependencies in a dedicated environment:
# set up a pixi project and add the r-circlize package to it
pixi init
pixi add r-circlize
...
  • to render this notebook automatically with the enclosed pixi env, run:
# run the project's `test-notebook` pixi task
pixi run test-notebook
  • to start an interactive shell with the environment, run:
# open a shell inside the pixi environment named `circlize`
pixi shell --environment circlize
  • load required libraries
suppressPackageStartupMessages({
  library(tidyverse)
  library(circlize)
  library(Biostrings)
  library(GenomicRanges)
  library(GenomicFeatures)
  library(rtracklayer)
})

Import utility functions

  • validate_genomic_input takes as input two data frames, one with genomic coordinates and one with chromosome information, and checks if coordinates correspond
  • plot_circlize takes as input two objects, a DNA sequence as DNAStringSet and a GRangesList with genomic features
  • from this data it will automatically plot a circular (genome) map with standard features and tracks
  • additional features or data can be plotted as additional tracks, see examples below
# load the local helper script; it is expected to define
# validate_genomic_input() and plot_circlize(), which are used below
source("../source/circlize.R")

Import genome annotation

  • we import a *.fasta and a *.gff file corresponding to the same genome assembly
  • we truncate the FASTA sequence name(s) at the first space or pipe character so that they match the seqnames in the GFF
# read the genome sequence (FASTA) and its annotation (GFF)
fasta <- Biostrings::readDNAStringSet(
  filepath = "../data/spyogenes_genome.fna"
)
gff <- rtracklayer::import("../data/spyogenes_genome.gff")

# keep only the first field of each FASTA header, i.e. the text before the
# first space or "|"
names(fasta) <- stringr::str_split_i(
  string = names(fasta),
  pattern = "[ \\|]",
  i = 1
)

Check annotation data

  • the plotting function contains an internal function to validate the genomic coordinates
  • however we can also check this up front and make corrections if necessary
# gene table: seqname, start and end of every GFF feature of type "gene"
genes <- gff[gff$type == "gene"]
df_genes <- tibble::tibble(
  chr = as.character(seqnames(genes)),
  start = start(genes),
  end = end(genes)
)

# chromosome table: one row per FASTA sequence, from 0 to the sequence length
df_chroms <- data.frame(
  name = names(fasta),
  start = numeric(length(fasta)),
  end = width(fasta)
)

# check the gene coordinates against the chromosome table; the table returned
# by the helper replaces df_genes
df_genes <- validate_genomic_input(df_genes, df_chroms)
  • we can also prepare extra data tracks that we supply as a named list including the desired settings
# use every 10th gene as the position of an example data point
gene_idx <- seq(1, nrow(df_genes), by = 10)
n_sampled <- ceiling(nrow(df_genes) / 10)

extra <- list(
  # track 1: points with random values (mean 10, sd 5), ylim set to 0-20
  experiment = list(
    data = data.frame(
      chr = "NC_002737.2",
      start = df_genes$start[gene_idx],
      end = df_genes$end[gene_idx],
      value = rnorm(n_sampled, mean = 10, sd = 5)
    ),
    type = "points",
    color = "#96389f",
    height = 0.07,
    ylim = c(0, 20)
  ),
  # track 2: rectangles of constant value, one randomly drawn color name each
  experiment2 = list(
    data = data.frame(
      chr = "NC_002737.2",
      start = df_genes$start[gene_idx],
      end = df_genes$end[gene_idx],
      value = rep(1, n_sampled)
    ),
    type = "rect",
    color = sample(colors(), n_sampled)
  )
)

Plot Circos plot and save to disk

  • use PNG to avoid the extremely large files that vector graphics formats such as PDF or SVG can produce
  • plotting can take a while as there is a lot of information
# Render the circular genome map into a PNG file (2000 x 2000 px, 300 dpi).

# png() cannot write into a missing directory, so create it on demand
dir.create("../output", showWarnings = FALSE, recursive = TRUE)

png("../output/circlize.png", width = 2000, height = 2000, res = 300)
# close the device even if plotting fails; otherwise it stays open and
# captures all subsequent plots of the session
invisible(tryCatch(
  plot_circlize(fasta, gff, extra = extra),
  finally = dev.off()
))

LS0tCnRpdGxlOiAiUGxvdCBDaXJjb3MgcGxvdHMgd2l0aCBSIGBjaXJjbGl6ZWAiCmF1dGhvcjogTWljaGFlbCBKYWhuCmRhdGU6ICJgciBmb3JtYXQoU3lzLnRpbWUoKSwgJyVkICVCLCAlWScpYCIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICB0aGVtZTogY29zbW8KICAgIHRvYzogbm8KICAgIG51bWJlcl9zZWN0aW9uczogbm8KICBodG1sX2RvY3VtZW50OgogICAgdG9jOiBubwogICAgZGZfcHJpbnQ6IHBhZ2VkCi0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSkKYGBgCgojIyBCYWNrZ3JvdW5kCgotIGBjaXJjbGl6ZWAgaXMgYSBwb3dlcmZ1bCBSIHBhY2thZ2UgdG8gcGxvdCBjaXJjdWxhciB2aXN1YWxpemF0aW9ucywgc28gY2FsbGVkICdDaXJjb3MnIHBsb3RzCi0gQ2lyY29zIHBsb3RzIGFyZSBhIGdyZWF0IHdheSB0byB2aXN1YWxpemUgZ2Vub21pYyBkYXRhIGluIGEgY29tcGFjdCBhbmQgaW5mb3JtYXRpdmUgd2F5Ci0gdHlwaWNhbGx5LCB0aGV5IGNvbnNpc3Qgb2YgYSBjaXJjdWxhciBsYXlvdXQgd2l0aCBkaWZmZXJlbnQgdHJhY2tzIHJlcHJlc2VudGluZyB2YXJpb3VzIGdlbm9taWMgZmVhdHVyZXMsIHN1Y2ggYXMgYW5ub3RhdGVkIGdlbmVzLCBHQyBjb250ZW50IGFuZCBHQyBza2V3LCBhbmQgb3ZlcmxhaWQgY292ZXJhZ2Ugb3IgaW50ZXJhY3Rpb24gZGF0YQoKIyMgTGlicmFyaWVzIGFuZCB0ZXN0IGRhdGEKCiMjIyBQYWNrYWdlcwoKLSBgY2lyY2xpemVgIGNhbiBiZSBpbnN0YWxsZWQgZnJvbSB3aXRoaW4gUgotIG90aGVyIHBhY2thZ2VzIHVzZWQgaW4gdGhpcyB0dXRvcmlhbCBhcmUgYHRpZHl2ZXJzZWAsIGBHZW5vbWljRmVhdHVyZXNgLCBgR2Vub21pY1Jhbmdlc2AsIGFuZCBgcnRyYWNrbGF5ZXJgCgpgYGB7ciwgZXZhbCA9IEZBTFNFfQppbnN0YWxsLnBhY2thZ2VzKCJjaXJjbGl6ZSIpCmBgYAoKLSB5b3UgY2FuIGFsc28gdXNlIGNvbmRhL21hbWJhLCBvciB0aGUgcGl4aSB0byBpbnN0YWxsIGRlcGVuZGVuY2llcyBpbiBhIGRlZGljYXRlZCBlbnZpcm9ubWVudDoKCmBgYHtiYXNoLCBldmFsID0gRkFMU0V9CnBpeGkgaW5pdApwaXhpIGFkZCByLWNpcmNsaXplCi4uLgpgYGAKCi0gdG8gcmVuZGVyIHRoaXMgbm90ZWJvb2sgYXV0b21hdGljYWxseSB3aXRoIHRoZSBlbmNsb3NlZCBwaXhpIGVudiwgcnVuOgoKYGBge2Jhc2gsIGV2YWwgPSBGQUxTRX0KcGl4aSBydW4gdGVzdC1ub3RlYm9vawpgYGAKCi0gdG8gc3RhcnQgYW4gaW50ZXJhY3RpdmUgc2hlbGwgd2l0aCB0aGUgZW52aXJvbm1lbnQsIHJ1bjoKCmBgYHtiYXNoLCBldmFsID0gRkFMU0V9CnBpeGkgc2hlbGwgLS1lbnZpcm9ubWVudCBjaXJjbGl6ZQpgYGAKCi0gbG9hZCByZXF1aXJlZCBsaWJyYXJpZXMKCmBgYHtyfQpzdXBwcmVzc1BhY2thZ2VTdGFydHVwTWVzc2FnZXMoewogIGxpYnJhcnkodGlkeXZlcnNlKQogIGxpYnJhcnkoY2lyY2xpemUpCiAgbGlicmFyeShCaW9zdHJp
bmdzKQogIGxpYnJhcnkoR2Vub21pY1JhbmdlcykKICBsaWJyYXJ5KEdlbm9taWNGZWF0dXJlcykKICBsaWJyYXJ5KHJ0cmFja2xheWVyKQp9KQpgYGAKCiMjIyBJbXBvcnQgdXRpbGl0eSBmdW5jdGlvbnMKCi0gYHZhbGlkYXRlX2dlbm9taWNfaW5wdXRgIHRha2VzIGFzIGlucHV0IHR3byBkYXRhIGZyYW1lcywgb25lIHdpdGggZ2Vub21pYyBjb29yZGluYXRlcyBhbmQgb25lIHdpdGggY2hyb21vc29tZSBpbmZvcm1hdGlvbiwgYW5kIGNoZWNrcyBpZiBjb29yZGluYXRlcyBjb3JyZXNwb25kCi0gYHBsb3RfY2lyY2xpemVgIHRha2VzIGFzIGlucHV0IHR3byBvYmplY3RzLCBhIEROQSBzZXF1ZW5jZSBhcyBgRE5BU3RyaW5nU2V0YCBhbmQgYSBgR1Jhbmdlc0xpc3RgIHdpdGggZ2Vub21pYyBmZWF0dXJlcwotIGZyb20gdGhpcyBkYXRhIGl0IHdpbGwgYXV0b21hdGljYWxseSBwbG90IGEgY2lyY3VsYXIgKGdlbm9tZSkgbWFwIHdpdGggc3RhbmRhcmQgZmVhdHVyZXMgYW5kIHRyYWNrcwotIGFkZGl0aW9uYWwgZmVhdHVyZXMgb3IgZGF0YSBjYW4gYmUgcGxvdHRlZCBhcyBhZGRpdGlvbmFsIHRyYWNrcywgc2VlIGV4YW1wbGVzIGJlbG93CgpgYGB7cn0Kc291cmNlKCIuLi9zb3VyY2UvY2lyY2xpemUuUiIpCmBgYAoKIyMjIEltcG9ydCBnZW5vbWUgYW5ub3RhdGlvbgoKLSB3ZSBpbXBvcnQgYSBgKi5mYXN0YWAgYW5kIGEgYCouZ2ZmYCBmaWxlIGNvcnJlc3BvbmRpbmcgdG8gdGhlIHNhbWUgZ2Vub21lIGFzc2VtYmx5Ci0gd2UgdHJ1bmNhdGUgdGhlIGdlbm9tZSBzZXFuYW1lKHMpIHN1Y2ggdGhhdCBHRkYgYW5kIEZBU1RBIG1hdGNoCgpgYGB7cn0KZmFzdGEgPC0gQmlvc3RyaW5nczo6cmVhZEROQVN0cmluZ1NldCgiLi4vZGF0YS9zcHlvZ2VuZXNfZ2Vub21lLmZuYSIpCmdmZiA8LSBydHJhY2tsYXllcjo6aW1wb3J0KCIuLi9kYXRhL3NweW9nZW5lc19nZW5vbWUuZ2ZmIikKCm5hbWVzKGZhc3RhKSA8LSBzdHJpbmdyOjpzdHJfc3BsaXRfaShuYW1lcyhmYXN0YSksICJbIFxcfF0iLCAxKQpgYGAKCiMjIyBDaGVjayBhbm5vdGF0aW9uIGRhdGEKCi0gdGhlIHBsb3R0aW5nIGZ1bmN0aW9uIGNvbnRhaW5zIGFuIGludGVybmFsIGZ1bmN0aW9uIHRvIHZhbGlkYXRlIHRoZSBnZW5vbWljIGNvb3JkaW5hdGVzCi0gaG93ZXZlciB3ZSBjYW4gYWxzbyBjaGVjayB0aGlzIHVwIGZyb250IGFuZCBtYWtlIGNvcnJlY3Rpb25zIGlmIG5lY2Vzc2FyeQoKYGBge3J9CiMgZ2Vub21lIGluZm8KZGZfY2hyb21zIDwtIGRhdGEuZnJhbWUoCiAgbmFtZSA9IG5hbWVzKGZhc3RhKSwKICBzdGFydCA9IHJlcCgwLCBsZW5ndGgoZmFzdGEpKSwKICBlbmQgPSB3aWR0aChmYXN0YSkKKQoKIyBnZW5lIGFubm90YXRpb24KZ2VuZXMgPC0gZ2ZmW2dmZiR0eXBlID09ICJnZW5lIl0KZGZfZ2VuZXMgPC0gdGliYmxlKAogIGNociA9IGFzLmNoYXJhY3RlcihzZXFuYW1lcyhnZW5lcykpLAogIHN0YXJ0ID0gc3RhcnQoZ2VuZXMpLAogIGVuZCA9IGVuZChnZW5lcykKKQoKIyB2
YWxpZGF0ZSBpZiBnZW5vbWljIGNvb3JkaW5hdGVzIGZyb20gYW5ub3RhdGlvbiBhbmQgY2hyb21vc29tZSBpbmZvIGNvcnJlc3BvbmQKZGZfZ2VuZXMgPC0gdmFsaWRhdGVfZ2Vub21pY19pbnB1dChkZl9nZW5lcywgZGZfY2hyb21zKQpgYGAKCi0gd2UgY2FuIGFsc28gcHJlcGFyZSBleHRyYSBkYXRhIHRyYWNrcyB0aGF0IHdlIHN1cHBseSBhcyBhIG5hbWVkIGxpc3QgaW5jbHVkaW5nIHRoZSBkZXNpcmVkIHNldHRpbmdzCiAgCmBgYHtyfQpleHRyYSA8LSBsaXN0KAogIGV4cGVyaW1lbnQgPSBsaXN0KAogICAgZGF0YSA9IGRhdGEuZnJhbWUoCiAgICAgIGNociA9ICJOQ18wMDI3MzcuMiIsCiAgICAgIHN0YXJ0ID0gZGZfZ2VuZXMkc3RhcnRbc2VxKDEsIG5yb3coZGZfZ2VuZXMpLCBieSA9IDEwKV0sCiAgICAgIGVuZCA9IGRmX2dlbmVzJGVuZFtzZXEoMSwgbnJvdyhkZl9nZW5lcyksIGJ5ID0gMTApXSwKICAgICAgdmFsdWUgPSBybm9ybShjZWlsaW5nKG5yb3coZGZfZ2VuZXMpIC8gMTApLCBtZWFuID0gMTAsIHNkID0gNSkKICAgICksCiAgICB0eXBlID0gInBvaW50cyIsCiAgICBjb2xvciA9ICIjOTYzODlmIiwKICAgIGhlaWdodCA9IDAuMDcsCiAgICB5bGltID0gYygwLCAyMCkKICApCikKCmV4dHJhW1siZXhwZXJpbWVudDIiXV0gPC0gbGlzdCgKICBkYXRhID0gZGF0YS5mcmFtZSgKICAgIGNociA9ICJOQ18wMDI3MzcuMiIsCiAgICBzdGFydCA9IGRmX2dlbmVzJHN0YXJ0W3NlcSgxLCBucm93KGRmX2dlbmVzKSwgYnkgPSAxMCldLAogICAgZW5kID0gZGZfZ2VuZXMkZW5kW3NlcSgxLCBucm93KGRmX2dlbmVzKSwgYnkgPSAxMCldLAogICAgdmFsdWUgPSByZXAoMSwgY2VpbGluZyhucm93KGRmX2dlbmVzKSAvIDEwKSkKICApLAogIHR5cGUgPSAicmVjdCIsCiAgY29sb3IgPSBzYW1wbGUoY29sb3JzKCksIGNlaWxpbmcobnJvdyhkZl9nZW5lcykgLyAxMCkpCikKYGBgCgojIyMgUGxvdCBDaXJjb3MgcGxvdCBhbmQgc2F2ZSB0byBkaXNrCgotIHVzZSBQTkcgdG8gbm90IGdldCBleHRyZW1lbHkgbGFyZ2UgZmlndXJlcyBhcyBjYW4gaGFwcGVuIHdpdGggdmVjdG9yIGdyYXBoaWNzIGxpa2UgUERGIG9yIFNWRwotIHBsb3R0aW5nIGNhbiB0YWtlIGEgd2hpbGUgYXMgdGhlcmUgaXMgYSBsb3Qgb2YgaW5mb3JtYXRpb24KCmBgYHtyLCBtZXNzYWdlID0gRkFMU0UsIHdhcm5pbmcgPSBGQUxTRSwgcmVzdWx0cyA9ICJoaWRlIn0KcG5nKCIuLi9vdXRwdXQvY2lyY2xpemUucG5nIiwgd2lkdGggPSAyMDAwLCBoZWlnaHQgPSAyMDAwLCByZXMgPSAzMDApCnBsb3RfY2lyY2xpemUoZmFzdGEsIGdmZiwgZXh0cmEgPSBleHRyYSkKZGV2Lm9mZigpCmBgYAoKYGBge3IsIGVjaG8gPSBGQUxTRX0KIyBkaXNwbGF5IFBORyBmaWxlIGhlcmUKa25pdHI6OmluY2x1ZGVfZ3JhcGhpY3MoIi4uL291dHB1dC9jaXJjbGl6ZS5wbmciKQpgYGAK