Packages
- circlize can be installed from within R
- other packages used in this tutorial are
tidyverse, Biostrings,
GenomicFeatures, GenomicRanges, and
rtracklayer
# One-time setup: install circlize from the configured package repository.
install.packages(pkgs = "circlize")
- you can also use conda/mamba or pixi to install the dependencies in
a dedicated environment:
pixi init
pixi add r-circlize
...
- to render this notebook automatically with the enclosed pixi env,
run:
pixi run test-notebook
- to start an interactive shell with the environment, run:
pixi shell --environment circlize
suppressPackageStartupMessages({
library(tidyverse)
library(circlize)
library(Biostrings)
library(GenomicRanges)
library(GenomicFeatures)
library(rtracklayer)
})
Import genome annotation
- we import a
*.fasta and a *.gff file
corresponding to the same genome assembly
- we truncate the FASTA sequence name(s) at the first space or `|` so that
the seqnames in GFF and FASTA match
# Annotation (GFF) and sequence (FASTA) of the same assembly.
gff <- rtracklayer::import("../data/spyogenes_genome.gff")
fasta <- Biostrings::readDNAStringSet("../data/spyogenes_genome.fna")

# Keep only the first token of each FASTA header (split at space or "|").
names(fasta) <- stringr::str_split_i(
  string = names(fasta),
  pattern = "[ \\|]",
  i = 1
)
Check annotation data
- the plotting function contains an internal function to validate the
genomic coordinates
- however we can also check this up front and make corrections if
necessary
# Chromosome table: one row per FASTA record, spanning 0 to sequence width.
df_chroms <- data.frame(
  name = names(fasta),
  start = numeric(length(fasta)),
  end = width(fasta)
)

# Gene table: restrict the annotation to features of type "gene" and keep
# only sequence name and coordinates.
genes <- gff[gff$type == "gene"]
df_genes <- tibble::tibble(
  chr = as.character(seqnames(genes)),
  start = start(genes),
  end = end(genes)
)

# Check up front that the gene coordinates agree with the chromosome table;
# the (possibly corrected) gene table replaces the original.
df_genes <- validate_genomic_input(df_genes, df_chroms)
- we can also prepare extra data tracks that we supply as a named list
including the desired settings
# Demo tracks are built from every 10th gene. The index vector and its length
# are computed once so that all columns of a track are guaranteed to line up.
track_idx <- seq(1, nrow(df_genes), by = 10)
n_track <- length(track_idx)

# Take the sequence name from the gene table itself instead of hard-coding it,
# so coordinates and chromosome always belong together.
# NOTE(review): assumes validate_genomic_input() keeps the `chr` column.
track_chr <- as.character(df_genes$chr[track_idx])

# Named list of extra tracks; each entry bundles the data with its settings.
extra <- list(
  # Track 1: random values drawn as points within a fixed y-range.
  experiment = list(
    data = data.frame(
      chr = track_chr,
      start = df_genes$start[track_idx],
      end = df_genes$end[track_idx],
      value = rnorm(n_track, mean = 10, sd = 5)
    ),
    type = "points",
    color = "#96389f",
    height = 0.07,
    ylim = c(0, 20)
  )
)

# Track 2: constant-height rectangles, one random named color per region.
extra[["experiment2"]] <- list(
  data = data.frame(
    chr = track_chr,
    start = df_genes$start[track_idx],
    end = df_genes$end[track_idx],
    value = rep(1, n_track)
  ),
  type = "rect",
  # Sample without replacement as before; fall back to replacement only when
  # there are more regions than available colors (sample() would error).
  color = sample(colors(), n_track, replace = n_track > length(colors()))
)
Plot Circos plot and save to disk
- use PNG to avoid the extremely large files that vector graphics formats
like PDF or SVG can produce
- plotting can take a while as there is a lot of information
# Create the output directory if needed so the PNG file can be opened.
dir.create("../output", showWarnings = FALSE, recursive = TRUE)

png("../output/circlize.png", width = 2000, height = 2000, res = 300)
# Close the device even when plotting fails; otherwise the PNG device stays
# open and later plots are silently redirected into it.
tryCatch(
  plot_circlize(fasta, gff, extra = extra),
  finally = dev.off()
)
